Compare commits
46 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| eb21a02b6d | |||
| 498ce4e287 | |||
| 7081a8e900 | |||
| da4b86a159 | |||
| 81d864f4bc | |||
| c9795a6c4d | |||
| f5dc55f1d1 | |||
| fd92df486c | |||
| fc7498fef0 | |||
| 51dcca592d | |||
| 27c1e18e98 | |||
| 73502db9f4 | |||
| 4f85ef5209 | |||
| def18f28fa | |||
| 8fc27f4d69 | |||
| 6137657704 | |||
| 704a8ab7de | |||
| e358b9b92f | |||
| 7f59b7fd35 | |||
| c37caa2ec9 | |||
| 6e77083b84 | |||
| 660fc20124 | |||
| 3a3f670662 | |||
| 07457ad556 | |||
| 30a8aa10b8 | |||
| e9c4f23ae2 | |||
| 08b3aa8a2c | |||
| 022cc1136b | |||
| a1cfd085a8 | |||
| e97eb95d9d | |||
| 522e8708a5 | |||
| 3ceebf3b1f | |||
| e62db981e8 | |||
| 679314aa8f | |||
| 1e850af6de | |||
| 256eeedc69 | |||
| a501d33f80 | |||
| 73faaf9448 | |||
| 0aff9cf85f | |||
| f088e0ee90 | |||
| ea4681299d | |||
| b343995c05 | |||
| 4be7966654 | |||
| 4c290f49f2 | |||
| f13a675408 | |||
| 9aa4764301 |
@@ -0,0 +1,174 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Shared path-filter helper for Gitea Actions workflows.
|
||||
|
||||
Computes changed files against the PR base SHA or push-before SHA and writes
|
||||
boolean outputs to GITHUB_OUTPUT. If the diff base is missing or untrusted, the
|
||||
helper fails open by setting every output in the selected profile to true.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Path-filter profiles. Each profile maps an output name to a regular
# expression; classify() reports an output as true when the expression is
# found in at least one changed path. Insertion order is the order in which
# outputs are written.
PROFILES: dict[str, dict[str, str]] = {
    # Outputs consumed with `--profile ci`.
    "ci": {
        "platform": r"^workspace-server/",
        "canvas": r"^canvas/",
        "python": r"^workspace/",
        "scripts": "|".join(
            [
                r"^tests/e2e/",
                r"^scripts/",
                r"^infra/scripts/",
            ]
        ),
    },
    # Single-output profile: handler, wsauth and migration sources plus the
    # handlers-postgres-integration workflow file itself.
    "handlers-postgres": {
        "handlers": "|".join(
            [
                r"^workspace-server/internal/handlers/",
                r"^workspace-server/internal/wsauth/",
                r"^workspace-server/migrations/",
                r"^\.gitea/workflows/handlers-postgres-integration\.yml$",
            ]
        ),
    },
    # Single-output profile: the server tree, the e2e tests and the e2e-api
    # workflow file itself.
    "e2e-api": {
        "api": "|".join(
            [
                r"^workspace-server/",
                r"^tests/e2e/",
                r"^\.gitea/workflows/e2e-api\.yml$",
            ]
        ),
    },
}
|
||||
|
||||
|
||||
def classify(profile: str, paths: list[str]) -> dict[str, bool]:
    """Decide, for every output of ``profile``, whether any path triggers it.

    Args:
        profile: Key into ``PROFILES``; an unknown key raises ``KeyError``.
        paths: Changed file paths to test.

    Returns:
        A dict with one entry per output name of the profile. The value is
        True when the output's regular expression is found (``re.search``) in
        at least one path, and False otherwise (always False for no paths).
    """
    verdicts: dict[str, bool] = {}
    for output_name, expression in PROFILES[profile].items():
        verdicts[output_name] = any(
            re.search(expression, candidate) for candidate in paths
        )
    return verdicts
|
||||
|
||||
|
||||
def all_true(profile: str) -> dict[str, bool]:
    """Return the fail-open result: every output of ``profile`` set to True.

    An unknown profile key raises ``KeyError``.
    """
    return dict.fromkeys(PROFILES[profile], True)
|
||||
|
||||
|
||||
def resolve_base(event_name: str, pr_base_sha: str, push_before: str) -> str:
    """Pick the revision the diff should be computed against.

    Args:
        event_name: Workflow event name, e.g. ``"pull_request"``.
        pr_base_sha: Base SHA of the pull request; may be empty.
        push_before: SHA the ref pointed at before the push; may be empty.

    Returns:
        ``pr_base_sha`` for a ``pull_request`` event that carries a non-empty
        base SHA; ``push_before`` in every other case.
    """
    prefer_pr_base = event_name == "pull_request" and bool(pr_base_sha)
    return pr_base_sha if prefer_pr_base else push_before
|
||||
|
||||
|
||||
def is_zero_sha(value: str) -> bool:
    """Return True when ``value`` is empty or consists solely of ``0`` digits.

    No whitespace is stripped: a value such as ``"0\\n"`` is not a zero SHA.
    """
    if not value:
        return True
    # Every character is "0" exactly when the count of zeros equals the length.
    return value.count("0") == len(value)
|
||||
|
||||
|
||||
def run_git(args: list[str], *, timeout: int = 30) -> subprocess.CompletedProcess[str]:
    """Run ``git`` with ``args`` and capture stdout/stderr as text.

    The command is never allowed to abort the helper: a non-zero exit status,
    a timeout, and a failure to launch git are all reported through the
    returned object's ``returncode``, so callers can fail open on any problem
    by checking that single field.

    Args:
        args: Arguments placed after ``git`` on the command line.
        timeout: Seconds to wait before the child is killed.

    Returns:
        The completed process. ``returncode`` is git's own exit status, 124
        when the timeout expired, or 127 when git could not be started. In
        the two synthetic cases ``stdout`` is empty and ``stderr`` describes
        the failure.
    """
    command = ["git", *args]
    try:
        return subprocess.run(
            command,
            check=False,
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as exc:
        # subprocess.run has already killed and reaped the child at this point.
        return subprocess.CompletedProcess(
            command,
            124,
            stdout="",
            stderr=f"git timed out after {exc.timeout}s",
        )
    except OSError as exc:
        # git is missing or not executable on this runner.
        return subprocess.CompletedProcess(command, 127, stdout="", stderr=str(exc))
|
||||
|
||||
|
||||
def base_exists(base: str) -> bool:
    """Return True when ``git cat-file -e <base>`` exits with status 0."""
    probe = run_git(["cat-file", "-e", base])
    return probe.returncode == 0
|
||||
|
||||
|
||||
def fetch_base(base: str, base_ref: str) -> None:
    """Try to make ``base`` available locally with depth-1 fetches.

    Gitea may refuse to serve an arbitrary unadvertised SHA to a shallow PR
    checkout, so the advertised base branch is fetched first. The SHA itself
    is requested only if it is still missing afterwards, which covers hosts
    that do allow fetching by SHA.

    Args:
        base: Revision that must end up in the local object store.
        base_ref: Name of the base branch; skipped when empty.

    Fetch results are not inspected; callers re-check with ``base_exists``.
    """
    shallow_fetch = ["fetch", "--depth=1", "origin"]
    if base_ref:
        run_git([*shallow_fetch, base_ref])
    if base_exists(base):
        return
    run_git([*shallow_fetch, base])
|
||||
|
||||
|
||||
def deepen_base_ref(base_ref: str) -> None:
    """Deepen the history of ``base_ref`` by 200 commits from ``origin``.

    Does nothing when ``base_ref`` is empty. The fetch gets a 60 second
    timeout and its result is not inspected.
    """
    if not base_ref:
        return
    run_git(["fetch", "--deepen=200", "origin", base_ref], timeout=60)
|
||||
|
||||
|
||||
def merge_base(base: str) -> str | None:
    """Return the output of ``git merge-base <base> HEAD``, or None.

    None is returned when the command exits non-zero or prints nothing but
    whitespace; otherwise the stripped stdout is returned.
    """
    result = run_git(["merge-base", base, "HEAD"])
    if result.returncode == 0:
        ancestor = result.stdout.strip()
        if ancestor:
            return ancestor
    return None
|
||||
|
||||
|
||||
def changed_paths(base: str, *, use_merge_base: bool) -> list[str] | None:
    """List the paths that differ between the comparison base and ``HEAD``.

    Args:
        base: Revision to compare against.
        use_merge_base: When True, compare against the merge base of ``base``
            and ``HEAD`` instead of ``base`` itself.

    Returns:
        The changed paths exactly as stored in the repository, or None when
        the diff cannot be trusted: no merge base was found, ``git diff``
        failed, or its output could not be decoded as text. Callers treat
        None as "fail open".
    """
    compare_base = base
    if use_merge_base:
        compare_base = merge_base(base) or ""
        if not compare_base:
            return None

    # -z makes git print paths verbatim, NUL-terminated. Without it, paths
    # containing "unusual" characters (non-ASCII bytes, quotes, control
    # characters) are C-quoted and wrapped in double quotes, so the anchored
    # ``^dir/`` profile patterns would silently miss them and the affected
    # surface would be skipped.
    try:
        proc = run_git(["diff", "--name-only", "-z", compare_base, "HEAD"])
    except UnicodeDecodeError:
        # A path that is not valid text in the active encoding cannot be
        # classified reliably; report the diff as untrusted.
        return None
    if proc.returncode != 0:
        return None
    return [path for path in proc.stdout.split("\0") if path]
|
||||
|
||||
|
||||
def write_outputs(values: dict[str, bool], output_path: str | None) -> None:
    """Emit ``name=true`` / ``name=false`` lines for every entry of ``values``.

    Args:
        values: Output names mapped to their boolean result.
        output_path: File to append the lines to (UTF-8). When empty or None
            the lines are printed to stdout instead.
    """
    rendered = [f"{key}={str(bool(flag)).lower()}" for key, flag in values.items()]
    if not output_path:
        for entry in rendered:
            print(entry)
        return
    # Append mode: earlier content of the output file is kept.
    with open(output_path, "a", encoding="utf-8") as sink:
        sink.writelines(entry + "\n" for entry in rendered)
|
||||
|
||||
|
||||
def detect(
    profile: str,
    event_name: str,
    pr_base_sha: str,
    push_before: str,
    base_ref: str = "",
) -> dict[str, bool]:
    """Compute the boolean outputs of ``profile`` for the current checkout.

    Every situation in which the diff cannot be trusted yields
    ``all_true(profile)``: an empty or all-zero base, a base that is still
    missing after a fetch attempt, or a failed diff.

    Args:
        profile: Key into ``PROFILES``.
        event_name: Workflow event name; ``"pull_request"`` switches the
            comparison to the merge base of the base revision and ``HEAD``.
        pr_base_sha: Pull-request base SHA; may be empty.
        push_before: SHA before the push; may be empty.
        base_ref: Base branch name used for fetching and deepening; optional.

    Returns:
        Output name to boolean mapping for the profile.
    """
    base = resolve_base(event_name, pr_base_sha, push_before)
    if is_zero_sha(base):
        return all_true(profile)

    # Fetch only when the base is absent, then verify it is really there.
    if not base_exists(base):
        fetch_base(base, base_ref)
    if not base_exists(base):
        return all_true(profile)

    is_pull_request = event_name == "pull_request"
    # No merge base yet: fetch more base-branch history before diffing.
    if is_pull_request and base_ref and merge_base(base) is None:
        deepen_base_ref(base_ref)

    paths = changed_paths(base, use_merge_base=is_pull_request)
    return all_true(profile) if paths is None else classify(profile, paths)
|
||||
|
||||
|
||||
def parse_args(argv: list[str]) -> argparse.Namespace:
    """Parse the helper's command-line options.

    ``--profile`` is required and must name a key of ``PROFILES``.
    ``--event-name`` and ``--push-before`` default to the environment
    variables ``GITHUB_EVENT_NAME`` and ``GITHUB_EVENT_BEFORE`` (empty string
    when unset); ``--pr-base-sha`` and ``--base-ref`` default to the empty
    string.
    """
    env = os.environ
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--profile", required=True, choices=sorted(PROFILES))
    optional_flags = (
        ("--event-name", env.get("GITHUB_EVENT_NAME", "")),
        ("--pr-base-sha", ""),
        ("--base-ref", ""),
        ("--push-before", env.get("GITHUB_EVENT_BEFORE", "")),
    )
    for flag, fallback in optional_flags:
        cli.add_argument(flag, default=fallback)
    return cli.parse_args(argv)
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
    """Parse ``argv``, compute the profile outputs and write them out.

    The outputs go to the file named by the ``GITHUB_OUTPUT`` environment
    variable, or to stdout when that variable is unset or empty.

    Returns:
        Always 0.
    """
    options = parse_args(argv)
    outputs = detect(
        profile=options.profile,
        event_name=options.event_name,
        pr_base_sha=options.pr_base_sha,
        push_before=options.push_before,
        base_ref=options.base_ref,
    )
    write_outputs(outputs, os.environ.get("GITHUB_OUTPUT"))
    return 0
|
||||
|
||||
|
||||
# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))
|
||||
@@ -61,6 +61,7 @@ import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
@@ -89,6 +90,19 @@ API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
|
||||
# match by exact title without parsing.
|
||||
TITLE_PREFIX = "[main-red]"
|
||||
|
||||
# Settling window (seconds) between initial red detection and the
|
||||
# pre-file recheck. The recheck filters out the two largest false-
|
||||
# positive classes seen in mc#1597..1630 (task #394, 2026-05-21):
|
||||
# 1. HEAD moved on (a new commit landed mid-tick) — the prior red SHA
|
||||
# is no longer authoritative; let the next cron tick re-evaluate.
|
||||
# 2. Combined status recovered on the SAME SHA (transient
|
||||
# cancel-cascade rolled forward to success on retry).
|
||||
# 90s is well below the hourly cron cadence; a real failure that
|
||||
# persists past it is the one we want surfaced.
|
||||
# Override with WATCHDOG_RECHECK_DELAY_SECS for tests / local probes
|
||||
# (the test suite stubs time.sleep to a no-op).
|
||||
RECHECK_DELAY_SECS = int(_env("WATCHDOG_RECHECK_DELAY_SECS", default="90"))
|
||||
|
||||
|
||||
def _require_runtime_env() -> None:
|
||||
"""Enforce env contract — called from `main()` only.
|
||||
@@ -172,6 +186,49 @@ def api(
|
||||
return status, {"_raw": raw.decode("utf-8", errors="replace")}
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# action_run.status resolver — extensibility hook for task #394.
|
||||
# --------------------------------------------------------------------------
|
||||
def _resolve_action_run_status(target_url: str) -> int | None:
    """Resolve the Gitea `action_run.status` integer behind `target_url`.

    Returns None whenever no authoritative source can be reached from the
    runner, which today is every call (see CURRENT STATE below).

    Canonical Gitea 1.22.6 enum (per `models/actions/status.go` +
    `reference_gitea_action_status_enum_corrected_2026_05_19`):
        1=Success, 2=Failure, 3=Cancelled, 4=Skipped,
        5=Waiting, 6=Running, 7=Blocked
    Only `status == 2` is a real defect. status=3 is cancel-cascade, and
    status=1 is an emission artifact: Gitea wrote a 'failure' commit_status
    row for a run that actually succeeded (observed empirically on
    `publish-canvas-image` jobs at SHAs in mc#1597..1630).

    CURRENT STATE (2026-05-20, verified): Gitea 1.22.6 exposes NO REST
    endpoint for `action_run.status`. Probed:
        /api/v1/repos/{o}/{r}/actions/runs/{id}   → HTTP 404
        /api/v1/repos/{o}/{r}/actions/jobs/{id}   → HTTP 404
        /api/v1/repos/{o}/{r}/actions/tasks/{id}  → HTTP 404
        /swagger.v1.json paths containing 'actions' → secrets+variables+runners only
    The SPA backend (`/{repo}/actions/runs/{id}/jobs/{idx}` POST) requires
    a session CSRF token, unreachable from a runner. The only authoritative
    source today is direct DB access (`mol_action_status` on op-host,
    `docker exec molecule-postgres-1 psql ...`), which the runner cannot
    reach.

    Therefore this hook returns None on every call. Callers MUST fall back
    to the existing description-string filter plus the HEAD recheck. When a
    future Gitea release (>=1.23 expected) or an op-host proxy exposes the
    endpoint, replace the body of this function with an `api(...)` call;
    the caller contract is stable.

    See also:
        - `reference_chronic_red_sweep_cancelled_vs_failed_filter`
        - `feedback_gitea_status_enum_use_helper_not_raw_int`
    """
    del target_url  # intentional placeholder: accepted but not used yet
    return None
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Gitea reads
|
||||
# --------------------------------------------------------------------------
|
||||
@@ -614,6 +671,56 @@ def run_once(*, dry_run: bool = False) -> int:
|
||||
}
|
||||
|
||||
if red:
|
||||
# HEAD recheck (task #394 — guards mc#1597..1630 false-positive
|
||||
# cluster). After the initial detection, wait RECHECK_DELAY_SECS
|
||||
# (default 90s; tests stub time.sleep) and re-evaluate:
|
||||
#
|
||||
# 1. Re-fetch HEAD SHA. If HEAD moved, a new commit landed
|
||||
# mid-tick — the prior red SHA is no longer authoritative
|
||||
# and the next cron run will re-evaluate against the new
|
||||
# HEAD. Skip-file.
|
||||
#
|
||||
# 2. If HEAD unchanged, re-fetch the combined status. If it
|
||||
# recovered (combined state no longer in {failure,error}
|
||||
# after the cancel-cascade filter), a transient retry
|
||||
# rolled the run forward. Skip-file.
|
||||
#
|
||||
# Both paths emit a Loki event distinguishable from the real
|
||||
# `main_red_detected` so obs queries can track filter activity.
|
||||
# The settling window is well below the hourly cron cadence —
|
||||
# genuine failures persist past it and are surfaced normally.
|
||||
time.sleep(RECHECK_DELAY_SECS)
|
||||
|
||||
recheck_sha = get_head_sha(WATCH_BRANCH)
|
||||
if recheck_sha != sha:
|
||||
emit_loki_event("main_red_skipped_head_drift", sha, [])
|
||||
print(
|
||||
f"::notice::skip-file (HEAD moved): initial red at "
|
||||
f"{sha[:10]} but HEAD is now {recheck_sha[:10]} on "
|
||||
f"{WATCH_BRANCH}; next cron tick will re-evaluate."
|
||||
)
|
||||
return 0
|
||||
|
||||
recheck_status = get_combined_status(sha)
|
||||
recheck_red, recheck_failed = is_red(recheck_status)
|
||||
if not recheck_red:
|
||||
emit_loki_event("main_red_skipped_recovered", sha, [])
|
||||
print(
|
||||
f"::notice::skip-file (recovered after settling): "
|
||||
f"combined state at {sha[:10]} flipped to "
|
||||
f"{recheck_status.get('state')!r} on recheck; "
|
||||
f"initial red was a transient cancel-cascade."
|
||||
)
|
||||
return 0
|
||||
|
||||
# Still red after settling — file/update. Use the recheck data
|
||||
# as authoritative so the issue body reflects the latest state.
|
||||
failed = recheck_failed
|
||||
debug["recheck_combined_state"] = recheck_status.get("state")
|
||||
debug["recheck_failed_contexts"] = [
|
||||
s.get("context") for s in failed
|
||||
]
|
||||
|
||||
failed_ctxs = [s.get("context") for s in failed if s.get("context")]
|
||||
emit_loki_event("main_red_detected", sha, failed_ctxs)
|
||||
print(f"::warning::main is RED at {sha[:10]} on {WATCH_BRANCH}: "
|
||||
|
||||
@@ -47,7 +47,9 @@ What this script does, per `.gitea/workflows/status-reaper.yml` invocation:
|
||||
Parse context as `<workflow_name> / <job_name> (push)`.
|
||||
Look up workflow_name in the trigger map:
|
||||
- missing → log ::notice:: and skip (conservative).
|
||||
- has_push_trigger=True → preserve (real defect signal).
|
||||
- has_push_trigger=True and description == "Has been cancelled"
|
||||
→ compensate cancelled/superseded push noise.
|
||||
- has_push_trigger=True otherwise → preserve (real defect signal).
|
||||
- has_push_trigger=False → POST a compensating
|
||||
`state=success` status to /statuses/{sha} with the same
|
||||
context (Gitea de-dups by context) and a description
|
||||
@@ -141,6 +143,11 @@ PR_SHADOW_COMPENSATION_DESCRIPTION = (
|
||||
"shadowed by successful push status on same SHA; see "
|
||||
".gitea/scripts/status-reaper.py)"
|
||||
)
|
||||
CANCELLED_PUSH_COMPENSATION_DESCRIPTION = (
|
||||
"Compensated by status-reaper (push run was cancelled/superseded; "
|
||||
"Gitea 1.22.6 reports cancelled runs as failure statuses)"
|
||||
)
|
||||
CANCELLED_DESCRIPTION = "Has been cancelled"
|
||||
|
||||
# Context suffix the reaper acts on. Gitea hardcodes this for ALL
|
||||
# default-branch workflow runs.
|
||||
@@ -476,7 +483,7 @@ def reap(
|
||||
{compensated, preserved_real_push, preserved_unknown,
|
||||
preserved_non_failure, preserved_non_push_suffix,
|
||||
preserved_unparseable, compensated_pr_shadowed_by_push_success,
|
||||
preserved_pr_without_push_success,
|
||||
preserved_pr_without_push_success, compensated_cancelled_push,
|
||||
compensated_contexts: [<context>, ...]}
|
||||
|
||||
`compensated_contexts` is rev2-added so `reap_branch` can build
|
||||
@@ -490,6 +497,7 @@ def reap(
|
||||
"preserved_non_push_suffix": 0,
|
||||
"preserved_unparseable": 0,
|
||||
"compensated_pr_shadowed_by_push_success": 0,
|
||||
"compensated_cancelled_push": 0,
|
||||
"preserved_pr_without_push_success": 0,
|
||||
"compensated_contexts": [],
|
||||
}
|
||||
@@ -567,8 +575,27 @@ def reap(
|
||||
counters["preserved_unknown"] += 1
|
||||
continue
|
||||
|
||||
if (s.get("description") or "").strip() == CANCELLED_DESCRIPTION:
|
||||
# Gitea 1.22.6 maps cancelled action runs to failure commit
|
||||
# statuses. During merge bursts, older push runs can be
|
||||
# superseded and cancelled even though a newer run for the
|
||||
# same branch is the real signal. Compensate only the exact
|
||||
# Gitea cancellation description; real push failures remain red.
|
||||
post_compensating_status(
|
||||
sha,
|
||||
context,
|
||||
s.get("target_url"),
|
||||
description=CANCELLED_PUSH_COMPENSATION_DESCRIPTION,
|
||||
dry_run=dry_run,
|
||||
)
|
||||
counters["compensated"] += 1
|
||||
counters["compensated_cancelled_push"] += 1
|
||||
counters["compensated_contexts"].append(context)
|
||||
continue
|
||||
|
||||
if workflow_trigger_map[workflow_name]:
|
||||
# Real push trigger → real defect signal. Preserve.
|
||||
# Real push trigger with a non-cancelled failure description
|
||||
# remains a defect signal. Preserve.
|
||||
counters["preserved_real_push"] += 1
|
||||
continue
|
||||
|
||||
@@ -674,6 +701,7 @@ def reap_branch(
|
||||
"preserved_non_push_suffix": 0,
|
||||
"preserved_unparseable": 0,
|
||||
"compensated_pr_shadowed_by_push_success": 0,
|
||||
"compensated_cancelled_push": 0,
|
||||
"preserved_pr_without_push_success": 0,
|
||||
"compensated_per_sha": {},
|
||||
"skipped": True,
|
||||
@@ -689,6 +717,7 @@ def reap_branch(
|
||||
"preserved_non_push_suffix": 0,
|
||||
"preserved_unparseable": 0,
|
||||
"compensated_pr_shadowed_by_push_success": 0,
|
||||
"compensated_cancelled_push": 0,
|
||||
"preserved_pr_without_push_success": 0,
|
||||
"compensated_per_sha": {},
|
||||
}
|
||||
@@ -728,6 +757,7 @@ def reap_branch(
|
||||
"preserved_non_push_suffix",
|
||||
"preserved_unparseable",
|
||||
"compensated_pr_shadowed_by_push_success",
|
||||
"compensated_cancelled_push",
|
||||
"preserved_pr_without_push_success",
|
||||
):
|
||||
aggregate[key] += per_sha[key]
|
||||
|
||||
@@ -1,60 +0,0 @@
|
||||
name: cascade-list-drift-gate
|
||||
|
||||
# Ported from .github/workflows/cascade-list-drift-gate.yml on 2026-05-11
|
||||
# per RFC internal#219 §1 sweep.
|
||||
#
|
||||
# Differences from the GitHub version:
|
||||
# - on.paths reference .gitea/workflows/publish-runtime.yml (the active
|
||||
# Gitea workflow file) instead of .github/workflows/publish-runtime.yml
|
||||
# (which Category A of this sweep deletes).
|
||||
# - Explicit `WORKFLOW=` arg passed to the drift script so it audits the
|
||||
# .gitea/ workflow (the script's default is still .github/... which
|
||||
# will not exist post-Cat-A).
|
||||
# - Workflow-level env.GITHUB_SERVER_URL set per
|
||||
# feedback_act_runner_github_server_url.
|
||||
# - `continue-on-error: true` on the job (RFC §1 contract — surface
|
||||
# defects without blocking; follow-up PR flips after triage).
|
||||
#
|
||||
# Structural gate: TEMPLATES list in publish-runtime.yml must match
|
||||
# manifest.json's workspace_templates exactly. Closes the recurrence
|
||||
# path of PR #2556 (the data fix) and is the first concrete deliverable
|
||||
# of RFC #388 PR-3.
|
||||
#
|
||||
# Triggers narrowly to keep CI quiet: only on PRs that actually change
|
||||
# one of the two files. The path-filtered split + always-emit-result
|
||||
# pattern (memory: "Required check names need a job that always runs")
|
||||
# is unnecessary here because the workflow IS the check name and PR
|
||||
# branch protection should require it directly. Future-proof: if this
|
||||
# becomes a required check, add a no-op aggregator with always() so the
|
||||
# name still emits when paths don't match.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [staging, main]
|
||||
paths:
|
||||
- manifest.json
|
||||
- .gitea/workflows/publish-runtime.yml
|
||||
- scripts/check-cascade-list-vs-manifest.sh
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
# bp-exempt: drift visibility gate; CI / all-required remains the required aggregate.
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking
|
||||
# the PR. Follow-up PR flips this off after surfaced defects are
|
||||
# triaged.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- name: Check cascade list matches manifest
|
||||
# Pass the .gitea/ workflow path explicitly — the script's
|
||||
# default still points at .github/... which Category A of this
|
||||
# sweep removes.
|
||||
run: bash scripts/check-cascade-list-vs-manifest.sh manifest.json .gitea/workflows/publish-runtime.yml
|
||||
@@ -1,225 +0,0 @@
|
||||
name: MCP Stdio Transport Regression
|
||||
|
||||
# Regression test for molecule-ai-workspace-runtime#61:
|
||||
# asyncio.connect_read_pipe / connect_write_pipe fail with
|
||||
# ValueError: "Pipe transport is only for pipes, sockets and character devices"
|
||||
# when stdout is a regular file (openclaw capture, CI tee, debugging).
|
||||
#
|
||||
# This workflow reproduces the exact failure mode and verifies the
|
||||
# fallback to direct buffer I/O works. It runs on every PR that
|
||||
# touches the MCP server or this workflow, plus nightly cron.
|
||||
#
|
||||
# Why a separate workflow (not folded into ci.yml python-lint):
|
||||
# - The test needs to spawn the MCP server with stdout redirected
|
||||
# to a regular file (not a TTY/pipe), which conflicts with
|
||||
# pytest's own capture mechanism.
|
||||
# - It exercises the actual process spawn path (python a2a_mcp_server.py)
|
||||
# not just unit-test mocks — closer to the real openclaw integration.
|
||||
# - A dedicated workflow surfaces stdio-specific regressions without
|
||||
# coupling to the broader Python test suite's coverage gate.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'workspace/a2a_mcp_server.py'
|
||||
- 'workspace/mcp_cli.py'
|
||||
- 'workspace/tests/test_a2a_mcp_server.py'
|
||||
- '.gitea/workflows/ci-mcp-stdio-transport.yml'
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'workspace/a2a_mcp_server.py'
|
||||
- 'workspace/mcp_cli.py'
|
||||
- 'workspace/tests/test_a2a_mcp_server.py'
|
||||
- '.gitea/workflows/ci-mcp-stdio-transport.yml'
|
||||
schedule:
|
||||
# Nightly at 04:00 UTC — catches drift from dependency updates
|
||||
# (e.g. asyncio behavior changes in new Python patch releases).
|
||||
- cron: '0 4 * * *'
|
||||
|
||||
concurrency:
|
||||
group: mcp-stdio-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
jobs:
|
||||
# bp-exempt: regression canary for runtime#61; not a merge gate — informational only until promoted to required.
|
||||
# mc#774: continue-on-error mask — new workflow, flip to false once it's green on ≥3 consecutive main runs.
|
||||
mcp-stdio-regular-file:
|
||||
name: MCP stdio with regular-file stdout
|
||||
runs-on: ubuntu-latest
|
||||
continue-on-error: true # mc#774
|
||||
timeout-minutes: 5
|
||||
env:
|
||||
WORKSPACE_ID: "00000000-0000-0000-0000-000000000001"
|
||||
defaults:
|
||||
run:
|
||||
working-directory: workspace
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: pip
|
||||
cache-dependency-path: workspace/requirements.txt
|
||||
- run: pip install -r requirements.txt pytest pytest-asyncio pytest-cov
|
||||
|
||||
- name: Reproduce runtime#61 — stdout as regular file
|
||||
run: |
|
||||
set -euo pipefail
|
||||
echo "=== Reproducing molecule-ai-workspace-runtime#61 ==="
|
||||
echo ""
|
||||
echo "Before the fix, this command would fail with:"
|
||||
echo ' ValueError: Pipe transport is only for pipes, sockets and character devices'
|
||||
echo ""
|
||||
|
||||
# Spawn the MCP server with stdout redirected to a regular file.
|
||||
# This is exactly what openclaw does when capturing MCP output.
|
||||
OUTPUT=$(mktemp)
|
||||
trap 'rm -f "$OUTPUT"' EXIT
|
||||
|
||||
# Send initialize request, then tools/list, then exit
|
||||
{
|
||||
echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}'
|
||||
echo '{"jsonrpc":"2.0","id":2,"method":"tools/list"}'
|
||||
} | python a2a_mcp_server.py > "$OUTPUT" 2>&1 || {
|
||||
RC=$?
|
||||
echo "FAIL: MCP server exited with code $RC"
|
||||
echo "--- stdout+stderr ---"
|
||||
cat "$OUTPUT"
|
||||
exit 1
|
||||
}
|
||||
|
||||
echo "PASS: MCP server handled regular-file stdout without crashing"
|
||||
echo ""
|
||||
echo "--- Output (first 20 lines) ---"
|
||||
head -20 "$OUTPUT"
|
||||
echo ""
|
||||
|
||||
# Verify we got valid JSON-RPC responses
|
||||
if grep -q '"result"' "$OUTPUT"; then
|
||||
echo "PASS: JSON-RPC responses found in output"
|
||||
else
|
||||
echo "FAIL: No JSON-RPC responses in output"
|
||||
cat "$OUTPUT"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Reproduce runtime#61 — stdin from regular file
|
||||
run: |
|
||||
set -euo pipefail
|
||||
echo "=== stdin as regular file (CI tee / capture pattern) ==="
|
||||
|
||||
INPUT=$(mktemp)
|
||||
OUTPUT=$(mktemp)
|
||||
trap 'rm -f "$INPUT" "$OUTPUT"' EXIT
|
||||
|
||||
cat > "$INPUT" <<'EOF'
|
||||
{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}
|
||||
{"jsonrpc":"2.0","id":2,"method":"tools/list"}
|
||||
EOF
|
||||
|
||||
python a2a_mcp_server.py < "$INPUT" > "$OUTPUT" 2>&1 || {
|
||||
RC=$?
|
||||
echo "FAIL: MCP server exited with code $RC"
|
||||
cat "$OUTPUT"
|
||||
exit 1
|
||||
}
|
||||
|
||||
echo "PASS: MCP server handled regular-file stdin without crashing"
|
||||
|
||||
if grep -q '"result"' "$OUTPUT"; then
|
||||
echo "PASS: JSON-RPC responses found in output"
|
||||
else
|
||||
echo "FAIL: No JSON-RPC responses in output"
|
||||
cat "$OUTPUT"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Verify warning is emitted for non-pipe stdio
|
||||
run: |
|
||||
set -euo pipefail
|
||||
echo "=== Verify diagnostic warning ==="
|
||||
|
||||
OUTPUT=$(mktemp)
|
||||
trap 'rm -f "$OUTPUT"' EXIT
|
||||
|
||||
{
|
||||
echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}'
|
||||
} | python a2a_mcp_server.py > "$OUTPUT" 2>&1
|
||||
|
||||
# The warning should mention "not a pipe" for operator visibility
|
||||
if grep -qi "not a pipe" "$OUTPUT"; then
|
||||
echo "PASS: Diagnostic warning emitted for non-pipe stdio"
|
||||
else
|
||||
echo "NOTE: No warning in output (may be suppressed by log level)"
|
||||
fi
|
||||
|
||||
- name: Reproduce openclaw failure — pipe held OPEN, no EOF
|
||||
run: |
|
||||
set -euo pipefail
|
||||
echo "=== keep-stdin-open pipe (the real openclaw / Claude Code case) ==="
|
||||
echo ""
|
||||
echo "Before the readline() fix this HANGS: main() did"
|
||||
echo " stdin.read(65536) -> on a pipe, blocks until 64KB OR EOF."
|
||||
echo "An MCP client sends one ~150B initialize and keeps stdin"
|
||||
echo "open waiting for the response, so the server never parsed"
|
||||
echo "the request and the client timed out (openclaw: 'MCP error"
|
||||
echo "-32000: Connection closed'). The earlier regular-file /"
|
||||
echo "heredoc-pipe steps PASSED through this bug because a file"
|
||||
echo "(or a closing heredoc) yields EOF immediately."
|
||||
echo ""
|
||||
|
||||
# Drive the server through a real pipe that stays OPEN: write
|
||||
# one initialize, do NOT close stdin, and require a response
|
||||
# within a hard timeout. read(65536) -> no output -> timeout
|
||||
# kills it -> FAIL. readline() -> immediate response -> PASS.
|
||||
python - <<'PYEOF'
|
||||
import json, subprocess, sys, time, select
|
||||
|
||||
proc = subprocess.Popen(
|
||||
[sys.executable, "a2a_mcp_server.py"],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
env={**__import__("os").environ},
|
||||
)
|
||||
req = json.dumps({
|
||||
"jsonrpc": "2.0", "id": 1, "method": "initialize",
|
||||
"params": {"protocolVersion": "2024-11-05",
|
||||
"capabilities": {},
|
||||
"clientInfo": {"name": "keepopen", "version": "1"}},
|
||||
}) + "\n"
|
||||
proc.stdin.write(req.encode())
|
||||
proc.stdin.flush()
|
||||
# Deliberately DO NOT close proc.stdin — mirror a live MCP client.
|
||||
|
||||
deadline = time.time() + 15
|
||||
line = b""
|
||||
while time.time() < deadline:
|
||||
r, _, _ = select.select([proc.stdout], [], [], 1)
|
||||
if r:
|
||||
line = proc.stdout.readline()
|
||||
if line:
|
||||
break
|
||||
proc.kill()
|
||||
|
||||
if not line:
|
||||
print("FAIL: no response within 15s on an open pipe — "
|
||||
"stdin.read(65536) regression is back")
|
||||
sys.exit(1)
|
||||
resp = json.loads(line.decode())
|
||||
assert resp.get("id") == 1 and "result" in resp, \
|
||||
f"unexpected response: {line[:200]!r}"
|
||||
assert resp["result"]["serverInfo"]["name"] == "molecule", \
|
||||
f"wrong serverInfo: {line[:200]!r}"
|
||||
print("PASS: server answered initialize on a still-open pipe")
|
||||
PYEOF
|
||||
|
||||
- name: Run unit tests for stdio transport
|
||||
run: |
|
||||
set -euo pipefail
|
||||
echo "=== Running stdio transport unit tests ==="
|
||||
python -m pytest tests/test_a2a_mcp_server.py::TestStdioPipeAssertion tests/test_a2a_mcp_server.py::TestStdioKeepOpenPipe -v --no-cov
|
||||
+57
-138
@@ -86,46 +86,17 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- id: check
|
||||
env:
|
||||
PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
PR_BASE_REF: ${{ github.event.pull_request.base.ref }}
|
||||
PUSH_BEFORE: ${{ github.event.before }}
|
||||
run: |
|
||||
# For PR events: diff against the base branch (not HEAD~1 of the branch,
|
||||
# which may be unrelated after force-pushes). When a push updates a PR,
|
||||
# both pull_request and push events fire — prefer the PR base so that
|
||||
# the diff is always computed against the actual merge base, not the
|
||||
# previous SHA on the branch which may be on a different history line.
|
||||
BASE="${GITHUB_BASE_REF:-${{ github.event.before }}}"
|
||||
# GITHUB_BASE_REF is set for PR events (the base branch name).
|
||||
# For pull_request events we use the stored base.sha; for push events
|
||||
# (or when base.sha is unavailable) fall back to github.event.before.
|
||||
if [ "${{ github.event_name }}" = "pull_request" ] && [ -n "${{ github.event.pull_request.base.sha }}" ]; then
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
fi
|
||||
# Fallback: if BASE is empty or all zeros (new branch), run everything
|
||||
if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$'; then
|
||||
echo "platform=true" >> "$GITHUB_OUTPUT"
|
||||
echo "canvas=true" >> "$GITHUB_OUTPUT"
|
||||
echo "python=true" >> "$GITHUB_OUTPUT"
|
||||
echo "scripts=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
# Workflow-only edits are covered by the workflow lint family
|
||||
# and by this workflow's always-present required jobs. Do not fan
|
||||
# those edits out into Go/Canvas/Python/shellcheck work; the
|
||||
# downstream jobs still emit their required contexts via no-op
|
||||
# steps when their surface flag is false.
|
||||
#
|
||||
# If the diff itself cannot be trusted, fail open by running every
|
||||
# surface instead of silently under-testing the PR.
|
||||
if ! DIFF=$(git diff --name-only "$BASE" HEAD 2>/dev/null); then
|
||||
echo "platform=true" >> "$GITHUB_OUTPUT"
|
||||
echo "canvas=true" >> "$GITHUB_OUTPUT"
|
||||
echo "python=true" >> "$GITHUB_OUTPUT"
|
||||
echo "scripts=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
echo "platform=$(echo "$DIFF" | grep -qE '^workspace-server/' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
echo "canvas=$(echo "$DIFF" | grep -qE '^canvas/' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
echo "python=$(echo "$DIFF" | grep -qE '^workspace/' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
echo "scripts=$(echo "$DIFF" | grep -qE '^tests/e2e/|^scripts/|^infra/scripts/' && echo true || echo false)" >> "$GITHUB_OUTPUT"
|
||||
python3 .gitea/scripts/detect-changes.py \
|
||||
--profile ci \
|
||||
--event-name "${{ github.event_name }}" \
|
||||
--pr-base-sha "$PR_BASE_SHA" \
|
||||
--base-ref "$PR_BASE_REF" \
|
||||
--push-before "${GITHUB_EVENT_BEFORE:-$PUSH_BEFORE}"
|
||||
|
||||
# Platform (Go) — Go build/vet/test/lint + coverage gates. The always-run
|
||||
# + per-step gating shape preserves the GitHub-side required-check name
|
||||
@@ -133,6 +104,7 @@ jobs:
|
||||
# the name match works on PRs that don't touch workspace-server/).
|
||||
platform-build:
|
||||
name: Platform (Go)
|
||||
needs: changes
|
||||
runs-on: ubuntu-latest
|
||||
# mc#774 (closed 2026-05-14): Phase 4 flip of the platform-build job.
|
||||
# Phase 4 (#656) originally flipped this to continue-on-error: false based on
|
||||
@@ -153,29 +125,29 @@ jobs:
|
||||
run:
|
||||
working-directory: workspace-server
|
||||
steps:
|
||||
- if: false
|
||||
- if: ${{ github.event_name == 'pull_request' && needs.changes.outputs.platform != 'true' }}
|
||||
working-directory: .
|
||||
run: echo "No platform/** changes — skipping real build steps; this job always runs to satisfy the required-check name on branch protection."
|
||||
- if: always()
|
||||
run: echo "No workspace-server/** changes on this PR — Platform (Go) gate satisfied without running Go build/test/lint."
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.platform == 'true' }}
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.platform == 'true' }}
|
||||
uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.platform == 'true' }}
|
||||
run: go mod download
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.platform == 'true' }}
|
||||
run: go build ./cmd/server
|
||||
# CLI (molecli) moved to standalone repo: git.moleculesai.app/molecule-ai/molecule-cli
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.platform == 'true' }}
|
||||
run: go vet ./...
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.platform == 'true' }}
|
||||
name: Install golangci-lint
|
||||
run: go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.12.2
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.platform == 'true' }}
|
||||
name: Run golangci-lint
|
||||
run: $(go env GOPATH)/bin/golangci-lint run --timeout 3m ./...
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.platform == 'true' }}
|
||||
name: Diagnostic — per-package verbose 60s
|
||||
run: |
|
||||
set +e
|
||||
@@ -191,7 +163,7 @@ jobs:
|
||||
echo "::endgroup::"
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.platform == 'true' }}
|
||||
name: Run tests with race detection and coverage
|
||||
# Explicit timeout: cold runner cache causes OOM kills at ~4m39s on the
|
||||
# full ./... suite with race detection + coverage. A 10m per-step timeout
|
||||
@@ -199,7 +171,7 @@ jobs:
|
||||
# instead of OOM-killing. The job-level timeout (15m) is a backstop.
|
||||
run: go test -race -timeout 10m -coverprofile=coverage.out ./...
|
||||
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.platform == 'true' }}
|
||||
name: Per-file coverage report
|
||||
# Advisory — lists every source file with its coverage so reviewers
|
||||
# can see at-a-glance where gaps are. Sorted ascending so the worst
|
||||
@@ -213,7 +185,7 @@ jobs:
|
||||
END {for (f in s) printf "%6.1f%% %s\n", s[f]/c[f], f}' \
|
||||
| sort -n
|
||||
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.platform == 'true' }}
|
||||
name: Check coverage thresholds
|
||||
# Enforces two gates from #1823 Layer 1:
|
||||
# 1. Total floor (25% — ratchet plan in COVERAGE_FLOOR.md).
|
||||
@@ -301,6 +273,7 @@ jobs:
|
||||
# siblings — verified empirically on PR #2314).
|
||||
canvas-build:
|
||||
name: Canvas (Next.js)
|
||||
needs: changes
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
# Phase 4 (RFC #219 §1): confirmed green on main 2026-05-12.
|
||||
@@ -309,20 +282,20 @@ jobs:
|
||||
run:
|
||||
working-directory: canvas
|
||||
steps:
|
||||
- if: false
|
||||
- if: ${{ github.event_name == 'pull_request' && needs.changes.outputs.canvas != 'true' }}
|
||||
working-directory: .
|
||||
run: echo "No canvas/** changes — skipping real build steps; this job always runs to satisfy the required-check name on branch protection."
|
||||
- if: always()
|
||||
run: echo "No canvas/** changes on this PR — Canvas (Next.js) gate satisfied without running npm build/test."
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.canvas == 'true' }}
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.canvas == 'true' }}
|
||||
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
with:
|
||||
node-version: '22'
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.canvas == 'true' }}
|
||||
run: npm ci --include=optional --prefer-offline
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.canvas == 'true' }}
|
||||
run: npm run build
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.canvas == 'true' }}
|
||||
name: Run tests with coverage
|
||||
# Coverage instrumentation is configured in canvas/vitest.config.ts
|
||||
# (provider: v8, reporters: text + html + json-summary). Step 2 of
|
||||
@@ -331,7 +304,7 @@ jobs:
|
||||
# tracked in #1815) after the team sees what current coverage is.
|
||||
run: npx vitest run --coverage
|
||||
- name: Upload coverage summary as artifact
|
||||
if: always()
|
||||
if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.canvas == 'true' }}
|
||||
# Pinned to v3 for Gitea act_runner v0.6 compatibility — v4+ uses
|
||||
# the GHES 3.10+ artifact protocol that Gitea 1.22.x does NOT
|
||||
# implement, surfacing as `GHESNotSupportedError: @actions/artifact
|
||||
@@ -348,15 +321,16 @@ jobs:
|
||||
# Shellcheck (E2E scripts) — required check, always runs.
|
||||
shellcheck:
|
||||
name: Shellcheck (E2E scripts)
|
||||
needs: changes
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 4 (RFC #219 §1): confirmed green on main 2026-05-12.
|
||||
continue-on-error: false
|
||||
steps:
|
||||
- if: false
|
||||
run: echo "No tests/e2e/ or infra/scripts/ changes — skipping real shellcheck; this job always runs to satisfy the required-check name on branch protection."
|
||||
- if: always()
|
||||
- if: ${{ github.event_name == 'pull_request' && needs.changes.outputs.scripts != 'true' }}
|
||||
run: echo "No tests/e2e, scripts, or infra/scripts changes on this PR — Shellcheck gate satisfied without running script checks."
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.scripts == 'true' }}
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.scripts == 'true' }}
|
||||
name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh
|
||||
# shellcheck is pre-installed on ubuntu-latest runners (via apt).
|
||||
# infra/scripts/ is included because setup.sh + nuke.sh gate the
|
||||
@@ -367,16 +341,16 @@ jobs:
|
||||
find tests/e2e infra/scripts -type f -name '*.sh' -print0 \
|
||||
| xargs -0 shellcheck --severity=warning
|
||||
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.scripts == 'true' }}
|
||||
name: Lint cleanup-trap hygiene (RFC #2873)
|
||||
run: bash tests/e2e/lint_cleanup_traps.sh
|
||||
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.scripts == 'true' }}
|
||||
name: Run E2E bash unit tests (no live infra)
|
||||
run: |
|
||||
bash tests/e2e/test_model_slug.sh
|
||||
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.scripts == 'true' }}
|
||||
name: Test ECR promote-tenant-image script (mock-driven, no live infra)
|
||||
# Covers scripts/promote-tenant-image.sh — the codified
|
||||
# :staging-latest → :latest ECR promote + tenant fleet redeploy
|
||||
@@ -386,7 +360,7 @@ jobs:
|
||||
run: |
|
||||
bash scripts/test-promote-tenant-image.sh
|
||||
|
||||
- if: always()
|
||||
- if: ${{ github.event_name != 'pull_request' || needs.changes.outputs.scripts == 'true' }}
|
||||
name: Shellcheck promote-tenant-image script
|
||||
# scripts/ is excluded from the bulk shellcheck pass above (legacy
|
||||
# SC3040/SC3043 cleanup pending). Run shellcheck explicitly on
|
||||
@@ -456,84 +430,29 @@ jobs:
|
||||
cat /tmp/deploy-reminder.md >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
# Python Lint & Test — required check, always runs.
|
||||
# Runtime Python moved to molecule-ai-workspace-runtime. Keep this context as
|
||||
# a guard so branch protection still catches attempts to reintroduce an
|
||||
# editable runtime copy under molecule-core/workspace/.
|
||||
python-lint:
|
||||
name: Python Lint & Test
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 4 (RFC #219 §1): confirmed green on main 2026-05-12.
|
||||
continue-on-error: false
|
||||
env:
|
||||
WORKSPACE_ID: test
|
||||
defaults:
|
||||
run:
|
||||
working-directory: workspace
|
||||
steps:
|
||||
- if: false
|
||||
working-directory: .
|
||||
run: echo "No workspace/** changes — skipping real lint+test; this job always runs to satisfy the required-check name on branch protection."
|
||||
- if: always()
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- if: always()
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: pip
|
||||
cache-dependency-path: workspace/requirements.txt
|
||||
- if: always()
|
||||
run: pip install -r requirements.txt pytest pytest-asyncio pytest-cov sqlalchemy>=2.0.0
|
||||
# Coverage flags + fail-under floor moved into workspace/pytest.ini
|
||||
# (issue #1817) so local `pytest` and CI use identical config.
|
||||
- if: always()
|
||||
run: python -m pytest --tb=short
|
||||
|
||||
- if: always()
|
||||
name: Per-file critical-path coverage (MCP / inbox / auth)
|
||||
# MCP-critical Python files have a per-file floor on top of the
|
||||
# 86% total floor in pytest.ini. See issue #2790 for full rationale.
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- name: Runtime SSOT guard
|
||||
run: |
|
||||
set -e
|
||||
PER_FILE_FLOOR=75
|
||||
CRITICAL_FILES=(
|
||||
"a2a_mcp_server.py"
|
||||
"mcp_cli.py"
|
||||
"a2a_tools.py"
|
||||
"a2a_tools_inbox.py"
|
||||
"inbox.py"
|
||||
"platform_auth.py"
|
||||
)
|
||||
|
||||
# pytest already wrote .coverage; emit a JSON view scoped to
|
||||
# the critical files so jq/python can read the per-file pct
|
||||
# without parsing tabular text.
|
||||
INCLUDES=$(printf '*%s,' "${CRITICAL_FILES[@]}")
|
||||
INCLUDES="${INCLUDES%,}"
|
||||
python -m coverage json -o /tmp/critical-cov.json --include="$INCLUDES"
|
||||
|
||||
FAILED=0
|
||||
for f in "${CRITICAL_FILES[@]}"; do
|
||||
pct=$(jq -r --arg f "$f" '.files | to_entries | map(select(.key == $f)) | .[0].value.summary.percent_covered // "MISSING"' /tmp/critical-cov.json)
|
||||
if [ "$pct" = "MISSING" ]; then
|
||||
echo "::error file=workspace/$f::No coverage data — file may have moved or test exclusion mis-set."
|
||||
FAILED=$((FAILED+1))
|
||||
continue
|
||||
fi
|
||||
echo "$f: ${pct}%"
|
||||
if awk "BEGIN{exit !($pct < $PER_FILE_FLOOR)}"; then
|
||||
echo "::error file=workspace/$f::${pct}% < ${PER_FILE_FLOOR}% per-file floor (MCP critical path). See COVERAGE_FLOOR.md."
|
||||
FAILED=$((FAILED+1))
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$FAILED" -gt 0 ]; then
|
||||
echo ""
|
||||
echo "$FAILED MCP critical-path file(s) below the ${PER_FILE_FLOOR}% per-file floor."
|
||||
echo "These paths handle multi-tenant routing, auth tokens, and inbox dispatch."
|
||||
echo "A coverage drop here is the same risk shape as Go-side tokens/secrets files"
|
||||
echo "dropping below 10% (see COVERAGE_FLOOR.md). Either:"
|
||||
echo " (a) add tests to raise coverage back above ${PER_FILE_FLOOR}%, or"
|
||||
echo " (b) if this is unavoidable historical debt, file an issue and propose"
|
||||
echo " adjusting the floor with rationale in COVERAGE_FLOOR.md."
|
||||
set -eu
|
||||
if [ -d workspace ]; then
|
||||
echo "::error file=workspace::Runtime source must live in molecule-ai-workspace-runtime, not molecule-core/workspace."
|
||||
exit 1
|
||||
fi
|
||||
for f in scripts/build_runtime_package.py scripts/test_build_runtime_package.py; do
|
||||
if [ -e "$f" ]; then
|
||||
echo "::error file=$f::Legacy build-from-workspace packaging script must not be restored."
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
echo "Runtime SSOT guard passed; core consumes the standalone runtime package."
|
||||
|
||||
all-required:
|
||||
# Aggregator sentinel — RFC internal#219 §2 (Phase 4 — closes internal#286).
|
||||
|
||||
@@ -132,31 +132,13 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- id: decide
|
||||
# Inline replacement for dorny/paths-filter — same pattern PR#372's
|
||||
# ci.yml port used. Diffs against the PR base or push BEFORE SHA,
|
||||
# then matches against the api-relevant path set.
|
||||
run: |
|
||||
BASE="${GITHUB_BASE_REF:-${{ github.event.before }}}"
|
||||
if [ "${{ github.event_name }}" = "pull_request" ] && [ -n "${{ github.event.pull_request.base.sha }}" ]; then
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
fi
|
||||
if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$'; then
|
||||
echo "api=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
if ! git cat-file -e "$BASE" 2>/dev/null; then
|
||||
git fetch --depth=1 origin "$BASE" 2>/dev/null || true
|
||||
fi
|
||||
if ! git cat-file -e "$BASE" 2>/dev/null; then
|
||||
echo "api=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
CHANGED=$(git diff --name-only "$BASE" HEAD)
|
||||
if echo "$CHANGED" | grep -qE '^(workspace-server/|tests/e2e/|\.gitea/workflows/e2e-api\.yml$)'; then
|
||||
echo "api=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "api=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
python3 .gitea/scripts/detect-changes.py \
|
||||
--profile e2e-api \
|
||||
--event-name "${{ github.event_name }}" \
|
||||
--pr-base-sha "${{ github.event.pull_request.base.sha }}" \
|
||||
--base-ref "${{ github.event.pull_request.base.ref }}" \
|
||||
--push-before "${GITHUB_EVENT_BEFORE:-${{ github.event.before }}}"
|
||||
|
||||
# ONE job (no job-level `if:`) that always runs and reports under the
|
||||
# required-check name `E2E API Smoke Test`. Real work is gated per-step
|
||||
@@ -366,6 +348,9 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
echo "Migrations OK"
|
||||
- name: Run today's-PR-coverage E2E (mc#1525/1535/1536/1539/1542 fix-specific assertions)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_today_pr_coverage_e2e.sh
|
||||
- name: Run E2E API tests
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_api.sh
|
||||
@@ -375,15 +360,18 @@ jobs:
|
||||
- name: Run priority-runtimes E2E (claude-code + hermes — skips when keys absent)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_priority_runtimes_e2e.sh
|
||||
- name: Install standalone runtime parser from Gitea registry
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: |
|
||||
python3 -m pip install --no-deps \
|
||||
--index-url https://git.moleculesai.app/api/packages/molecule-ai/pypi/simple/ \
|
||||
molecule-ai-workspace-runtime
|
||||
- name: Run poll-mode + since_id cursor E2E (#2339)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_poll_mode_e2e.sh
|
||||
- name: Run poll-mode chat upload E2E (RFC #2891)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_poll_mode_chat_upload_e2e.sh
|
||||
- name: Run today's-PR-coverage E2E (mc#1525/1535/1536/1539/1542 fix-specific assertions)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_today_pr_coverage_e2e.sh
|
||||
- name: Dump platform log on failure
|
||||
if: failure() && needs.detect-changes.outputs.api == 'true'
|
||||
run: cat workspace-server/platform.log || true
|
||||
@@ -401,4 +389,3 @@ jobs:
|
||||
run: |
|
||||
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
||||
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
|
||||
|
||||
|
||||
@@ -44,6 +44,8 @@ name: E2E Peer Visibility (literal MCP list_peers)
|
||||
# - No cross-repo `uses:` (feedback_gitea_cross_repo_uses_blocked). The
|
||||
# actions/checkout SHA is the one e2e-staging-canvas.yml already uses
|
||||
# successfully (a mirrored SHA — see #1277/PR#1292 root-cause).
|
||||
# - 2026-05-21 retrigger: verify fresh platform-tenant image after the
|
||||
# publish Buildx DOCKER_CONFIG fix restored staging-latest image updates.
|
||||
# - Per-SHA concurrency, not global (feedback_concurrency_group_per_sha).
|
||||
# - Workflow-level GITHUB_SERVER_URL pinned
|
||||
# (feedback_act_runner_github_server_url).
|
||||
@@ -68,14 +70,11 @@ name: E2E Peer Visibility (literal MCP list_peers)
|
||||
# minutes, not the 30+ min cold-EC2 path), so peer-visibility is part of
|
||||
# the local gate that fires before the staging E2E.
|
||||
#
|
||||
# It is its OWN non-required status context `E2E Peer Visibility (local)`
|
||||
# — same non-required-by-design decision as the staging job (red until
|
||||
# Hermes-401 #162 / OpenClaw-never-online #165 land; flip-to-required
|
||||
# tracked at molecule-core#1296). It is an HONEST gate: NO
|
||||
# continue-on-error mask (feedback_fix_root_not_symptom). It is kept a
|
||||
# distinct context (not folded into e2e-api.yml's required `E2E API
|
||||
# Smoke Test`) precisely so a deliberately-RED-today gate cannot wedge
|
||||
# the required local-E2E job or any unrelated merge.
|
||||
# It is its OWN non-required status context `E2E Peer Visibility (local)`.
|
||||
# The local backend uses external-mode workspaces by default so it tests
|
||||
# the literal platform MCP list_peers path without depending on local
|
||||
# template container boot/heartbeat. Container-mode runtime boot remains
|
||||
# available via PV_LOCAL_PROVISION_MODE=container for targeted debugging.
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -86,8 +85,6 @@ on:
|
||||
- 'workspace-server/internal/middleware/**'
|
||||
- 'workspace-server/internal/handlers/registry.go'
|
||||
- 'workspace-server/internal/handlers/workspace.go'
|
||||
- 'workspace/a2a_mcp_server.py'
|
||||
- 'workspace/platform_tools/registry.py'
|
||||
- 'tests/e2e/test_peer_visibility_mcp_staging.sh'
|
||||
- 'tests/e2e/test_peer_visibility_mcp_local.sh'
|
||||
- 'tests/e2e/lib/peer_visibility_assert.sh'
|
||||
@@ -100,8 +97,6 @@ on:
|
||||
- 'workspace-server/internal/middleware/**'
|
||||
- 'workspace-server/internal/handlers/registry.go'
|
||||
- 'workspace-server/internal/handlers/workspace.go'
|
||||
- 'workspace/a2a_mcp_server.py'
|
||||
- 'workspace/platform_tools/registry.py'
|
||||
- 'tests/e2e/test_peer_visibility_mcp_staging.sh'
|
||||
- 'tests/e2e/test_peer_visibility_mcp_local.sh'
|
||||
- 'tests/e2e/lib/peer_visibility_assert.sh'
|
||||
@@ -157,9 +152,9 @@ jobs:
|
||||
# ephemeral host ports so concurrent host-network act_runner runs don't
|
||||
# collide; go build; background platform-server). Its OWN non-required
|
||||
# status context `E2E Peer Visibility (local)` — non-required-by-design
|
||||
# exactly like the staging job (red until #162/#165 land;
|
||||
# flip-to-required tracked at molecule-core#1296). HONEST gate, NO
|
||||
# continue-on-error mask (feedback_fix_root_not_symptom). Runs on PR +
|
||||
# exactly like the staging job (flip-to-required tracked at
|
||||
# molecule-core#1296). HONEST gate, NO continue-on-error mask
|
||||
# (feedback_fix_root_not_symptom). Runs on PR +
|
||||
# push (local boot is minutes, not the 30+ min cold-EC2 path).
|
||||
# bp-required: pending #1296
|
||||
peer-visibility-local:
|
||||
@@ -179,6 +174,9 @@ jobs:
|
||||
E2E_ANTHROPIC_API_KEY: ${{ secrets.MOLECULE_STAGING_ANTHROPIC_API_KEY }}
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
|
||||
PV_RUNTIMES: "hermes openclaw claude-code"
|
||||
PV_LOCAL_PROVISION_MODE: external
|
||||
ADMIN_TOKEN: local-e2e-admin-token
|
||||
MOLECULE_ADMIN_TOKEN: local-e2e-admin-token
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
@@ -267,10 +265,9 @@ jobs:
|
||||
echo "::error::Platform did not become healthy in 30s"
|
||||
cat workspace-server/platform.log || true; exit 1
|
||||
- name: Run LOCAL fresh-provision peer-visibility E2E (literal MCP list_peers)
|
||||
# HONEST gate — NO continue-on-error. Red today (Hermes-401 #162 /
|
||||
# OpenClaw-never-online #165 not yet fixed); green when they land.
|
||||
# Non-required-by-design via its distinct status context until the
|
||||
# molecule-core#1296 flip-to-required.
|
||||
# HONEST gate — NO continue-on-error. The local backend uses
|
||||
# external-mode workspaces so this context tests the literal MCP
|
||||
# peer-visibility path without coupling to template container boot.
|
||||
run: bash tests/e2e/test_peer_visibility_mcp_local.sh
|
||||
- name: Dump platform log on failure
|
||||
if: failure()
|
||||
|
||||
@@ -101,36 +101,13 @@ jobs:
|
||||
# not present in the shallow checkout.
|
||||
fetch-depth: 2
|
||||
- id: filter
|
||||
# Inline replacement for dorny/paths-filter — see e2e-api.yml.
|
||||
run: |
|
||||
# Gitea Actions evaluates github.event.before to empty string in shell
|
||||
# scripts. Use GITHUB_EVENT_BEFORE shell env var instead (Gitea
|
||||
# correctly populates it for push events). PR case uses template var.
|
||||
BASE=""
|
||||
if [ "${{ github.event_name }}" = "pull_request" ] && [ -n "${{ github.event.pull_request.base.sha }}" ]; then
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
elif [ -n "$GITHUB_EVENT_BEFORE" ]; then
|
||||
BASE="$GITHUB_EVENT_BEFORE"
|
||||
fi
|
||||
if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$'; then
|
||||
echo "handlers=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
# timeout 30 guards against the case where BASE points to a ref that
|
||||
# git can resolve but cat-file hangs (rare on corrupted objects).
|
||||
if ! timeout 30 git cat-file -e "$BASE" 2>/dev/null; then
|
||||
git fetch --depth=1 origin "$BASE" 2>/dev/null || true
|
||||
fi
|
||||
if ! timeout 30 git cat-file -e "$BASE" 2>/dev/null; then
|
||||
echo "handlers=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
CHANGED=$(git diff --name-only "$BASE" HEAD)
|
||||
if echo "$CHANGED" | grep -qE '^(workspace-server/internal/handlers/|workspace-server/internal/wsauth/|workspace-server/migrations/|\.gitea/workflows/handlers-postgres-integration\.yml$)'; then
|
||||
echo "handlers=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "handlers=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
python3 .gitea/scripts/detect-changes.py \
|
||||
--profile handlers-postgres \
|
||||
--event-name "${{ github.event_name }}" \
|
||||
--pr-base-sha "${{ github.event.pull_request.base.sha }}" \
|
||||
--base-ref "${{ github.event.pull_request.base.ref }}" \
|
||||
--push-before "${GITHUB_EVENT_BEFORE:-}"
|
||||
|
||||
# Single-job-with-per-step-if pattern: always runs to satisfy the
|
||||
# required-check name on branch protection; real work gates on the
|
||||
|
||||
@@ -1,177 +0,0 @@
|
||||
name: publish-runtime-autobump
|
||||
|
||||
# Auto-bump-on-workspace-edit half of the publish pipeline.
|
||||
#
|
||||
# Why this file exists (issue #351):
|
||||
# Gitea Actions does not correctly disambiguate `paths:` from `tags:`
|
||||
# when both are bundled under a single `on.push` key. The result is
|
||||
# that tag pushes get filtered out and `publish-runtime.yml` never
|
||||
# fires — `action_run` rows: 0. This was unnoticed pre-2026-05-11
|
||||
# because PYPI_TOKEN was absent (publishes would have failed anyway).
|
||||
#
|
||||
# Split design:
|
||||
# - publish-runtime.yml : on.push.tags only (the publisher)
|
||||
# - publish-runtime-autobump.yml: on.push.branches+paths (this file — the version-bumper)
|
||||
#
|
||||
# This file computes the next version from PyPI's latest, pushes a
|
||||
# `runtime-v$VERSION` tag, and exits. The tag push then triggers
|
||||
# publish-runtime.yml via its tags-only trigger.
|
||||
#
|
||||
# Concurrency: shares the `publish-runtime` group with publish-runtime.yml
|
||||
# so concurrent workspace pushes serialize at the bump step. Without
|
||||
# this, two pushes minutes apart could both read PyPI latest=0.1.129
|
||||
# and try to tag 0.1.130 simultaneously, only one of which would land.
|
||||
|
||||
on:
|
||||
# Run on PR pushes to post a success status so Gitea can merge the PR.
|
||||
# All steps use continue-on-error: true so operational failures
|
||||
# (PyPI unreachable, DISPATCH_TOKEN missing) do not block merge.
|
||||
pull_request:
|
||||
paths:
|
||||
- "workspace/**"
|
||||
# mc#1578 / a05add29 cure: build_runtime_package.py owns PYPROJECT_TEMPLATE
|
||||
# (deps, classifiers, project metadata). A change there is publish-affecting
|
||||
# even when workspace/** is untouched, so the autobump must fire to claim
|
||||
# the next runtime-v$VERSION tag. Without this, manual tagging races PyPI
|
||||
# (e.g. runtime-v0.1.18 collided with the 2026-04-27 PyPI 0.1.18 publish,
|
||||
# blocking the python-multipart pin from reaching prod).
|
||||
- "scripts/build_runtime_package.py"
|
||||
- "scripts/test_build_runtime_package.py"
|
||||
# Bump-and-tag on main/staging push (the actual operational trigger).
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- staging
|
||||
paths:
|
||||
- "workspace/**"
|
||||
- "scripts/build_runtime_package.py"
|
||||
- "scripts/test_build_runtime_package.py"
|
||||
# Manual dispatch — useful when Gitea Actions API (/actions/*) is
|
||||
# unreachable (e.g. act_runner 404 on Gitea 1.22.6) and we cannot
|
||||
# re-trigger via curl.
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: write # required to push tags back
|
||||
|
||||
concurrency:
|
||||
group: publish-runtime
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
# PR-validation path: always succeeds so Gitea can merge workflow-only PRs.
|
||||
# Operational failures (PyPI unreachable, missing DISPATCH_TOKEN) are
|
||||
# surfaced via continue-on-error: true rather than blocking the merge.
|
||||
# The actual bump work happens on the main/staging push after merge.
|
||||
# bp-exempt: advisory validation for runtime publication; not a branch-protection gate.
|
||||
pr-validate:
|
||||
runs-on: ubuntu-latest
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true # do not block PR merge on operational failures
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Validate PyPI connectivity (best-effort)
|
||||
run: |
|
||||
set -eu
|
||||
echo "=== Checking PyPI accessibility ==="
|
||||
LATEST=$(curl -fsS --retry 3 --max-time 10 \
|
||||
https://pypi.org/pypi/molecule-ai-workspace-runtime/json \
|
||||
| python -c "import sys,json; print(json.load(sys.stdin)['info']['version'])" \
|
||||
|| echo "PyPI unreachable (non-blocking for PR validation)")
|
||||
echo "Latest: ${LATEST:-unknown}"
|
||||
|
||||
# Actual bump-and-tag: runs on main/staging pushes, posts real success/failure.
|
||||
# No continue-on-error — operational failures here trip the main-red
|
||||
# watchdog, which is the desired signal for infrastructure degradation.
|
||||
# bp-exempt: post-merge tag publication side effect; CI / all-required gates source changes.
|
||||
bump-and-tag:
|
||||
runs-on: ubuntu-latest
|
||||
# Only fire on push events (main/staging after PR merge). Pull_request
|
||||
# events are handled by pr-validate above; we do NOT bump on every
|
||||
# push-synchronize because that would race with the PR head.
|
||||
#
|
||||
# NOTE: the prior condition `github.event.pull_request.base.ref == ''`
|
||||
# was broken — on a PR-merge push in Gitea Actions, the pull_request
|
||||
# context is still attached (base.ref='main'), so the condition always
|
||||
# evaluated to false and bump-and-tag was permanently skipped.
|
||||
if: github.event_name == 'push'
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Fetch tags for collision check
|
||||
run: git fetch origin --tags --depth=1
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Compute next version from PyPI latest and existing tags
|
||||
id: bump
|
||||
run: |
|
||||
set -eu
|
||||
LATEST=$(curl -fsS --retry 3 https://pypi.org/pypi/molecule-ai-workspace-runtime/json \
|
||||
| python -c "import sys,json; print(json.load(sys.stdin)['info']['version'])")
|
||||
MAJOR=$(echo "$LATEST" | cut -d. -f1)
|
||||
MINOR=$(echo "$LATEST" | cut -d. -f2)
|
||||
TAG_LATEST=$(git tag --list "runtime-v${MAJOR}.${MINOR}.*" \
|
||||
| sed -E 's/^runtime-v//' \
|
||||
| grep -E '^[0-9]+\.[0-9]+\.[0-9]+$' \
|
||||
| sort -V \
|
||||
| tail -1 || true)
|
||||
VERSION=$(PYPI_LATEST="$LATEST" TAG_LATEST="$TAG_LATEST" python - <<'PY'
|
||||
import os
|
||||
|
||||
def parse(v):
|
||||
return tuple(int(part) for part in v.split("."))
|
||||
|
||||
pypi = os.environ["PYPI_LATEST"]
|
||||
tag = os.environ.get("TAG_LATEST") or pypi
|
||||
base = max(parse(pypi), parse(tag))
|
||||
print(f"{base[0]}.{base[1]}.{base[2] + 1}")
|
||||
PY
|
||||
)
|
||||
echo "PyPI latest=$LATEST, latest runtime tag=${TAG_LATEST:-none} -> next=$VERSION"
|
||||
if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+$'; then
|
||||
echo "::error::computed version $VERSION does not match PEP 440 X.Y.Z"
|
||||
exit 1
|
||||
fi
|
||||
if git tag --list | grep -qx "runtime-v$VERSION"; then
|
||||
echo "::error::tag runtime-v$VERSION already exists in this repo. Manual intervention required (PyPI and Gitea tag history are out of sync)."
|
||||
exit 1
|
||||
fi
|
||||
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Push runtime-v$VERSION tag
|
||||
env:
|
||||
DISPATCH_TOKEN: ${{ secrets.DISPATCH_TOKEN }}
|
||||
VERSION: ${{ steps.bump.outputs.version }}
|
||||
GITEA_URL: https://git.moleculesai.app
|
||||
run: |
|
||||
set -eu
|
||||
if [ -z "$DISPATCH_TOKEN" ]; then
|
||||
echo "::error::DISPATCH_TOKEN secret is not set — needed to push the tag back to molecule-core."
|
||||
exit 1
|
||||
fi
|
||||
git config user.name "publish-runtime autobump"
|
||||
git config user.email "publish-runtime@moleculesai.app"
|
||||
git tag -a "runtime-v$VERSION" \
|
||||
-m "Auto-bump on workspace/** edit on $GITHUB_REF" \
|
||||
-m "Triggered by: $GITHUB_REF @ $GITHUB_SHA" \
|
||||
-m "publish-runtime.yml will pick up this tag and upload to PyPI"
|
||||
# Push via DISPATCH_TOKEN (a Gitea PAT). Using the bot identity
|
||||
# ensures the resulting tag-push event is dispatched to
|
||||
# publish-runtime.yml; act_runner's default GITHUB_TOKEN cannot
|
||||
# trigger downstream workflows.
|
||||
git remote set-url origin "${GITEA_URL#https://}"
|
||||
git remote set-url origin "https://x-access-token:${DISPATCH_TOKEN}@${GITEA_URL#https://}/molecule-ai/molecule-core.git"
|
||||
git push origin "runtime-v$VERSION"
|
||||
echo "✓ pushed runtime-v$VERSION — publish-runtime.yml should fire next"
|
||||
@@ -1,437 +0,0 @@
|
||||
name: publish-runtime
|
||||
|
||||
# Gitea Actions port of .github/workflows/publish-runtime.yml.
|
||||
#
|
||||
# Ported 2026-05-10 (issue #206). Key differences from the GitHub version:
|
||||
# - Gitea Actions reads .gitea/workflows/, not .github/workflows/
|
||||
# - Dropped `environment: pypi-publish` — Gitea Actions does not support
|
||||
# named environments or OIDC trusted publishers
|
||||
# - Replaced `pypa/gh-action-pypi-publish@release/v1` (OIDC) with
|
||||
# `twine upload` using PYPI_TOKEN secret — same mechanism as a local
|
||||
# `python -m twine upload` with a PyPI token
|
||||
# - Replaced `github.ref_name` (GitHub-only) with `${GITHUB_REF#refs/tags/}`
|
||||
# — Gitea Actions exposes github.ref (the full ref) but not ref_name
|
||||
# - Dropped `merge_group` trigger (Gitea has no merge queue)
|
||||
#
|
||||
# 2026-05-10 (issue #348): originally restored `staging`/`main` branch +
|
||||
# `workspace/**` path-filter trigger in PR #349.
|
||||
#
|
||||
# 2026-05-11 (issue #351): REVERTED the branches+paths trigger from THIS
|
||||
# file. Bundling `paths` with `tags` under a single `on.push` key caused
|
||||
# Gitea Actions to never dispatch the workflow for tag-push events (0
|
||||
# runs in `action_run` for workflow_id='publish-runtime.yml' since the
|
||||
# port, including the runtime-v1.0.0 tag — which is why PyPI is still at
|
||||
# 0.1.129 despite a v1.0.0 Gitea tag existing).
|
||||
#
|
||||
# The auto-bump-on-workspace-edit trigger now lives in
|
||||
# `.gitea/workflows/publish-runtime-autobump.yml`. That file computes the
|
||||
# next version from PyPI's latest and pushes a `runtime-v$VERSION` tag,
|
||||
# which THIS file then picks up via the tags-only trigger below.
|
||||
#
|
||||
# This decoupling means Gitea's path-vs-tag evaluator never has to
|
||||
# disambiguate — each file has a single unambiguous trigger shape.
|
||||
#
|
||||
# PyPI publishing: requires PYPI_TOKEN repository secret (or org-level secret).
|
||||
# Set via: repo Settings → Actions → Variables and Secrets → New Secret.
|
||||
# The token should be a PyPI API token scoped to molecule-ai-workspace-runtime.
|
||||
#
|
||||
# The DISPATCH_TOKEN cascade (git push to template repos) is unchanged —
|
||||
# it uses the Gitea API directly and was already Gitea-compatible.
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "runtime-v*"
|
||||
workflow_dispatch:
|
||||
# 2026-05-11 (root cause of #351 / 0 runs ever):
|
||||
# Gitea 1.22.6's workflow parser rejects `workflow_dispatch.inputs.version`
|
||||
# with "unknown on type" — it mis-treats the inputs sub-keys as top-level
|
||||
# `on:` event types. Log line:
|
||||
# actions/workflows.go:DetectWorkflows() [W] ignore invalid workflow
|
||||
# "publish-runtime.yml": unknown on type: map["version": {...}]
|
||||
# That `[W] ignore invalid workflow` is silent UX — the workflow never
|
||||
# registers, so it never fires for ANY event (push.tags included).
|
||||
# Removing the inputs block restores parsing. Manual dispatch from the
|
||||
# Gitea UI now triggers the PyPI auto-bump fallback in `Derive version`
|
||||
# below (no `inputs.version` to read).
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# Serialize publishes so two concurrent tag pushes don't both compute
|
||||
# "latest+1" and race on PyPI upload. The second one waits.
|
||||
concurrency:
|
||||
group: publish-runtime
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
# Dedicated publish/release lane (internal#462 / #394 / #399). Ship
|
||||
# path (on: push tag runtime-v*) — reserved capacity, never FIFO
|
||||
# behind PR-CI. `publish` resolves only to molecule-runner-publish-*.
|
||||
runs-on: publish
|
||||
outputs:
|
||||
version: ${{ steps.version.outputs.version }}
|
||||
wheel_sha256: ${{ steps.wheel_hash.outputs.wheel_sha256 }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: pip
|
||||
|
||||
- name: Derive version (tag or PyPI auto-bump)
|
||||
id: version
|
||||
run: |
|
||||
if echo "$GITHUB_REF" | grep -q "^refs/tags/runtime-v"; then
|
||||
# Tag is `runtime-vX.Y.Z` — strip the prefix.
|
||||
VERSION="${GITHUB_REF#refs/tags/runtime-v}"
|
||||
else
|
||||
# workflow_dispatch path (no inputs supported on Gitea 1.22.6) or
|
||||
# any other non-tag trigger: derive from PyPI latest + patch bump.
|
||||
LATEST=$(curl -fsS --retry 3 https://pypi.org/pypi/molecule-ai-workspace-runtime/json \
|
||||
| python -c "import sys,json; print(json.load(sys.stdin)['info']['version'])")
|
||||
MAJOR=$(echo "$LATEST" | cut -d. -f1)
|
||||
MINOR=$(echo "$LATEST" | cut -d. -f2)
|
||||
PATCH=$(echo "$LATEST" | cut -d. -f3)
|
||||
VERSION="${MAJOR}.${MINOR}.$((PATCH+1))"
|
||||
echo "Auto-bumped from PyPI latest $LATEST -> $VERSION"
|
||||
fi
|
||||
if ! echo "$VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(\.dev[0-9]+|rc[0-9]+|a[0-9]+|b[0-9]+|\.post[0-9]+)?$'; then
|
||||
echo "::error::version $VERSION does not match PEP 440"
|
||||
exit 1
|
||||
fi
|
||||
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
|
||||
echo "Publishing molecule-ai-workspace-runtime $VERSION"
|
||||
|
||||
- name: Install build tooling
|
||||
run: pip install build twine
|
||||
|
||||
- name: Build package from workspace/
|
||||
run: |
|
||||
python scripts/build_runtime_package.py \
|
||||
--version "${{ steps.version.outputs.version }}" \
|
||||
--out "${{ runner.temp }}/runtime-build"
|
||||
|
||||
- name: Build wheel + sdist
|
||||
working-directory: ${{ runner.temp }}/runtime-build
|
||||
run: python -m build
|
||||
|
||||
- name: Capture wheel SHA256 for cascade content-verification
|
||||
id: wheel_hash
|
||||
working-directory: ${{ runner.temp }}/runtime-build
|
||||
run: |
|
||||
set -eu
|
||||
WHEEL=$(ls dist/*.whl 2>/dev/null | head -1)
|
||||
if [ -z "$WHEEL" ]; then
|
||||
echo "::error::No .whl in dist/ — \`python -m build\` must have failed silently"
|
||||
exit 1
|
||||
fi
|
||||
HASH=$(sha256sum "$WHEEL" | awk '{print $1}')
|
||||
echo "wheel_sha256=${HASH}" >> "$GITHUB_OUTPUT"
|
||||
echo "Local wheel SHA256 (pre-upload): ${HASH}"
|
||||
echo "Wheel filename: $(basename "$WHEEL")"
|
||||
|
||||
- name: Verify package contents (sanity)
|
||||
working-directory: ${{ runner.temp }}/runtime-build
|
||||
run: |
|
||||
python -m twine check dist/*
|
||||
python -m venv /tmp/smoke
|
||||
/tmp/smoke/bin/pip install --quiet dist/*.whl
|
||||
/tmp/smoke/bin/python "$GITHUB_WORKSPACE/scripts/wheel_smoke.py"
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
# RFC#596 (2026-05-19): Gitea PyPI registry as PRIMARY, PyPI as
|
||||
# best-effort fallback. Eliminates the SPOF that caused the
|
||||
# 2026-05-19 P0 (PyPI abuse-block #593 + Railway outage #595).
|
||||
#
|
||||
# Order is inverted intentionally:
|
||||
# 1. Gitea FIRST — must succeed (our internal SSOT).
|
||||
# 2. PyPI SECOND — best-effort, non-fatal on failure (courtesy
|
||||
# mirror; our consumers don't depend on it after Phase 4
|
||||
# template Dockerfile updates).
|
||||
#
|
||||
# Endpoint shape (verified live in RFC#596 Phase 5):
|
||||
# POST https://git.moleculesai.app/api/packages/molecule-ai/pypi/
|
||||
# HTTP Basic auth: username = gitea username, password = PAT with
|
||||
# `write:package` scope. Returns 201 Created on success.
|
||||
# ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
- name: Publish to Gitea PyPI registry (PRIMARY)
|
||||
id: gitea_publish
|
||||
working-directory: ${{ runner.temp }}/runtime-build
|
||||
env:
|
||||
# MOLECULE_PYPI_GITEA_PUBLISHER_USER: Gitea username for the publisher
|
||||
# persona (must own a token with `write:package` scope).
|
||||
# Provisioned in RFC#596 Phase 3 (operator-config PR).
|
||||
# NOTE: secret name MUST NOT start with `GITEA_` or `GITHUB_` —
|
||||
# Gitea 1.22.6 reserves those prefixes for built-in env vars and
|
||||
# rejects repo-secret PUT with HTTP 400 / "invalid secret name".
|
||||
# Empirically reproduced 2026-05-19 against
|
||||
# `/repos/molecule-ai/molecule-core/actions/secrets/GITEA_*`.
|
||||
MOLECULE_PYPI_GITEA_PUBLISHER_USER: ${{ secrets.MOLECULE_PYPI_GITEA_PUBLISHER_USER }}
|
||||
# MOLECULE_PYPI_GITEA_PUBLISHER_TOKEN: PAT for the publisher persona,
|
||||
# `write:package` scope on molecule-ai org.
|
||||
# Synced from Infisical /ci/gitea-pypi-publisher (RFC#596 Phase 3).
|
||||
MOLECULE_PYPI_GITEA_PUBLISHER_TOKEN: ${{ secrets.MOLECULE_PYPI_GITEA_PUBLISHER_TOKEN }}
|
||||
run: |
|
||||
set -eu
|
||||
if [ -z "${MOLECULE_PYPI_GITEA_PUBLISHER_TOKEN:-}" ] || [ -z "${MOLECULE_PYPI_GITEA_PUBLISHER_USER:-}" ]; then
|
||||
echo "::error::MOLECULE_PYPI_GITEA_PUBLISHER_USER / MOLECULE_PYPI_GITEA_PUBLISHER_TOKEN secrets are not set."
|
||||
echo "::error::Provision them via the RFC#596 Phase 3 operator-config sync script."
|
||||
echo "::error::Gitea is the PRIMARY index per RFC#596 — publish job aborts here, NOT after PyPI."
|
||||
exit 1
|
||||
fi
|
||||
python -m twine upload \
|
||||
--verbose \
|
||||
--repository-url "https://git.moleculesai.app/api/packages/molecule-ai/pypi/" \
|
||||
--username "$MOLECULE_PYPI_GITEA_PUBLISHER_USER" \
|
||||
--password "$MOLECULE_PYPI_GITEA_PUBLISHER_TOKEN" \
|
||||
dist/*
|
||||
echo "gitea_status=success" >> "$GITHUB_OUTPUT"
|
||||
echo "gitea_url=https://git.moleculesai.app/api/packages/molecule-ai/pypi/simple/molecule-ai-workspace-runtime" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Publish to PyPI (FALLBACK, best-effort)
|
||||
id: pypi_publish
|
||||
# working-directory matches the preceding Build/Verify steps. Without
|
||||
# this, twine runs from the default workspace checkout dir where
|
||||
# `dist/` doesn't exist and fails with:
|
||||
# ERROR InvalidDistribution: Cannot find file (or expand pattern): 'dist/*'
|
||||
# Caught on the first-ever successful dispatch of this workflow
|
||||
# (run 5097, 2026-05-11 02:08Z) — every other step in the publish
|
||||
# job already had this working-directory; Publish was missing it.
|
||||
#
|
||||
# RFC#596: this step is `continue-on-error: true` because PyPI is
|
||||
# NO LONGER the primary index. PyPI 403/timeout/abuse-block does
|
||||
# NOT block the publish — Gitea already has the wheel.
|
||||
continue-on-error: true
|
||||
working-directory: ${{ runner.temp }}/runtime-build
|
||||
env:
|
||||
# PYPI_TOKEN: repository secret scoped to molecule-ai-workspace-runtime.
|
||||
# Set via: Settings → Actions → Variables and Secrets → New Secret.
|
||||
# Format: pypi-XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
|
||||
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
|
||||
run: |
|
||||
if [ -z "$PYPI_TOKEN" ]; then
|
||||
echo "::warning::PYPI_TOKEN secret is not set — skipping PyPI mirror publish (non-fatal per RFC#596)."
|
||||
echo "pypi_status=skipped_no_token" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
if python -m twine upload \
|
||||
--verbose \
|
||||
--repository pypi \
|
||||
--username __token__ \
|
||||
--password "$PYPI_TOKEN" \
|
||||
dist/*; then
|
||||
echo "pypi_status=success" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
rc=$?
|
||||
echo "::warning::PyPI mirror publish failed (exit $rc). Non-fatal per RFC#596 — Gitea has the wheel."
|
||||
echo "pypi_status=failed_exit_$rc" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
echo "pypi_url=https://pypi.org/project/molecule-ai-workspace-runtime/${{ steps.version.outputs.version }}/" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Publish job summary (Gitea + PyPI status)
|
||||
if: always()
|
||||
run: |
|
||||
{
|
||||
echo "## publish-runtime $(date -u +%FT%TZ)"
|
||||
echo
|
||||
echo "**Version:** \`${{ steps.version.outputs.version }}\`"
|
||||
echo "**Wheel SHA256:** \`${{ steps.wheel_hash.outputs.wheel_sha256 }}\`"
|
||||
echo
|
||||
echo "### Indexes"
|
||||
echo
|
||||
echo "| Index | Status | URL |"
|
||||
echo "|---------|-------------------------------------------------|-----|"
|
||||
echo "| Gitea (PRIMARY) | ${{ steps.gitea_publish.outputs.gitea_status || 'failed' }} | ${{ steps.gitea_publish.outputs.gitea_url || '—' }} |"
|
||||
echo "| PyPI (fallback) | ${{ steps.pypi_publish.outputs.pypi_status || 'failed' }} | ${{ steps.pypi_publish.outputs.pypi_url || '—' }} |"
|
||||
echo
|
||||
echo "Per RFC#596: Gitea is the contract. PyPI is best-effort."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
cascade:
|
||||
needs: publish
|
||||
# Publish/release lane (internal#462) — downstream of the runtime
|
||||
# publish ship job; keep it on the reserved lane too.
|
||||
runs-on: publish
|
||||
steps:
|
||||
- name: Wait for PyPI to propagate the new version
|
||||
env:
|
||||
RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
|
||||
EXPECTED_SHA256: ${{ needs.publish.outputs.wheel_sha256 }}
|
||||
run: |
|
||||
set -eu
|
||||
if [ -z "$EXPECTED_SHA256" ]; then
|
||||
echo "::error::publish job did not expose wheel_sha256 — cannot verify wheel content. Refusing to fan out cascade."
|
||||
exit 1
|
||||
fi
|
||||
# NOTE (RFC#596 follow-up): this propagation probe still resolves
|
||||
# against PyPI's default index. After RFC#596 Phase 4 lands and
|
||||
# consumers pull from Gitea first, this probe should be rewritten
|
||||
# to verify the Gitea simple/ endpoint serves the new wheel
|
||||
# (PyPI may be best-effort-failed and the cascade should still
|
||||
# fan out, since templates will pull from Gitea). Tracked in #596.
|
||||
python -m venv /tmp/propagation-probe
|
||||
PROBE=/tmp/propagation-probe/bin
|
||||
$PROBE/pip install --upgrade --quiet pip
|
||||
for i in $(seq 1 30); do
|
||||
if $PROBE/pip install \
|
||||
--quiet \
|
||||
--no-cache-dir \
|
||||
--force-reinstall \
|
||||
--no-deps \
|
||||
"molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \
|
||||
>/dev/null 2>&1; then
|
||||
INSTALLED=$($PROBE/pip show molecule-ai-workspace-runtime 2>/dev/null \
|
||||
| awk -F': ' '/^Version:/{print $2}')
|
||||
if [ "$INSTALLED" = "$RUNTIME_VERSION" ]; then
|
||||
echo "✓ PyPI resolved $RUNTIME_VERSION (install check)"
|
||||
break
|
||||
fi
|
||||
fi
|
||||
if [ $i -eq 30 ]; then
|
||||
echo "::error::pip install --no-cache-dir molecule-ai-workspace-runtime==${RUNTIME_VERSION} never resolved within ~5 min."
|
||||
echo "::error::Refusing to fan out cascade against a potentially stale PyPI index."
|
||||
exit 1
|
||||
fi
|
||||
echo " [$i/30] waiting for PyPI to propagate ${RUNTIME_VERSION}..."
|
||||
sleep 4
|
||||
done
|
||||
|
||||
# Stage (b): download wheel + SHA256 compare against what we built.
|
||||
# Catches Fastly stale-content serving old bytes under a new version URL.
|
||||
#
|
||||
# Caught run 5196 (first-ever successful publish, 2026-05-11): the
|
||||
# previous one-liner `HASH=$(pip download ... && sha256sum ...)`
|
||||
# captured pip's stdout (`Collecting molecule-ai-workspace-runtime
|
||||
# ==X.Y.Z`) into HASH, then the SHA comparison failed against the
|
||||
# leaked `Collecting...` string. `2>/dev/null` silences stderr but
|
||||
# NOT stdout; pip writes its progress to stdout by default.
|
||||
# Fix: split into two steps, silence pip's stdout explicitly, capture
|
||||
# only sha256sum's output into HASH.
|
||||
# Start from an empty destination directory. If /tmp survives between
# jobs on this runner, a wheel left behind by an earlier run would make
# the *.whl glob below match several files, turning HASH into a
# multi-line string and failing the comparison for the wrong reason.
rm -rf /tmp/wheel-probe
python -m pip download \
  --no-deps \
  --no-cache-dir \
  --dest /tmp/wheel-probe \
  --quiet \
  "molecule-ai-workspace-runtime==${RUNTIME_VERSION}" \
  >/dev/null 2>&1
# Capture only sha256sum's first field (the digest) into HASH.
HASH=$(sha256sum /tmp/wheel-probe/*.whl | awk '{print $1}')
|
||||
if [ "$HASH" != "$EXPECTED_SHA256" ]; then
|
||||
echo "::error::PyPI propagated $RUNTIME_VERSION but wheel content SHA256 mismatch."
|
||||
echo "::error::Expected: $EXPECTED_SHA256"
|
||||
echo "::error::Got: $HASH"
|
||||
echo "::error::Fastly may be serving stale content. Refusing to fan out cascade."
|
||||
exit 1
|
||||
fi
|
||||
echo "✓ PyPI CDN verified (SHA256 match)"
|
||||
|
||||
- name: Fan out via push to .runtime-version
|
||||
env:
|
||||
# Gitea PAT with write:repository scope on all cascade-active
|
||||
# template repos. Used for git push to each template repo's main
|
||||
# branch, which trips their `on: push: branches: [main]` trigger
|
||||
# on publish-image.yml.
|
||||
DISPATCH_TOKEN: ${{ secrets.DISPATCH_TOKEN }}
|
||||
RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
|
||||
run: |
|
||||
set +e # don't abort on a single repo failure — collect them all
|
||||
|
||||
if [ -z "$DISPATCH_TOKEN" ]; then
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "::warning::DISPATCH_TOKEN secret not set — skipping cascade."
|
||||
echo "::warning::set it at Settings → Actions → Variables and Secrets → New Secret."
|
||||
exit 0
|
||||
fi
|
||||
echo "::error::DISPATCH_TOKEN secret missing — cascade cannot fan out."
|
||||
# No hard-coded repo count here: the authoritative list is TEMPLATES
# further down in this script, and a literal number goes stale whenever
# that list is pruned or extended.
echo "::error::PyPI was published, but the cascade template repos will NOT pick up the new version."
|
||||
exit 1
|
||||
fi
|
||||
VERSION="$RUNTIME_VERSION"
|
||||
if [ -z "$VERSION" ]; then
|
||||
echo "::error::publish job did not expose a version output"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
GITEA_URL="${GITEA_URL:-https://git.moleculesai.app}"
|
||||
# Keep in lockstep with manifest.json workspace_templates (suffix-stripped).
|
||||
# Guarded by scripts/check-cascade-list-vs-manifest.sh (cascade-list-drift-gate).
|
||||
# 2026-05-19: pruned crewai/deepagents/gemini-cli — not in manifest.
|
||||
TEMPLATES="claude-code hermes openclaw codex langgraph autogen"
|
||||
FAILED=""
|
||||
SKIPPED=""
|
||||
|
||||
git config --global user.name "publish-runtime cascade"
|
||||
git config --global user.email "publish-runtime@moleculesai.app"
|
||||
|
||||
WORKDIR="$(mktemp -d)"
|
||||
for tpl in $TEMPLATES; do
|
||||
REPO="molecule-ai/molecule-ai-workspace-template-$tpl"
|
||||
CLONE="$WORKDIR/$tpl"
|
||||
|
||||
HTTP=$(curl -sS -o /dev/null -w "%{http_code}" \
|
||||
-H "Authorization: token $DISPATCH_TOKEN" \
|
||||
"$GITEA_URL/api/v1/repos/$REPO/contents/.github/workflows/publish-image.yml")
|
||||
if [ "$HTTP" = "404" ]; then
|
||||
echo "↷ $tpl has no publish-image.yml — soft-skip"
|
||||
SKIPPED="$SKIPPED $tpl"
|
||||
continue
|
||||
fi
|
||||
|
||||
attempt=0
|
||||
success=false
|
||||
while [ $attempt -lt 3 ]; do
|
||||
attempt=$((attempt + 1))
|
||||
rm -rf "$CLONE"
|
||||
if ! git clone --depth=1 \
|
||||
"https://x-access-token:${DISPATCH_TOKEN}@${GITEA_URL#https://}/$REPO.git" \
|
||||
"$CLONE" >/tmp/clone.log 2>&1; then
|
||||
echo "::warning::clone $tpl attempt $attempt failed: $(tail -n3 /tmp/clone.log)"
|
||||
sleep 2
|
||||
continue
|
||||
fi
|
||||
|
||||
cd "$CLONE"
|
||||
echo "$VERSION" > .runtime-version

# Stage first, then compare the index against HEAD. A plain
# `git diff --quiet -- .runtime-version` only inspects tracked files, so
# in a template repo that has never had a .runtime-version the new file
# is untracked, the diff is empty, and the pin would be reported as
# "already at $VERSION" without ever being committed or pushed.
git add .runtime-version
if git diff --cached --quiet -- .runtime-version; then
  echo "✓ $tpl already at $VERSION — no commit needed"
  success=true
  cd - >/dev/null
  break
fi
|
||||
git commit -m "chore: pin runtime to $VERSION (publish-runtime cascade)" \
|
||||
-m "Co-Authored-By: publish-runtime cascade <publish-runtime@moleculesai.app>" \
|
||||
>/dev/null
|
||||
|
||||
if git push origin HEAD:main >/tmp/push.log 2>&1; then
|
||||
echo "✓ $tpl pushed $VERSION on attempt $attempt"
|
||||
success=true
|
||||
cd - >/dev/null
|
||||
break
|
||||
fi
|
||||
|
||||
echo "::warning::push $tpl attempt $attempt failed, pull-rebasing"
|
||||
git pull --rebase origin main >/tmp/rebase.log 2>&1 || true
|
||||
cd - >/dev/null
|
||||
done
|
||||
|
||||
if [ "$success" != "true" ]; then
|
||||
FAILED="$FAILED $tpl"
|
||||
fi
|
||||
done
|
||||
rm -rf "$WORKDIR"
|
||||
|
||||
if [ -n "$FAILED" ]; then
|
||||
echo "::error::Cascade incomplete after 3 retries each. Failed:$FAILED"
|
||||
exit 1
|
||||
fi
|
||||
if [ -n "$SKIPPED" ]; then
|
||||
echo "Cascade complete: pinned $VERSION. Soft-skipped (no publish-image.yml):$SKIPPED"
|
||||
else
|
||||
echo "Cascade complete: $VERSION pinned across all manifest workspace_templates."
|
||||
fi
|
||||
@@ -25,8 +25,11 @@ name: publish-workspace-server-image
|
||||
# staging-<sha>. Set repo variable or secret PROD_AUTO_DEPLOY_DISABLED=true
|
||||
# to stop production rollout while keeping image publishing enabled.
|
||||
#
|
||||
# ECR target: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/*
|
||||
# Primary ECR target: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/*
|
||||
# Optional staging tenant mirror target:
|
||||
# 004947743811.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/platform-tenant
|
||||
# Required secrets: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AUTO_SYNC_TOKEN
|
||||
# Optional secrets: AWS_STAGING_ECR_ACCESS_KEY_ID, AWS_STAGING_ECR_SECRET_ACCESS_KEY
|
||||
#
|
||||
# mc#711: Docker daemon not accessible on ubuntu-latest runner (molecule-canonical-1
|
||||
# shows client-only in `docker info` — daemon not running). DinD mount is present but
|
||||
@@ -65,6 +68,7 @@ env:
|
||||
# use below in this repo's staging-verify.yml.
|
||||
IMAGE_NAME: ${{ vars.ECR_REGISTRY || '153263036946.dkr.ecr.us-east-2.amazonaws.com' }}/molecule-ai/platform
|
||||
TENANT_IMAGE_NAME: ${{ vars.ECR_REGISTRY || '153263036946.dkr.ecr.us-east-2.amazonaws.com' }}/molecule-ai/platform-tenant
|
||||
STAGING_TENANT_IMAGE_NAME: ${{ vars.STAGING_ECR_REGISTRY || '004947743811.dkr.ecr.us-east-2.amazonaws.com' }}/molecule-ai/platform-tenant
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
@@ -135,6 +139,18 @@ jobs:
|
||||
run: |
|
||||
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# Keep Buildx state inside the job temp dir. The publish runner's
|
||||
# inherited DOCKER_CONFIG can point at a host-owned ECR config path
|
||||
# (/home/hongming/.docker-ecr), which caused setup-buildx-action to
|
||||
# fail before image build with EACCES creating buildx/certs.
|
||||
- name: Prepare writable Docker config
|
||||
run: |
|
||||
set -euo pipefail
|
||||
export DOCKER_CONFIG="$RUNNER_TEMP/docker-config"
|
||||
mkdir -p "$DOCKER_CONFIG/buildx/certs"
|
||||
echo "DOCKER_CONFIG=$DOCKER_CONFIG" >> "$GITHUB_ENV"
|
||||
docker buildx version
|
||||
|
||||
# Build + push platform image (inline ECR auth — mirrors the operator-host
|
||||
# approach; credentials come from GITHUB_SECRET_AWS_ACCESS_KEY_ID /
|
||||
# GITHUB_SECRET_AWS_SECRET_ACCESS_KEY in Gitea Actions).
|
||||
@@ -170,21 +186,46 @@ jobs:
|
||||
--push .
|
||||
|
||||
# Build + push tenant image (Go platform + Next.js canvas in one image).
|
||||
# When staging ECR publisher credentials are configured, push the same
|
||||
# build to the staging account too so fresh staging/E2E tenants can pull
|
||||
# without cross-account ECR permissions.
|
||||
- name: Build & push tenant image to ECR (staging-<sha> + staging-latest)
|
||||
env:
|
||||
TENANT_IMAGE_NAME: ${{ env.TENANT_IMAGE_NAME }}
|
||||
STAGING_TENANT_IMAGE_NAME: ${{ env.STAGING_TENANT_IMAGE_NAME }}
|
||||
TAG_SHA: staging-${{ steps.tags.outputs.sha }}
|
||||
TAG_LATEST: staging-latest
|
||||
GIT_SHA: ${{ github.sha }}
|
||||
REPO: ${{ github.repository }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_STAGING_ECR_ACCESS_KEY_ID: ${{ secrets.AWS_STAGING_ECR_ACCESS_KEY_ID }}
|
||||
AWS_STAGING_ECR_SECRET_ACCESS_KEY: ${{ secrets.AWS_STAGING_ECR_SECRET_ACCESS_KEY }}
|
||||
AWS_DEFAULT_REGION: us-east-2
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ECR_REGISTRY="${TENANT_IMAGE_NAME%%/*}"
|
||||
aws ecr get-login-password --region us-east-2 | \
|
||||
docker login --username AWS --password-stdin "${ECR_REGISTRY}"
|
||||
|
||||
build_tags=(
|
||||
--tag "${TENANT_IMAGE_NAME}:${TAG_SHA}"
|
||||
--tag "${TENANT_IMAGE_NAME}:${TAG_LATEST}"
|
||||
)
|
||||
if [ -n "${AWS_STAGING_ECR_ACCESS_KEY_ID:-}" ] && [ -n "${AWS_STAGING_ECR_SECRET_ACCESS_KEY:-}" ]; then
|
||||
STAGING_ECR_REGISTRY="${STAGING_TENANT_IMAGE_NAME%%/*}"
|
||||
AWS_ACCESS_KEY_ID="${AWS_STAGING_ECR_ACCESS_KEY_ID}" \
|
||||
AWS_SECRET_ACCESS_KEY="${AWS_STAGING_ECR_SECRET_ACCESS_KEY}" \
|
||||
aws ecr get-login-password --region us-east-2 | \
|
||||
docker login --username AWS --password-stdin "${STAGING_ECR_REGISTRY}"
|
||||
build_tags+=(
|
||||
--tag "${STAGING_TENANT_IMAGE_NAME}:${TAG_SHA}"
|
||||
--tag "${STAGING_TENANT_IMAGE_NAME}:${TAG_LATEST}"
|
||||
)
|
||||
else
|
||||
echo "::notice::Skipping staging ECR tenant push; AWS_STAGING_ECR_ACCESS_KEY_ID/AWS_STAGING_ECR_SECRET_ACCESS_KEY are not configured."
|
||||
fi
|
||||
|
||||
docker buildx build \
|
||||
--file ./workspace-server/Dockerfile.tenant \
|
||||
--build-arg NEXT_PUBLIC_PLATFORM_URL= \
|
||||
@@ -193,8 +234,7 @@ jobs:
|
||||
--label "org.opencontainers.image.revision=${GIT_SHA}" \
|
||||
--label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
||||
--label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
|
||||
--tag "${TENANT_IMAGE_NAME}:${TAG_SHA}" \
|
||||
--tag "${TENANT_IMAGE_NAME}:${TAG_LATEST}" \
|
||||
"${build_tags[@]}" \
|
||||
--push .
|
||||
|
||||
# bp-exempt: production deploy side-effect; merge is gated by CI / all-required and this job waits for push CI before acting.
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
name: Runtime Pin Compatibility
|
||||
|
||||
# Ported from .github/workflows/runtime-pin-compat.yml on 2026-05-11 per
|
||||
# RFC internal#219 §1 sweep.
|
||||
#
|
||||
# Differences from the GitHub version:
|
||||
# - Dropped `merge_group:` (no Gitea merge queue) and
|
||||
# `workflow_dispatch:` (no inputs, but the trigger itself is
|
||||
# parser-rejected when inputs are absent in some Gitea 1.22.x
|
||||
# builds; safest to drop entirely — manual runs go via cron-trigger
|
||||
# bump or push-with-paths-filter).
|
||||
# - on.paths references .gitea/workflows/runtime-pin-compat.yml (this
|
||||
# file) instead of the .github/ one.
|
||||
# - Workflow-level env.GITHUB_SERVER_URL set.
|
||||
# - `continue-on-error: true` on the job (RFC §1 contract).
|
||||
#
|
||||
# CI gate that prevents the 5-hour staging outage from 2026-04-24 from
|
||||
# recurring (controlplane#253). The original failure mode:
|
||||
# 1. molecule-ai-workspace-runtime 0.1.13 declared `a2a-sdk<1.0` in its
|
||||
# requires_dist metadata (incorrect — it actually imports
|
||||
# a2a.server.routes which only exists in a2a-sdk 1.0+)
|
||||
# 2. `pip install molecule-ai-workspace-runtime` resolved cleanly
|
||||
# 3. `from molecule_runtime.main import main_sync` raised ImportError
|
||||
# 4. Every tenant workspace crashed; the canary tenant caught it but
|
||||
# only after 5 hours of degraded staging
|
||||
#
|
||||
# This workflow installs the CURRENTLY PUBLISHED runtime from PyPI on
|
||||
# top of `workspace/requirements.txt` and smoke-imports. Catches:
|
||||
# - Upstream PyPI yanks
|
||||
# - Bad re-releases of molecule-ai-workspace-runtime
|
||||
# - Already-shipped wheels that stop importing because a transitive
|
||||
# dep moved underneath
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
# Narrow filter: pypi-latest is sensitive only to changes that
|
||||
# affect what we're INSTALLING (requirements.txt) or WHAT THE
|
||||
# CHECK ITSELF DOES (this workflow file). Edits to workspace/
|
||||
# source code don't change what's on PyPI right now, so they
|
||||
# don't change this gate's verdict.
|
||||
- 'workspace/requirements.txt'
|
||||
- '.gitea/workflows/runtime-pin-compat.yml'
|
||||
pull_request:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'workspace/requirements.txt'
|
||||
- '.gitea/workflows/runtime-pin-compat.yml'
|
||||
# Daily catch for upstream PyPI publishes that break the pin combo
|
||||
# without any change in our repo (e.g. someone re-yanks an a2a-sdk
|
||||
# release or molecule-ai-workspace-runtime publishes a bad bump).
|
||||
schedule:
|
||||
- cron: '0 13 * * *' # 06:00 PT
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
pypi-latest-install:
|
||||
name: PyPI-latest install + import smoke
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking
|
||||
# the PR. Follow-up PR flips this off after surfaced defects are
|
||||
# triaged.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: pip
|
||||
cache-dependency-path: workspace/requirements.txt
|
||||
- name: Install runtime + workspace requirements
|
||||
# Install order is load-bearing: install the runtime FIRST so pip
|
||||
# honors whatever a2a-sdk constraint the runtime metadata declares
|
||||
# (this is the surface that broke in 2026-04-24 — runtime declared
|
||||
# `a2a-sdk<1.0` but actually needed >=1.0). The follow-up install
|
||||
# of workspace/requirements.txt then upgrades a2a-sdk to the
|
||||
# constraint our runtime image actually pins. The import smoke
|
||||
# below verifies the upgraded combination is consistent.
|
||||
run: |
|
||||
python -m venv /tmp/venv
|
||||
/tmp/venv/bin/pip install --upgrade pip
|
||||
/tmp/venv/bin/pip install molecule-ai-workspace-runtime
|
||||
/tmp/venv/bin/pip install -r workspace/requirements.txt
|
||||
/tmp/venv/bin/pip show molecule-ai-workspace-runtime a2a-sdk \
|
||||
| grep -E '^(Name|Version):'
|
||||
- name: Smoke import — fail if metadata declares deps that don't satisfy real imports
|
||||
# WORKSPACE_ID is validated at import time by platform_auth.py — EC2
|
||||
# user-data sets it from the cloud-init template; set a placeholder
|
||||
# here so the import smoke doesn't trip on the env-var guard.
|
||||
env:
|
||||
WORKSPACE_ID: 00000000-0000-0000-0000-000000000001
|
||||
run: |
|
||||
/tmp/venv/bin/python -c "from molecule_runtime.main import main_sync; print('runtime imports OK')"
|
||||
@@ -1,150 +0,0 @@
|
||||
name: Runtime PR-Built Compatibility
|
||||
|
||||
# Ported from .github/workflows/runtime-prbuild-compat.yml on 2026-05-11
|
||||
# per RFC internal#219 §1 sweep.
|
||||
#
|
||||
# Differences from the GitHub version:
|
||||
# - Dropped `merge_group:` (no Gitea merge queue) and `workflow_dispatch:`
|
||||
# (Gitea 1.22.6 parser-rejects workflow_dispatch with inputs and is
|
||||
# finicky without them).
|
||||
# - `dorny/paths-filter@v4` replaced with inline `git diff` (per PR#372
|
||||
# pattern for ci.yml port).
|
||||
# - on.paths references .gitea/workflows/runtime-prbuild-compat.yml.
|
||||
# - Workflow-level env.GITHUB_SERVER_URL set.
|
||||
# - `continue-on-error: true` on every job (RFC §1 contract).
|
||||
#
|
||||
# Companion to `runtime-pin-compat.yml`. That workflow tests what's
|
||||
# CURRENTLY PUBLISHED on PyPI; this workflow tests what WOULD BE
|
||||
# PUBLISHED if THIS PR merges.
|
||||
#
|
||||
# Why two workflows: the chicken-and-egg #128 fix added a "PR-built
|
||||
# wheel" job to the original runtime-pin-compat.yml, but both jobs
|
||||
# shared a `paths:` filter that was the union of their needs
|
||||
# (`workspace/**`). That meant the PyPI-latest job ran on every doc
|
||||
# edit even though the upstream PyPI artifact can't change with our
|
||||
# workspace/ source. Splitting the two means each gets a narrow
|
||||
# `paths:` filter that matches the inputs it actually depends on.
|
||||
#
|
||||
# Catches the failure mode where a PR adds an import requiring a newer
|
||||
# SDK than `workspace/requirements.txt` pins:
|
||||
# 1. Pip resolves the existing PyPI wheel + the old SDK pin -> smoke
|
||||
# passes (it imports the OLD main.py from the wheel, not the PR's
|
||||
# new main.py).
|
||||
# 2. Merge -> publish-runtime.yml ships a wheel WITH the new import.
|
||||
# 3. Tenant images redeploy -> all crash on first boot with ImportError.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, staging]
|
||||
pull_request:
|
||||
branches: [main, staging]
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
concurrency:
|
||||
# event_name + sha keeps PR sync and the subsequent staging push on the
|
||||
# same SHA from cancelling each other (per feedback_concurrency_group_per_sha).
|
||||
group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
detect-changes:
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
wheel: ${{ steps.decide.outputs.wheel }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- id: decide
|
||||
run: |
|
||||
# Inline replacement for dorny/paths-filter — same pattern
|
||||
# PR#372's ci.yml port used. Diffs against the PR base or the
|
||||
# previous push SHA, then matches against the wheel-relevant
|
||||
# path set.
|
||||
#
|
||||
# NOTE: Gitea Actions does not expose github.event.before as a
|
||||
# shell environment variable. The ${{ github.event.before }} template
|
||||
# expression works inside YAML run: blocks but is evaluated to an
|
||||
# empty string for push events, making the ${VAR:-fallback} always
|
||||
# use the fallback. Use GITHUB_EVENT_BEFORE instead — it IS set in
|
||||
# the runner's shell environment for push events.
|
||||
BASE=""
|
||||
if [ "${{ github.event_name }}" = "pull_request" ]; then
|
||||
BASE="${{ github.event.pull_request.base.sha }}"
|
||||
elif [ -n "$GITHUB_EVENT_BEFORE" ]; then
|
||||
BASE="$GITHUB_EVENT_BEFORE"
|
||||
fi
|
||||
if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$'; then
|
||||
# New branch or no previous SHA: treat as wheel-relevant.
|
||||
echo "wheel=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
if ! timeout 30 git cat-file -e "$BASE" 2>/dev/null; then
|
||||
git fetch --depth=1 origin "$BASE" 2>/dev/null || true
|
||||
fi
|
||||
if ! timeout 30 git cat-file -e "$BASE" 2>/dev/null; then
|
||||
echo "wheel=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
CHANGED=$(git diff --name-only "$BASE" HEAD)
|
||||
if echo "$CHANGED" | grep -qE '^(workspace/|scripts/build_runtime_package\.py$|scripts/wheel_smoke\.py$|\.gitea/workflows/runtime-prbuild-compat\.yml$)'; then
|
||||
echo "wheel=true" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "wheel=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# ONE job (no job-level `if:`) that always runs and reports under the
|
||||
# required-check name `PR-built wheel + import smoke`. Real work is
|
||||
# gated per-step on `needs.detect-changes.outputs.wheel`.
|
||||
local-build-install:
|
||||
needs: detect-changes
|
||||
name: PR-built wheel + import smoke
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: No-op pass (paths filter excluded this commit)
|
||||
if: needs.detect-changes.outputs.wheel != 'true'
|
||||
run: |
|
||||
echo "No workspace/ / scripts/{build_runtime_package,wheel_smoke}.py / workflow changes — wheel gate satisfied without rebuilding."
|
||||
echo "::notice::PR-built wheel + import smoke no-op pass (paths filter excluded this commit)."
|
||||
- if: needs.detect-changes.outputs.wheel == 'true'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- if: needs.detect-changes.outputs.wheel == 'true'
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: pip
|
||||
cache-dependency-path: workspace/requirements.txt
|
||||
- name: Install build tooling
|
||||
if: needs.detect-changes.outputs.wheel == 'true'
|
||||
run: pip install build
|
||||
- name: Build wheel from PR source (mirrors publish-runtime.yml)
|
||||
if: needs.detect-changes.outputs.wheel == 'true'
|
||||
# Use a fixed test version so the wheel filename is predictable.
|
||||
# Doesn't reach PyPI — this build is local-only for the smoke.
|
||||
run: |
|
||||
python scripts/build_runtime_package.py \
|
||||
--version "0.0.0.dev0+pin-compat" \
|
||||
--out /tmp/runtime-build
|
||||
cd /tmp/runtime-build && python -m build
|
||||
- name: Install built wheel + workspace requirements
|
||||
if: needs.detect-changes.outputs.wheel == 'true'
|
||||
run: |
|
||||
python -m venv /tmp/venv-built
|
||||
/tmp/venv-built/bin/pip install --upgrade pip
|
||||
/tmp/venv-built/bin/pip install /tmp/runtime-build/dist/*.whl
|
||||
/tmp/venv-built/bin/pip install -r workspace/requirements.txt
|
||||
/tmp/venv-built/bin/pip show molecule-ai-workspace-runtime a2a-sdk \
|
||||
| grep -E '^(Name|Version):'
|
||||
- name: Smoke import the PR-built wheel
|
||||
if: needs.detect-changes.outputs.wheel == 'true'
|
||||
# Same script publish-runtime.yml runs against the to-be-PyPI wheel.
|
||||
run: |
|
||||
/tmp/venv-built/bin/python "$GITHUB_WORKSPACE/scripts/wheel_smoke.py"
|
||||
@@ -58,14 +58,20 @@ jobs:
|
||||
python-version: '3.11'
|
||||
- name: Install .gitea script test dependencies
|
||||
run: python -m pip install --quiet 'pytest==9.0.2' 'PyYAML==6.0.2'
|
||||
- name: Run scripts/ unittests (build_runtime_package, ...)
|
||||
# Top-level scripts/ tests live alongside their target file
|
||||
# (e.g. scripts/test_build_runtime_package.py exercises
|
||||
# scripts/build_runtime_package.py). discover from scripts/
|
||||
# picks up only top-level test_*.py because scripts/ops/ has
|
||||
# no __init__.py — that's intentional, so we run two passes.
|
||||
- name: Run scripts/ unittests, if any
|
||||
# Top-level scripts/ tests live alongside their target file. The
|
||||
# runtime packaging tests moved to molecule-ai-workspace-runtime, so
|
||||
# this pass may legitimately find no tests.
|
||||
working-directory: scripts
|
||||
run: python -m unittest discover -t . -p 'test_*.py' -v
|
||||
run: |
|
||||
set +e
|
||||
python -m unittest discover -t . -p 'test_*.py' -v
|
||||
rc=$?
|
||||
if [ "$rc" -eq 5 ]; then
|
||||
echo "No top-level scripts/ unittest files found; skipping."
|
||||
exit 0
|
||||
fi
|
||||
exit "$rc"
|
||||
- name: Run scripts/ops/ unittests (sweep_cf_decide, ...)
|
||||
working-directory: scripts/ops
|
||||
run: python -m unittest discover -p 'test_*.py' -v
|
||||
|
||||
+18
-1
@@ -127,7 +127,11 @@ cd workspace-server && go test -race ./...
|
||||
cd canvas && npm test
|
||||
|
||||
# Workspace runtime (Python)
|
||||
cd workspace && python -m pytest -v
|
||||
# Runtime code is SSOT in molecule-ai-workspace-runtime, not molecule-core/workspace.
|
||||
cd ../molecule-ai-workspace-runtime
|
||||
python -m venv .venv && source .venv/bin/activate
|
||||
pip install --index-url https://git.moleculesai.app/api/packages/molecule-ai/pypi/simple/ -e . pytest pytest-asyncio
|
||||
pytest -q
|
||||
|
||||
# E2E API tests (requires running platform)
|
||||
bash tests/e2e/test_api.sh
|
||||
@@ -159,6 +163,19 @@ and run CI manually.
|
||||
| review-check-tests | `review-check.sh` evaluator regression suite (13 scenarios) |
|
||||
| ops-scripts | Python unittest suite for `scripts/*.py` |
|
||||
|
||||
### Workspace runtime SSOT
|
||||
|
||||
Runtime code lives in
|
||||
[`molecule-ai-workspace-runtime`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-runtime).
|
||||
Do not reintroduce `molecule-core/workspace/` or vendored `molecule_runtime/`
|
||||
copies in consumers. Core and templates consume the published runtime package
|
||||
from the Gitea package registry.
|
||||
|
||||
For local external MCP agents, multi-workspace config is
|
||||
`MOLECULE_WORKSPACES=[{"id":"...","token":"...","platform_url":"..."}]`.
|
||||
`platform_url` selects the tenant; `org_id` is not part of this config.
|
||||
Workspace IDs can differ across orgs.
|
||||
|
||||
## Local Testing
|
||||
|
||||
### review-check.sh
|
||||
|
||||
@@ -163,11 +163,11 @@ Most agent systems stop at "a smart runtime." Molecule AI pushes further: it giv
|
||||
|
||||
| Core mechanism | Molecule AI module(s) | Why it matters |
|
||||
|---|---|---|
|
||||
| **Durable memory that survives sessions** | `workspace/builtin_tools/memory.py`, `workspace/builtin_tools/awareness_client.py`, `workspace-server/internal/handlers/memories.go` | Memory is not just durable, it is **workspace-scoped** and can route into awareness namespaces tied to the org structure |
|
||||
| **Durable memory that survives sessions** | `molecule-ai-workspace-runtime/molecule_runtime/builtin_tools/`, `workspace-server/internal/handlers/memories.go` | Memory is not just durable, it is **workspace-scoped** and can route into awareness namespaces tied to the org structure |
|
||||
| **Cross-session recall** | `workspace-server/internal/handlers/activity.go` (`/workspaces/:id/session-search`) | Recall spans both activity history and memory rows, so the system can search what happened and what was learned without inventing a separate hidden store |
|
||||
| **Skills built from experience** | `workspace/builtin_tools/memory.py` (`_maybe_log_skill_promotion`) | Promotion from memory into a skill candidate is surfaced as an explicit platform activity, not a silent internal side effect |
|
||||
| **Skill improvement during use** | `workspace/skill_loader/watcher.py`, `workspace/skill_loader/loader.py`, `workspace/main.py` | Skills hot-reload into the live runtime, so improvements become available on the next A2A task without restarting the workspace |
|
||||
| **Persistent skill lifecycle** | `workspace-server/cmd/cli/cmd_agent_skill.go`, `workspace/plugins.py` | Skills are not just generated once; they can be audited, installed, published, shared, mounted by plugins, and governed as reusable operational assets |
|
||||
| **Skills built from experience** | `molecule-ai-workspace-runtime/molecule_runtime/builtin_tools/memory.py` (`_maybe_log_skill_promotion`) | Promotion from memory into a skill candidate is surfaced as an explicit platform activity, not a silent internal side effect |
|
||||
| **Skill improvement during use** | `molecule-ai-workspace-runtime/molecule_runtime/skill_loader/`, `molecule-ai-workspace-runtime/molecule_runtime/main.py` | Skills hot-reload into the live runtime, so improvements become available on the next A2A task without restarting the workspace |
|
||||
| **Persistent skill lifecycle** | `workspace-server/cmd/cli/cmd_agent_skill.go`, `molecule-ai-workspace-runtime/molecule_runtime/plugins.py` | Skills are not just generated once; they can be audited, installed, published, shared, mounted by plugins, and governed as reusable operational assets |
|
||||
|
||||
### Why this matters in Molecule AI
|
||||
|
||||
@@ -208,7 +208,7 @@ The result is not just “an agent that learns.” It is **an organization that
|
||||
|
||||
### Runtime
|
||||
|
||||
- unified `workspace/` image; thin AMI in production (us-east-2)
|
||||
- standalone workspace-template images that install `molecule-ai-workspace-runtime` from the Gitea package registry; thin AMI in production (us-east-2)
|
||||
- adapter-driven execution across **8 runtimes** (Claude Code, Hermes, Gemini CLI, LangGraph, DeepAgents, CrewAI, AutoGen, OpenClaw)
|
||||
- Agent Card registration
|
||||
- awareness-backed memory integration; **Memory v2 backed by pgvector** for semantic recall
|
||||
|
||||
@@ -55,7 +55,7 @@ test.describe("Desktop ChatTab", () => {
|
||||
await textarea.fill("What is the weather?");
|
||||
await page.getByRole("button", { name: /Send/ }).first().click();
|
||||
|
||||
await expect(page.getByText("What is the weather?")).toBeVisible({ timeout: 5_000 });
|
||||
await expect(page.getByText("What is the weather?", { exact: true })).toBeVisible({ timeout: 5_000 });
|
||||
await expect(page.getByText("Echo: What is the weather?")).toBeVisible({ timeout: 15_000 });
|
||||
});
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ test.describe("MobileChat", () => {
|
||||
await textarea.fill("Mobile test message");
|
||||
await page.getByRole("button", { name: /Send/ }).first().click();
|
||||
|
||||
await expect(page.getByText("Mobile test message")).toBeVisible({ timeout: 5_000 });
|
||||
await expect(page.getByText("Mobile test message", { exact: true })).toBeVisible({ timeout: 5_000 });
|
||||
await expect(page.getByText("Echo: Mobile test message")).toBeVisible({ timeout: 15_000 });
|
||||
});
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
*/
|
||||
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { execFileSync, execSync } from "node:child_process";
|
||||
|
||||
const PLATFORM_URL = process.env.E2E_PLATFORM_URL ?? "http://localhost:8080";
|
||||
|
||||
@@ -23,13 +24,19 @@ export interface SeededWorkspace {
|
||||
* Create an external workspace and wire it to the echo runtime.
|
||||
*/
|
||||
export async function seedWorkspace(echoURL: string): Promise<SeededWorkspace> {
|
||||
// 1. Create external workspace (no URL — platform will mint an auth token).
|
||||
// 1. Create external workspace pointing at the in-process echo runtime.
|
||||
const runId = Math.random().toString(36).slice(2, 8);
|
||||
const wsName = `Chat E2E Agent ${runId}`;
|
||||
const createRes = await fetch(`${PLATFORM_URL}/workspaces`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({ name: wsName, tier: 1, external: true, runtime: "external" }),
|
||||
body: JSON.stringify({
|
||||
name: wsName,
|
||||
tier: 1,
|
||||
external: true,
|
||||
runtime: "external",
|
||||
url: echoURL,
|
||||
}),
|
||||
});
|
||||
if (!createRes.ok) {
|
||||
const text = await createRes.text();
|
||||
@@ -40,7 +47,10 @@ export async function seedWorkspace(echoURL: string): Promise<SeededWorkspace> {
|
||||
name: string;
|
||||
connection?: { auth_token?: string };
|
||||
};
|
||||
const authToken = ws.connection?.auth_token;
|
||||
let authToken = ws.connection?.auth_token;
|
||||
if (!authToken) {
|
||||
authToken = await mintTestToken(ws.id);
|
||||
}
|
||||
if (!authToken) {
|
||||
throw new Error("Workspace created but no auth_token returned");
|
||||
}
|
||||
@@ -73,16 +83,35 @@ export async function seedWorkspace(echoURL: string): Promise<SeededWorkspace> {
|
||||
`-c "UPDATE workspaces SET status = 'online', url = '${echoURL}', platform_inbound_secret = '${inboundSecret}' WHERE id = '${ws.id}'"`,
|
||||
].join(" ");
|
||||
|
||||
const { execSync } = await import("node:child_process");
|
||||
try {
|
||||
execSync(psql, { stdio: "pipe", timeout: 30_000 });
|
||||
} catch (err) {
|
||||
throw new Error(`DB update failed: ${err}`);
|
||||
}
|
||||
|
||||
cacheWorkspaceURL(ws.id, echoURL);
|
||||
|
||||
return { id: ws.id, name: wsName, agentURL: echoURL, authToken };
|
||||
}
|
||||
|
||||
/**
 * Write a workspace's agent URL into Redis via `docker exec ... redis-cli SET`.
 *
 * Sets two keys, `ws:<workspaceId>:url` and `ws:<workspaceId>:internal_url`,
 * both to `agentURL`, inside the container named by the `REDIS_CONTAINER`
 * environment variable. Does nothing when that variable is unset or empty.
 *
 * NOTE(review): presumably this keeps the platform's cached workspace URL in
 * step with the value written to Postgres by the caller — confirm against the
 * platform's URL-resolution code.
 *
 * @param workspaceId - workspace ID interpolated into the Redis key names
 * @param agentURL - value stored under both keys
 * @throws Error when `execFileSync` throws for either key; the message names
 *   the key that failed.
 */
function cacheWorkspaceURL(workspaceId: string, agentURL: string): void {
|
||||
const redisContainer = process.env.REDIS_CONTAINER;
|
||||
// No Redis container configured: skip the cache write entirely.
if (!redisContainer) return;
|
||||
|
||||
const keys = [`ws:${workspaceId}:url`, `ws:${workspaceId}:internal_url`];
|
||||
for (const key of keys) {
|
||||
try {
|
||||
// Argument-array form (no shell), so key and URL are passed verbatim.
execFileSync(
|
||||
"docker",
|
||||
["exec", redisContainer, "redis-cli", "SET", key, agentURL],
|
||||
{ stdio: "pipe", timeout: 10_000 },
|
||||
);
|
||||
} catch (err) {
|
||||
// Fail loudly rather than continue with a partially updated cache.
throw new Error(`Redis URL cache update failed for ${key}: ${err}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Start a heartbeat interval that keeps an external workspace alive.
|
||||
* Returns a stop function.
|
||||
@@ -141,7 +170,6 @@ export async function seedChatHistory(
|
||||
|
||||
const sql = `INSERT INTO chat_messages (id, workspace_id, role, content, created_at) VALUES ${values};`;
|
||||
|
||||
const { execSync } = await import("node:child_process");
|
||||
const psql = `PGPASSWORD=${pass} psql -h ${host} -p ${port} -U ${user} -d ${db} -c "${sql}"`;
|
||||
execSync(psql, { stdio: "pipe", timeout: 10_000 });
|
||||
}
|
||||
@@ -163,7 +191,6 @@ export async function cleanupWorkspace(workspaceId: string): Promise<void> {
|
||||
|
||||
const psql = `PGPASSWORD=${pass} psql -h ${host} -p ${port} -U ${user} -d ${db} -c "DELETE FROM workspaces WHERE id = '${workspaceId}'"`;
|
||||
|
||||
const { execSync } = await import("node:child_process");
|
||||
try {
|
||||
execSync(psql, { stdio: "pipe", timeout: 30_000 });
|
||||
} catch {
|
||||
|
||||
@@ -162,10 +162,10 @@ export async function startEchoRuntime(): Promise<EchoRuntime> {
|
||||
});
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve) => server.listen(0, "127.0.0.1", resolve));
|
||||
await new Promise<void>((resolve) => server.listen(0, resolve));
|
||||
const address = server.address();
|
||||
const port = typeof address === "object" && address ? address.port : 0;
|
||||
const baseURL = `http://127.0.0.1:${port}`;
|
||||
const baseURL = `http://localhost:${port}`;
|
||||
|
||||
return {
|
||||
baseURL,
|
||||
|
||||
@@ -17,7 +17,7 @@ Canvas (Next.js :3000) ←WebSocket→ Platform (Go :8080) ←HTTP→ Postgres +
|
||||
|
||||
- **Workspace Server** (`workspace-server/`): Go/Gin control plane — workspace CRUD, registry, discovery, WebSocket hub, liveness monitoring.
|
||||
- **Canvas** (`canvas/`): Next.js 15 + React Flow (@xyflow/react v12) + Zustand + Tailwind — visual workspace graph.
|
||||
- **Workspace Runtime** (`workspace/`): Shared runtime published as [`molecule-ai-workspace-runtime`](https://pypi.org/project/molecule-ai-workspace-runtime/) on PyPI. Supports LangGraph, Claude Code, OpenClaw, DeepAgents, CrewAI, AutoGen. Each adapter lives in its own standalone template repo (e.g. `molecule-ai-workspace-template-claude-code`). See `docs/workspace-runtime-package.md` for the full picture.
|
||||
- **Workspace Runtime**: Shared runtime published from [`molecule-ai-workspace-runtime`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-runtime) to the Molecule AI Gitea package registry. Supports LangGraph, Claude Code, OpenClaw, Hermes, Codex, and AutoGen. Each adapter lives in its own standalone template repo (e.g. `molecule-ai-workspace-template-claude-code`). See `docs/workspace-runtime-package.md` for the full picture.
|
||||
- **molecli** (`workspace-server/cmd/cli/`): Go TUI dashboard (Bubbletea + Lipgloss) — real-time workspace monitoring, event log, health overview, delete/filter operations.
|
||||
|
||||
## Key Architectural Patterns
|
||||
|
||||
@@ -285,6 +285,39 @@ Canvas requests (no `X-Workspace-ID` header) and system callers
|
||||
|
||||
---
|
||||
|
||||
## Multiple Workspaces From One Local MCP Bridge
|
||||
|
||||
The standalone runtime package includes `molecule-mcp`, a local MCP bridge for
|
||||
external agents such as Claude Code, Codex, Hermes, and other tools that run
|
||||
outside the platform container fleet. One local bridge can serve multiple
|
||||
external workspaces by setting `MOLECULE_WORKSPACES`:
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"id": "workspace-id-local-to-hongming-org",
|
||||
"token": "...",
|
||||
"platform_url": "https://hongming.moleculesai.app"
|
||||
},
|
||||
{
|
||||
"id": "different-workspace-id-local-to-agents-team-org",
|
||||
"token": "...",
|
||||
"platform_url": "https://agents-team.moleculesai.app"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
`platform_url` is the tenant routing key. The bridge registers, heartbeats,
|
||||
polls inboxes, and sends outbound A2A calls against the URL attached to the
|
||||
workspace that is doing the work.
|
||||
|
||||
Do not add `org_id` to this config. The tenant already comes from
|
||||
`platform_url`, and the bearer token is issued by that tenant. Workspace IDs
|
||||
also do not need to be shared across orgs; each tenant can return its own
|
||||
workspace ID and token for the same local agent process.
|
||||
|
||||
---
|
||||
|
||||
## Canvas Appearance
|
||||
|
||||
External workspaces appear on the canvas with a purple **REMOTE** badge
|
||||
|
||||
@@ -135,6 +135,33 @@ The `id` field is your workspace ID — remember it.
|
||||
|
||||
---
|
||||
|
||||
## Optional — one local MCP bridge, multiple tenants
|
||||
|
||||
If your local agent runtime uses `molecule-mcp`, one process can serve more
|
||||
than one external workspace:
|
||||
|
||||
```bash
|
||||
export MOLECULE_WORKSPACES='[
|
||||
{
|
||||
"id": "workspace-id-local-to-you-org",
|
||||
"token": "...",
|
||||
"platform_url": "https://you.moleculesai.app"
|
||||
},
|
||||
{
|
||||
"id": "different-workspace-id-local-to-team-org",
|
||||
"token": "...",
|
||||
"platform_url": "https://team.moleculesai.app"
|
||||
}
|
||||
]'
|
||||
molecule-mcp
|
||||
```
|
||||
|
||||
Use the workspace ID and token returned by each tenant. The IDs may differ
|
||||
across orgs. `org_id` is not required here because `platform_url` selects the
|
||||
tenant and the token is tenant-scoped.
|
||||
|
||||
---
|
||||
|
||||
## Step 4 — Chat with it
|
||||
|
||||
1. Open your Molecule canvas at `https://<TENANT>`
|
||||
|
||||
@@ -125,6 +125,33 @@ The agent appears on the canvas with a **purple REMOTE badge** within seconds. F
|
||||
|
||||
---
|
||||
|
||||
## Multi-Tenant Local MCP Bridge
|
||||
|
||||
For local MCP-driven agents, use the standalone runtime's `molecule-mcp`
|
||||
entrypoint. A single local bridge can serve multiple external workspaces by
|
||||
setting `MOLECULE_WORKSPACES`:
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"id": "workspace-id-local-to-acme",
|
||||
"token": "...",
|
||||
"platform_url": "https://acme.moleculesai.app"
|
||||
},
|
||||
{
|
||||
"id": "different-workspace-id-local-to-ops",
|
||||
"token": "...",
|
||||
"platform_url": "https://ops.moleculesai.app"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
`platform_url` selects the tenant for registration, heartbeat, inbox polling,
|
||||
and outbound A2A routing. `org_id` is not required in this config, and the
|
||||
workspace IDs do not need to match across tenants.
|
||||
|
||||
---
|
||||
|
||||
## What Phase 30 Covers
|
||||
|
||||
| Phase | What shipped | Endpoint |
|
||||
|
||||
@@ -1,304 +1,44 @@
|
||||
# Workspace Runtime PyPI Package
|
||||
# Workspace Runtime Package
|
||||
|
||||
## Requires Python >= 3.11
|
||||
`molecule-ai-workspace-runtime` is the shared Python runtime consumed by
|
||||
workspace template images and by external MCP integrations.
|
||||
|
||||
The wheel pins `requires_python>=3.11`. On Python 3.10 or older, `pip install
|
||||
molecule-ai-workspace-runtime` fails with `Could not find a version that
|
||||
satisfies the requirement (from versions: none)` — the pin filters the only
|
||||
available artifact before pip even attempts install. Upgrade the interpreter
|
||||
(`brew install python@3.12` / `apt install python3.12` / etc.) or use a
|
||||
3.11+ venv.
|
||||
## Source Of Truth
|
||||
|
||||
## Overview
|
||||
The source of truth is the standalone Gitea repo:
|
||||
|
||||
The shared workspace runtime infrastructure has **one editable source** and
|
||||
**one published artifact**:
|
||||
|
||||
1. **Source of truth (monorepo, editable):** `workspace/` — every runtime
|
||||
change lands here. Edit it like any other monorepo code.
|
||||
2. **Published artifact (PyPI, generated):** [`molecule-ai-workspace-runtime`](https://pypi.org/project/molecule-ai-workspace-runtime/)
|
||||
— produced by `.github/workflows/publish-runtime.yml` on every
|
||||
`runtime-vX.Y.Z` tag push. Do NOT edit this independently — it gets
|
||||
overwritten on every publish.
|
||||
|
||||
The legacy sibling repo `molecule-ai-workspace-runtime` (the GitHub repo, as
|
||||
distinct from the PyPI package) is no longer the source-of-truth and should
|
||||
be treated as a publish artifact only. It can be archived or used as a
|
||||
read-only mirror.
|
||||
|
||||
## Where to make changes
|
||||
|
||||
**All runtime edits land in `molecule-monorepo/workspace/`. Period.**
|
||||
|
||||
The GitHub repo `Molecule-AI/molecule-ai-workspace-runtime` is **mirror-only**.
|
||||
It exists so external consumers (template repos, downstream operators) have a
|
||||
git-cloneable artifact that mirrors the PyPI wheel — nothing more.
|
||||
|
||||
- **Direct PRs against `molecule-ai-workspace-runtime` are auto-rejected by
|
||||
the `mirror-guard` CI check.** The check fails any push that did not come
|
||||
from the publish pipeline. There is no opt-out — file the change against
|
||||
`molecule-monorepo/workspace/` instead.
|
||||
- **The mirror + the PyPI wheel both auto-regenerate on every push to
|
||||
`staging`** via `.github/workflows/publish-runtime.yml` (which calls
|
||||
`scripts/build_runtime_package.py`, builds wheel + sdist, smoke-imports,
|
||||
uploads to PyPI via Trusted Publisher, and force-pushes the rewritten tree
|
||||
to the mirror repo). You never touch the mirror by hand.
|
||||
|
||||
If you have an old local clone of the mirror and try to push a fix to it
|
||||
directly, expect a CI failure with a message pointing you here. Re-open the
|
||||
change against `molecule-monorepo/workspace/` and let the publish workflow
|
||||
do the rest.
|
||||
|
||||
## Why this shape
|
||||
|
||||
The 8 workspace template repos (claude-code, langgraph, hermes, etc.) each
|
||||
build their own Docker image and `pip install molecule-ai-workspace-runtime`
|
||||
from PyPI. PyPI is the right distribution channel — semver, reproducible
|
||||
builds, no submodule dance per-repo. But the runtime ALSO needs to evolve
|
||||
in lock-step with the platform's wire protocol (queue shape, A2A metadata,
|
||||
event payloads). Shipping cross-cutting protocol changes as separate
|
||||
runtime + platform PRs in two repos creates ordering pain and broken
|
||||
intermediate states.
|
||||
|
||||
The monorepo + auto-publish split gives both: edit cross-cutting changes
|
||||
in one PR, publish the runtime artifact via a tag.
|
||||
|
||||
## What's in the package
|
||||
|
||||
Everything in `workspace/*.py` plus the `adapters/`, `builtin_tools/`,
|
||||
`plugins_registry/`, `policies/`, `skill_loader/` subpackages. Build
|
||||
artifacts (`Dockerfile`, `*.sh`, `pytest.ini`, `requirements.txt`) are
|
||||
excluded.
|
||||
|
||||
The build script rewrites bare imports so the published package is a
|
||||
proper Python namespace:
|
||||
|
||||
```
|
||||
# In monorepo workspace/:
|
||||
from a2a_client import discover_peer
|
||||
from builtin_tools.memory import store
|
||||
|
||||
# In published molecule_runtime/ (auto-rewritten at publish time):
|
||||
from molecule_runtime.a2a_client import discover_peer
|
||||
from molecule_runtime.builtin_tools.memory import store
|
||||
```text
|
||||
https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-runtime
|
||||
```
|
||||
|
||||
The closed allowlist of rewritten module names lives in
|
||||
`scripts/build_runtime_package.py` (`TOP_LEVEL_MODULES` + `SUBPACKAGES`).
|
||||
Add a new top-level module to workspace/? Add it to the allowlist in the
|
||||
same PR.
|
||||
Do not add runtime source back under `molecule-core/workspace/`. The core repo
|
||||
owns the platform server, canvas, provisioning, and tests around the installed
|
||||
runtime package.
|
||||
|
||||
## Adapter repos
|
||||
## Package Registry
|
||||
|
||||
Each of the 8 adapter template repos contains:
|
||||
- `adapter.py` — runtime-specific `Adapter` class
|
||||
- `requirements.txt` — `molecule-ai-workspace-runtime>=0.1.X` + adapter deps
|
||||
- `Dockerfile` — standalone image with `ENV ADAPTER_MODULE=adapter` and
|
||||
`ENTRYPOINT ["molecule-runtime"]`
|
||||
The runtime package is published to the Molecule AI Gitea package registry:
|
||||
|
||||
| Adapter | Repo |
|
||||
|---------|------|
|
||||
| claude-code | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-claude-code |
|
||||
| langgraph | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-langgraph |
|
||||
| crewai | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-crewai |
|
||||
| autogen | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-autogen |
|
||||
| deepagents | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-deepagents |
|
||||
| hermes | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-hermes |
|
||||
| gemini-cli | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-gemini-cli |
|
||||
| openclaw | https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-openclaw |
|
||||
|
||||
## Adapter discovery (ADAPTER_MODULE)
|
||||
|
||||
Standalone adapter repos set `ENV ADAPTER_MODULE=adapter` in their
|
||||
Dockerfile. The runtime's `get_adapter()` checks this env var first:
|
||||
|
||||
```python
|
||||
# In molecule_runtime/adapters/__init__.py
|
||||
def get_adapter(runtime: str) -> type[BaseAdapter]:
|
||||
adapter_module = os.environ.get("ADAPTER_MODULE")
|
||||
if adapter_module:
|
||||
mod = importlib.import_module(adapter_module)
|
||||
return getattr(mod, "Adapter")
|
||||
raise KeyError(...)
|
||||
```text
|
||||
https://git.moleculesai.app/api/packages/molecule-ai/pypi/simple/
|
||||
```
|
||||
|
||||
## Publishing a new version
|
||||
PyPI is intentionally not part of the critical path. Template Dockerfiles,
|
||||
external-runtime snippets, and CI install checks should use the Gitea registry.
|
||||
|
||||
```bash
|
||||
# From any local checkout of monorepo, after merging your runtime change:
|
||||
git tag runtime-v0.1.6
|
||||
git push origin runtime-v0.1.6
|
||||
```
|
||||
## Release Flow
|
||||
|
||||
The `publish-runtime` workflow takes over — checks out the tag, runs
|
||||
`scripts/build_runtime_package.py --version 0.1.6`, builds wheel + sdist,
|
||||
runs a smoke import to catch broken rewrites, and uploads to PyPI via
|
||||
the PyPA Trusted Publisher action (OIDC). No static API token is stored
|
||||
in this repo — PyPI verifies the workflow's OIDC claim against the
|
||||
trusted-publisher config registered for `molecule-ai-workspace-runtime`.
|
||||
1. Land a reviewed PR in `molecule-ai-workspace-runtime`.
|
||||
2. Bump `version =` in that repo's `pyproject.toml`.
|
||||
3. Tag `runtime-vX.Y.Z` on the runtime repo.
|
||||
4. The runtime repo's `publish-runtime` workflow builds the wheel and sdist,
|
||||
publishes to the Gitea registry, verifies install from that registry, then
|
||||
cascades `.runtime-version` pins to workspace template repos.
|
||||
|
||||
For dev/test releases without tagging, dispatch the workflow manually
|
||||
with an explicit version (e.g. `0.1.6.dev1` — PEP 440 dev/rc/post forms
|
||||
are accepted).
|
||||
## Core Repo Contract
|
||||
|
||||
After publish, the 8 template repos pick up the new version on their
|
||||
next `:latest` rebuild. To force-pull immediately, bump the pin in each
|
||||
template's `requirements.txt`.
|
||||
`molecule-core` must not ship editable runtime code. Its responsibilities are:
|
||||
|
||||
## End-to-end CD chain
|
||||
|
||||
The full chain from monorepo merge → workspace containers running new code:
|
||||
|
||||
```
|
||||
1. Merge PR with workspace/ changes to main
|
||||
↓
|
||||
2. .github/workflows/auto-tag-runtime.yml fires
|
||||
↓ reads PR labels (release:major/minor) or defaults to patch
|
||||
↓ pushes runtime-vX.Y.Z tag
|
||||
↓
|
||||
3. .github/workflows/publish-runtime.yml fires (on the tag)
|
||||
↓ builds wheel via scripts/build_runtime_package.py
|
||||
↓ smoke-imports the wheel
|
||||
↓ uploads to PyPI
|
||||
↓ cascade job fires repository_dispatch (event-type: runtime-published)
|
||||
↓ to all 8 workspace-template-* repos
|
||||
↓
|
||||
4. Each template's publish-image.yml fires (on repository_dispatch)
|
||||
↓ rebuilds Dockerfile (which pip-installs the new PyPI version)
|
||||
↓ pushes ghcr.io/molecule-ai/workspace-template-<runtime>:latest
|
||||
↓
|
||||
5. Production hosts run scripts/refresh-workspace-images.sh
|
||||
OR an operator hits POST /admin/workspace-images/refresh on the platform
|
||||
↓ docker pull all 8 :latest tags
|
||||
↓ remove + force-recreate any running ws-* containers using a refreshed image
|
||||
↓ canvas re-provisions the workspaces on next interaction
|
||||
```
|
||||
|
||||
Steps 1-4 are fully automated. Step 5 is one-click: a single curl or shell
|
||||
command. SaaS deployments typically wire step 5 into their normal deploy
|
||||
pipeline (every release pulls fresh images on every host); local dev fires
|
||||
it manually after a runtime release lands.
|
||||
|
||||
### Auth
|
||||
|
||||
PyPI publishing uses **Trusted Publisher (OIDC)** — no static token in the
|
||||
monorepo. The trusted-publisher config on PyPI binds the
|
||||
`molecule-ai-workspace-runtime` project to this repo's
|
||||
`publish-runtime.yml` workflow + `pypi-publish` environment. Rotation is
|
||||
moot: there is no shared secret to rotate.
|
||||
|
||||
### Required secrets
|
||||
|
||||
| Secret | Where | Why |
|
||||
|---|---|---|
|
||||
| `TEMPLATE_DISPATCH_TOKEN` | molecule-core repo | Fine-grained PAT with `actions:write` on the 8 template repos. Without it the `cascade` job warns and exits clean — PyPI still publishes; templates just don't auto-rebuild. |
|
||||
|
||||
### Step 5 specifics
|
||||
|
||||
**Local dev (compose stack):**
|
||||
```bash
|
||||
bash scripts/refresh-workspace-images.sh # all runtimes
|
||||
bash scripts/refresh-workspace-images.sh --runtime claude-code
|
||||
bash scripts/refresh-workspace-images.sh --no-recreate # pull only, leave containers
|
||||
```
|
||||
|
||||
**Via platform admin endpoint (any deploy):**
|
||||
```bash
|
||||
curl -X POST "$PLATFORM/admin/workspace-images/refresh"
|
||||
curl -X POST "$PLATFORM/admin/workspace-images/refresh?runtime=claude-code"
|
||||
curl -X POST "$PLATFORM/admin/workspace-images/refresh?recreate=false"
|
||||
```
|
||||
|
||||
The endpoint pulls + recreates from inside the platform container, so it
|
||||
needs Docker socket access (the compose stack mounts
|
||||
`/var/run/docker.sock` already) AND GHCR auth on the host's docker config
|
||||
(`docker login ghcr.io` once per host). On a fresh host without GHCR auth,
|
||||
the pull step warns per runtime and the response surfaces the failures.
|
||||
|
||||
**Fully hands-off (opt-in image auto-refresh):**
|
||||
|
||||
Set `IMAGE_AUTO_REFRESH=true` on the platform process. A watcher polls
|
||||
GHCR every 5 minutes for digest changes on each `workspace-template-*:latest`
|
||||
tag and invokes the same refresh logic the admin endpoint exposes —
|
||||
no operator action required between "runtime PR merged" and
|
||||
"containers running new code". Disabled by default because SaaS deploy
|
||||
pipelines that already pull on every release would do redundant work.
|
||||
|
||||
Optional companion env (same as the admin endpoint):
|
||||
|
||||
- `GHCR_USER` + `GHCR_TOKEN` — required for private template images;
|
||||
unused for the current public set, but harmless if set.
|
||||
|
||||
## Local dev (build the package without publishing)
|
||||
|
||||
```bash
|
||||
python3 scripts/build_runtime_package.py --version 0.1.0-local --out /tmp/runtime-build
|
||||
cd /tmp/runtime-build
|
||||
python -m build # produces dist/*.whl + dist/*.tar.gz
|
||||
pip install dist/*.whl # install into a venv to test locally
|
||||
```
|
||||
|
||||
This is the same pipeline CI runs. Use it to validate import-rewrite
|
||||
correctness before pushing a `runtime-v*` tag.
|
||||
|
||||
## Writing a new adapter
|
||||
|
||||
Use the GitHub template repo
|
||||
[`molecule-ai/molecule-ai-workspace-template-starter`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-starter) (note: the starter repo did not survive the 2026-05-06 GitHub-org-suspension migration; recreation tracked at internal#41)
|
||||
— it ships with the canonical Dockerfile + adapter.py skeleton + config.yaml
|
||||
schema + the `repository_dispatch: [runtime-published]` cascade receiver
|
||||
already wired up. No follow-up setup PR required.
|
||||
|
||||
```bash
|
||||
# Replace <runtime> with your runtime slug (lowercase, hyphenated).
|
||||
gh repo create Molecule-AI/molecule-ai-workspace-template-<runtime> \
|
||||
--template Molecule-AI/molecule-ai-workspace-template-starter \
|
||||
--public \
|
||||
--description "Molecule AI workspace template: <runtime>"
|
||||
|
||||
git clone https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-<runtime>.git
|
||||
cd molecule-ai-workspace-template-<runtime>
|
||||
```
|
||||
|
||||
Then fill in the `TODO` markers in:
|
||||
|
||||
| File | What to fill in |
|
||||
|---|---|
|
||||
| `adapter.py` | Rename class to `<Runtime>Adapter`. Fill in `name()`, `display_name()`, `description()`, `get_config_schema()`. Implement `setup()` and `create_executor()`. |
|
||||
| `requirements.txt` | Add your runtime's pip dependencies (e.g. `langgraph`, `crewai`, `claude-agent-sdk`). |
|
||||
| `Dockerfile` | Add runtime-specific apt deps (most runtimes don't need any). Replace ENTRYPOINT only if you need custom boot logic. |
|
||||
| `config.yaml` | Update top-level `name`/`runtime`/`description`. Add the models your runtime supports to `models[]`. |
|
||||
| `system-prompt.md` | Default agent prompt. |
|
||||
|
||||
After `git push`:
|
||||
|
||||
1. The template's `publish-image.yml` builds + pushes
|
||||
`ghcr.io/molecule-ai/workspace-template-<runtime>:latest` automatically.
|
||||
2. The next `runtime-vX.Y.Z` tag on `molecule-core` cascades a
|
||||
`repository_dispatch` event into your new template, rebuilding the image
|
||||
against the latest runtime — no setup PR required.
|
||||
3. Register the runtime name in the platform's `RuntimeImages` map (in
|
||||
`workspace-server/internal/provisioner/provisioner.go`) so it's
|
||||
selectable in the canvas.
|
||||
|
||||
## When the starter itself needs to evolve
|
||||
|
||||
If the canonical shape changes (e.g. `config.yaml` schema gets a new field,
|
||||
the `BaseAdapter` interface adds a method, the reusable CI workflow
|
||||
signature changes), update the
|
||||
[starter](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-starter) (recreation pending — see note above)
|
||||
**first**. Existing templates can either migrate at their own pace or be
|
||||
touched in a coordinated cleanup PR. Either way, future templates pick up
|
||||
the new shape from day one.
|
||||
|
||||
## Migration note
|
||||
|
||||
Prior to this workflow, the runtime was duplicated across monorepo
|
||||
`workspace/` AND a sibling repo `molecule-ai-workspace-runtime`, with no
|
||||
sync mechanism. That caused 30+ files to drift between the two trees and
|
||||
the chat-leak / queued-classification fixes existed only in the
|
||||
monorepo copy until manually ported.
|
||||
|
||||
If you have an old local checkout of `molecule-ai-workspace-runtime`, treat
|
||||
it as outdated. The monorepo `workspace/` is now authoritative; the PyPI
|
||||
artifact is rebuilt from it on every `runtime-v*` tag.
|
||||
- Test platform behavior against the installed runtime contract.
|
||||
- Keep MCP/registry/TenantGuard behavior compatible with the runtime package.
|
||||
- Fail CI if `workspace/` or legacy build-from-workspace scripts are restored.
|
||||
|
||||
@@ -1,542 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build the molecule-ai-workspace-runtime PyPI package from monorepo workspace/.
|
||||
|
||||
Monorepo workspace/ is the single source-of-truth for runtime code. The PyPI
|
||||
package is a publish-time mirror produced by this script, NOT a parallel
|
||||
editable copy. Anyone editing the runtime should edit workspace/, never the
|
||||
sibling molecule-ai-workspace-runtime repo.
|
||||
|
||||
What this does
|
||||
--------------
|
||||
1. Copies workspace/ source into build/molecule_runtime/ (note the rename:
|
||||
bare modules become a real Python package).
|
||||
2. Rewrites top-level imports so e.g. `from a2a_client import X` becomes
|
||||
`from molecule_runtime.a2a_client import X`. The rewrite is regex-based
|
||||
on a closed allowlist of modules — third-party imports like `from a2a.X`
|
||||
(the a2a-sdk package) are left alone because the regex is anchored on
|
||||
exact module names.
|
||||
3. Writes a pyproject.toml with the requested version + the README + the
|
||||
py.typed marker.
|
||||
4. Leaves the build dir ready for `python -m build` to produce a wheel/sdist.
|
||||
|
||||
Usage
|
||||
-----
|
||||
scripts/build_runtime_package.py --version 0.1.6 --out /tmp/runtime-build
|
||||
cd /tmp/runtime-build && python -m build
|
||||
python -m twine upload dist/*
|
||||
|
||||
The publish workflow (.github/workflows/publish-runtime.yml) drives this
|
||||
on every `runtime-v*` tag push.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Top-level Python modules in workspace/ that become molecule_runtime.X.
|
||||
# Anything imported as `from <name> import` or `import <name>` (where <name>
|
||||
# matches one of these) gets rewritten to use the package prefix.
|
||||
#
|
||||
# Closed list (not "every .py we copy") because a typo in workspace/ would
|
||||
# otherwise leak into a wrong rewrite. The set is asserted against
|
||||
# `workspace/*.py` at build time — if the disk contents drift from this
|
||||
# list (new module added, old one removed), the build fails loud instead
|
||||
# of silently shipping unrewritten imports. That gap caused 0.1.16 to
|
||||
# ship `from transcript_auth import ...` (unrewritten — module added
|
||||
# without updating this set), which broke every workspace startup with
|
||||
# `ModuleNotFoundError: No module named 'transcript_auth'`.
|
||||
TOP_LEVEL_MODULES = {
|
||||
"_sanitize_a2a",
|
||||
"a2a_cli",
|
||||
"a2a_client",
|
||||
"a2a_executor",
|
||||
"a2a_mcp_server",
|
||||
"a2a_response",
|
||||
"a2a_tools",
|
||||
"a2a_tools_delegation",
|
||||
"a2a_tools_identity",
|
||||
"a2a_tools_inbox",
|
||||
"a2a_tools_memory",
|
||||
"a2a_tools_messaging",
|
||||
"a2a_tools_rbac",
|
||||
"adapter_base",
|
||||
"agent",
|
||||
"agents_md",
|
||||
"boot_routes",
|
||||
"card_helpers",
|
||||
"config",
|
||||
"configs_dir",
|
||||
"consolidation",
|
||||
"coordinator",
|
||||
"event_log",
|
||||
"events",
|
||||
"executor_helpers",
|
||||
"heartbeat",
|
||||
"inbox",
|
||||
"inbox_uploads",
|
||||
"initial_prompt",
|
||||
"internal_chat_uploads",
|
||||
"internal_file_read",
|
||||
"main",
|
||||
"mcp_cli",
|
||||
"mcp_doctor",
|
||||
"mcp_heartbeat",
|
||||
"mcp_inbox_pollers",
|
||||
"mcp_workspace_resolver",
|
||||
"molecule_ai_status",
|
||||
"not_configured_handler",
|
||||
"platform_auth",
|
||||
"platform_inbound_auth",
|
||||
"plugins",
|
||||
"preflight",
|
||||
"prompt",
|
||||
"runtime_wedge",
|
||||
"secret_redactor",
|
||||
"shared_runtime",
|
||||
"smoke_mode",
|
||||
"transcript_auth",
|
||||
"watcher",
|
||||
}
|
||||
|
||||
# Subdirectory packages — these are already real packages (they have or will
|
||||
# have __init__.py) so the rewrite is `from <pkg>` → `from molecule_runtime.<pkg>`.
|
||||
SUBPACKAGES = {
|
||||
"adapters",
|
||||
"builtin_tools",
|
||||
"lib",
|
||||
"platform_tools",
|
||||
"plugins_registry",
|
||||
"policies",
|
||||
"skill_loader",
|
||||
}
|
||||
|
||||
# Files in workspace/ NOT included in the published package. These are
|
||||
# build artifacts, dev scripts, or monorepo-only scaffolding.
|
||||
EXCLUDE_FILES = {
|
||||
"Dockerfile",
|
||||
"build-all.sh",
|
||||
"rebuild-runtime-images.sh",
|
||||
"entrypoint.sh",
|
||||
"pytest.ini",
|
||||
"requirements.txt",
|
||||
# Note: adapter_base.py, agents_md.py, shared_runtime.py
|
||||
# are kept (referenced by adapters/__init__.py and other modules); they get
|
||||
# their imports rewritten via TOP_LEVEL_MODULES. Excluding them broke the
|
||||
# smoke-test install with `ModuleNotFoundError: adapter_base`.
|
||||
}
|
||||
|
||||
EXCLUDE_DIRS = {
|
||||
"__pycache__",
|
||||
"tests",
|
||||
"molecule_audit", # only used by tests; not on production import path
|
||||
"scripts",
|
||||
}
|
||||
|
||||
|
||||
def build_import_rewriter() -> re.Pattern:
    """Compile the single regex that locates every import needing a prefix.

    The pattern matches, at the start of a line (after optional leading
    whitespace), a `from` or `import` keyword followed by one of the names in
    TOP_LEVEL_MODULES ∪ SUBPACKAGES. It exposes four named groups:

    * ``indent`` — whitespace before the keyword,
    * ``kw``     — ``from`` or ``import``,
    * ``mod``    — the matched module / subpackage name,
    * ``rest``   — the single character right after the name (whitespace,
      ``.``, ``,``) or the empty string at end of line.

    Requiring ``rest`` directly after the name is what keeps a listed name
    from matching when it is only the prefix of a longer identifier: the next
    character of such an identifier is none of whitespace, ``.`` or ``,``.
    A dotted sub-path of a listed name (``from <name>.sub import Y``) does
    match, because ``.`` is an accepted ``rest`` character.
    """
    # Sorted so the compiled pattern is deterministic across runs.
    module_alternation = "|".join(
        map(re.escape, sorted(TOP_LEVEL_MODULES | SUBPACKAGES))
    )
    pieces = [
        r"(?m)^(?P<indent>\s*)",                   # leading whitespace
        r"(?P<kw>from|import)\s+",                 # 'from' or 'import'
        r"(?P<mod>" + module_alternation + r")",   # the module name
        r"(?P<rest>[\s.,]|$)",                     # '.', ',', whitespace or EOL
    ]
    return re.compile("".join(pieces))
|
||||
|
||||
|
||||
def rewrite_imports(text: str, regex: re.Pattern) -> str:
    """Replace bare workspace imports in *text* with package-prefixed ones.

    `import X`            → `import molecule_runtime.X as X` (preserve binding)
    `from X import Y`     → `from molecule_runtime.X import Y`
    `from X.sub import Y` → `from molecule_runtime.X.sub import Y`
    `import X.sub`        → `import molecule_runtime.X.sub`

    Args:
        text: Full source text of one Python file.
        regex: Compiled pattern exposing the named groups ``indent``, ``kw``,
            ``mod`` and ``rest`` (``rest`` being the single character that
            follows the module name, or the empty string).

    Returns:
        The text with every matched import rewritten.

    Raises:
        ValueError: if the text contains `import X as Y` for a matched module.
            Aliasing the plain-import form would yield
            `import molecule_runtime.X as X as Y`, a syntax error, so the
            build is stopped with the offending line instead.
    """
    def repl(m: re.Match) -> str:
        indent, kw, mod, rest = m.group("indent"), m.group("kw"), m.group("mod"), m.group("rest")
        if kw == "from":
            # `from X` or `from X.sub` — always safe to prefix.
            return f"{indent}from molecule_runtime.{mod}{rest}"
        if rest.startswith("."):
            # `import X.sub` — prefix only; the rest of the dotted path follows
            # the match untouched.
            return f"{indent}import molecule_runtime.{mod}{rest}"
        # Detect `import X as Y`. `rest` holds only one character, so look at
        # the remainder of the *same* line. The scan is anchored at the end of
        # the module name rather than at m.end(): when `rest` is the newline,
        # m.end() already sits on the following line and the check would
        # inspect (and possibly reject) unrelated text there.
        name_end = m.end("mod")
        line_end = text.find("\n", name_end)
        if line_end == -1:
            line_end = len(text)
        line_after = text[name_end:line_end]
        # Strip comments from consideration so `import X  # noqa` doesn't trip.
        line_after_no_comment = line_after.split("#", 1)[0]
        if re.search(r"^\s*as\s+\w+", line_after_no_comment):
            # `indent` may have swallowed preceding blank lines, so locate the
            # line start from the keyword, not from the start of the match.
            line_start = text.rfind("\n", 0, m.start("kw")) + 1
            raise ValueError(
                f"rewrite_imports: cannot rewrite 'import {mod} as <alias>' on a "
                f"workspace module — the regex would produce "
                f"'import molecule_runtime.{mod} as {mod} as <alias>', invalid syntax. "
                f"Use 'from {mod} import …' or plain 'import {mod}' instead. "
                f"Offending line: {text[line_start:line_end]!r}"
            )
        # Plain `import X` — alias preserves the local name.
        return f"{indent}import molecule_runtime.{mod} as {mod}{rest}"

    return regex.sub(repl, text)
|
||||
|
||||
|
||||
def copy_tree_filtered(src: Path, dst: Path) -> list[Path]:
    """Mirror ``src`` into a freshly created ``dst``, honouring the exclusions.

    Any existing ``dst`` is removed first. Directories named in EXCLUDE_DIRS
    and files named in EXCLUDE_FILES are skipped at every level; everything
    else is copied with ``shutil.copy2``.

    Returns:
        The destination paths of all ``.py`` files that were copied, so the
        caller can run the import rewrite over them in one pass.
    """
    if dst.exists():
        shutil.rmtree(dst)
    dst.mkdir(parents=True)

    copied_py: list[Path] = []
    for child in src.iterdir():
        target = dst / child.name
        if child.is_dir():
            # Recurse into kept directories; excluded ones are dropped whole.
            if child.name not in EXCLUDE_DIRS:
                copied_py.extend(copy_tree_filtered(child, target))
            continue
        if child.name in EXCLUDE_FILES:
            continue
        shutil.copy2(child, target)
        if child.suffix == ".py":
            copied_py.append(target)
    return copied_py
|
||||
|
||||
|
||||
PYPROJECT_TEMPLATE = """\
|
||||
[build-system]
|
||||
requires = ["setuptools>=68.0", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "molecule-ai-workspace-runtime"
|
||||
version = "{version}"
|
||||
description = "Molecule AI workspace runtime — shared infrastructure for all agent adapters"
|
||||
requires-python = ">=3.11"
|
||||
license = {{text = "BSL-1.1"}}
|
||||
readme = "README.md"
|
||||
dependencies = [
|
||||
"a2a-sdk[http-server]>=1.0.0,<2.0",
|
||||
"httpx>=0.27.0",
|
||||
"uvicorn>=0.30.0",
|
||||
"starlette>=0.38.0",
|
||||
"websockets>=12.0",
|
||||
# multipart/form-data parser — required for Starlette's Request.form() on
|
||||
# /internal/chat/uploads/ingest. Without it, Starlette raises AssertionError
|
||||
# when parsing multipart bodies, which the chat-upload handler surfaces as
|
||||
# an opaque 400. Mirrors the canonical pin in workspace/requirements.txt;
|
||||
# >=0.0.27 avoids CVE-2024-53981 (DoS via malformed boundary).
|
||||
# Forensic a78762a0 (2026-05-19): Hermes PDF upload 400 root cause.
|
||||
"python-multipart>=0.0.27",
|
||||
"pyyaml>=6.0",
|
||||
"langchain-core>=0.3.0",
|
||||
"opentelemetry-api>=1.24.0",
|
||||
"opentelemetry-sdk>=1.24.0",
|
||||
"opentelemetry-exporter-otlp-proto-http>=1.24.0",
|
||||
"temporalio>=1.7.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
molecule-runtime = "molecule_runtime.main:main_sync"
|
||||
molecule-mcp = "molecule_runtime.mcp_cli:main"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
where = ["."]
|
||||
include = ["molecule_runtime*", "plugins_registry*"]
|
||||
|
||||
[tool.setuptools.package-data]
|
||||
"molecule_runtime" = ["py.typed"]
|
||||
"plugins_registry" = ["py.typed"]
|
||||
"""
|
||||
|
||||
|
||||
README_TEMPLATE = """\
|
||||
# molecule-ai-workspace-runtime
|
||||
|
||||
Shared workspace runtime for [Molecule AI](https://git.moleculesai.app/molecule-ai/molecule-core)
|
||||
agent adapters. Installed by every workspace template image
|
||||
(`workspace-template-claude-code`, `-langgraph`, `-hermes`, etc.) to provide
|
||||
A2A delegation, heartbeat, memory, plugin loading, and skill management.
|
||||
|
||||
This package is **published from the molecule-core monorepo `workspace/`
|
||||
directory** by the `publish-runtime` GitHub Actions workflow on every
|
||||
`runtime-v*` tag push. **Do not edit this package directly** — edit
|
||||
`workspace/` in the monorepo.
|
||||
|
||||
## External-runtime MCP server (`molecule-mcp`)
|
||||
|
||||
Operators running an agent outside the platform's container fleet
|
||||
(any runtime that supports MCP stdio — Claude Code, hermes, codex,
|
||||
etc.) can install this wheel and run the universal MCP server
|
||||
locally.
|
||||
|
||||
### Requirements
|
||||
|
||||
* **Python ≥3.11.** The wheel sets `requires-python = ">=3.11"`. On
|
||||
older interpreters `pip install` returns the cryptic
|
||||
`Could not find a version that satisfies the requirement` — that
|
||||
message is pip filtering this wheel out, NOT the package missing
|
||||
from PyPI. Upgrade with `brew install python@3.12` /
|
||||
`apt install python3.12` / `pyenv install 3.12` first.
|
||||
* **`pipx` recommended over `pip`.** `pipx install` puts
|
||||
`molecule-mcp` on PATH automatically and isolates the runtime's
|
||||
deps from your system Python. Plain `pip install --user` works
|
||||
but the binary lands in `~/.local/bin` (Linux) or
|
||||
`~/Library/Python/3.X/bin` (macOS) which is often not on PATH on
|
||||
a fresh shell — `claude mcp add molecule-<workspace-slug> -- molecule-mcp`
|
||||
then fails with "command not found" at first use.
|
||||
|
||||
* **Server name in `claude mcp add` is workspace-specific.** The
|
||||
Canvas "Add to Claude Code" snippet stamps a unique slug
|
||||
(`molecule-<workspace-name>`) so a single Claude Code session can
|
||||
talk to N molecule workspaces concurrently — `claude mcp add` keys
|
||||
entries by name in `~/.claude.json`, so re-running with a bare
|
||||
`molecule` name silently overwrites the prior workspace's entry.
|
||||
See [molecule-core#1535](https://git.moleculesai.app/molecule-ai/molecule-core/pulls/1535)
|
||||
for the canonical generator.
|
||||
|
||||
### Install
|
||||
|
||||
```sh
|
||||
# Recommended:
|
||||
pipx install molecule-ai-workspace-runtime
|
||||
|
||||
# Alternative (manage PATH yourself):
|
||||
pip install --user molecule-ai-workspace-runtime
|
||||
```
|
||||
|
||||
### Run
|
||||
|
||||
```sh
|
||||
WORKSPACE_ID=<uuid> \\
|
||||
PLATFORM_URL=https://<tenant>.staging.moleculesai.app \\
|
||||
MOLECULE_WORKSPACE_TOKEN=<bearer> \\
|
||||
molecule-mcp
|
||||
```
|
||||
|
||||
That exposes the same 8 platform tools (`delegate_task`, `list_peers`,
|
||||
`send_message_to_user`, `commit_memory`, etc.) that container-bound
|
||||
runtimes already get via the workspace's auto-spawned MCP. Register
|
||||
the binary in your agent's MCP config — use a workspace-specific
|
||||
server name so multi-workspace setups don't collide (e.g. Claude Code:
|
||||
`claude mcp add molecule-<workspace-slug> -- molecule-mcp` with the env
|
||||
above; the Canvas modal stamps the right slug for you).
|
||||
|
||||
### Keeping the token out of shell history
|
||||
|
||||
Inline `MOLECULE_WORKSPACE_TOKEN=<bearer>` ends up in `~/.zsh_history`
|
||||
and (when registered via `claude mcp add`) plaintext in
|
||||
`~/.claude.json`. To avoid that, write the token to a 0600 file and
|
||||
point `MOLECULE_WORKSPACE_TOKEN_FILE` at it:
|
||||
|
||||
```sh
|
||||
umask 077
|
||||
printf '%s' "<bearer>" > ~/.config/molecule/token
|
||||
WORKSPACE_ID=<uuid> \\
|
||||
PLATFORM_URL=https://<tenant>.staging.moleculesai.app \\
|
||||
MOLECULE_WORKSPACE_TOKEN_FILE=$HOME/.config/molecule/token \\
|
||||
molecule-mcp
|
||||
```
|
||||
|
||||
Token resolution order: `MOLECULE_WORKSPACE_TOKEN` (inline env) →
|
||||
`MOLECULE_WORKSPACE_TOKEN_FILE` (path) → `${CONFIGS_DIR}/.auth_token`
|
||||
(in-container default).
|
||||
|
||||
The token comes from the canvas → Tokens tab. Restarting an external
|
||||
workspace from the canvas no longer revokes the token (PR #2412), so
|
||||
operator tokens persist across status nudges.
|
||||
|
||||
### Push vs poll delivery (Claude Code specifics)
|
||||
|
||||
By default the inbox runs in **poll mode** — every turn the agent
|
||||
calls `wait_for_message`, which blocks up to ~60s on
|
||||
`/activity?since_id=…`. Real-time push delivery is also supported,
|
||||
but on Claude Code it requires THREE conditions, ALL of which must
|
||||
hold:
|
||||
|
||||
1. **The MCP server declares `experimental.claude/channel`** — this
|
||||
wheel does (see `_build_initialize_result`). Nothing for you to
|
||||
do.
|
||||
2. **Claude Code installs the server as a marketplace plugin** — a
|
||||
plain `claude mcp add molecule-<workspace-slug> -- molecule-mcp`
|
||||
produces a non-plugin-sourced server, which Claude Code rejects with
|
||||
`channel_enable requires a marketplace plugin`. Until the
|
||||
official `moleculesai/claude-code-plugin` marketplace lands
|
||||
(tracking [#2936](https://git.moleculesai.app/molecule-ai/molecule-core/issues/2936)),
|
||||
operators who want push must scaffold their own local marketplace
|
||||
under
|
||||
`~/.claude/marketplaces/molecule-local/` containing a
|
||||
`marketplace.json` + `plugin.json` that points at this wheel.
|
||||
3. **Claude Code is launched with the dev-channels flag** — pass
|
||||
`--dangerously-load-development-channels plugin:molecule@<marketplace>`
|
||||
on the `claude` invocation. Without this flag the channel
|
||||
capability is silently ignored.
|
||||
|
||||
Symptom of any condition failing: messages arrive but only via the
|
||||
poll path (every ~1–60s), not real-time. There's currently no
|
||||
diagnostic surfaced — `molecule-mcp doctor` (tracking
|
||||
[#2937](https://git.moleculesai.app/molecule-ai/molecule-core/issues/2937)) is
|
||||
planned.
|
||||
|
||||
If you don't need real-time push, the default poll path works
|
||||
universally with no extra setup; both modes converge on the same
|
||||
`inbox_pop` ack so messages never duplicate.
|
||||
|
||||
See [`docs/workspace-runtime-package.md`](https://git.moleculesai.app/molecule-ai/molecule-core/src/branch/main/docs/workspace-runtime-package.md)
|
||||
for the publish flow and architecture.
|
||||
"""
|
||||
|
||||
|
||||
def main() -> int:
    """Assemble the publishable ``molecule_runtime`` build tree.

    Parses ``--version``, ``--out`` and ``--source``, verifies that
    TOP_LEVEL_MODULES and SUBPACKAGES still match the source directory, wipes
    and recreates the output directory, copies the filtered source tree,
    rewrites imports in the copied ``.py`` files, and writes
    ``pyproject.toml`` and ``README.md`` at the build root.

    Returns:
        Process exit code: 0 on success, 2 if ``--source`` is not a
        directory, 3 if either module list has drifted from disk.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--version", required=True, help="Package version, e.g. 0.1.6")
    parser.add_argument("--out", required=True, type=Path, help="Build output directory (will be wiped)")
    parser.add_argument("--source", type=Path, default=Path(__file__).resolve().parent.parent / "workspace",
                        help="Path to monorepo workspace/ directory (default: ../workspace from this script)")
    args = parser.parse_args()

    src = args.source.resolve()
    out = args.out.resolve()
    if not src.is_dir():
        print(f"error: source not a directory: {src}", file=sys.stderr)
        return 2

    # Drift gate: assert TOP_LEVEL_MODULES matches workspace/*.py.
    # Without this, a new top-level module added to workspace/ ships
    # with unrewritten `from <name> import` statements that explode at
    # runtime with ModuleNotFoundError. (See 0.1.16 transcript_auth
    # incident — closed list silently went stale.)
    on_disk_modules = {
        f.stem for f in src.glob("*.py")
        if f.stem not in {"__init__", "conftest"}
    }
    missing = on_disk_modules - TOP_LEVEL_MODULES
    stale = TOP_LEVEL_MODULES - on_disk_modules
    if missing or stale:
        print("error: TOP_LEVEL_MODULES drifted from workspace/*.py contents:", file=sys.stderr)
        if missing:
            print(f" in workspace/ but NOT in TOP_LEVEL_MODULES (will ship un-rewritten): {sorted(missing)}", file=sys.stderr)
        if stale:
            print(f" in TOP_LEVEL_MODULES but NOT in workspace/ (no-op, but misleading): {sorted(stale)}", file=sys.stderr)
        print(" Edit scripts/build_runtime_package.py:TOP_LEVEL_MODULES to match.", file=sys.stderr)
        return 3

    # Same drift gate for SUBPACKAGES — catches the inverse class of
    # bug where a workspace/ subdirectory is referenced by main.py
    # (`from lib.pre_stop import ...`) but is either missing from
    # SUBPACKAGES (so the rewriter doesn't qualify the import) or
    # accidentally listed in EXCLUDE_DIRS (so the directory itself
    # isn't shipped). 0.1.16-0.1.19 had `lib` in EXCLUDE_DIRS while
    # main.py imported from it — `ModuleNotFoundError: No module
    # named 'lib'` at every workspace startup.
    on_disk_subpkgs = {
        d.name for d in src.iterdir()
        if d.is_dir()
        and d.name not in EXCLUDE_DIRS
        and d.name not in {"__pycache__"}
        and (d / "__init__.py").exists()
    }
    sub_missing = on_disk_subpkgs - SUBPACKAGES
    sub_stale = SUBPACKAGES - on_disk_subpkgs
    if sub_missing or sub_stale:
        print("error: SUBPACKAGES drifted from workspace/ subdirectories:", file=sys.stderr)
        if sub_missing:
            print(f" in workspace/ but NOT in SUBPACKAGES (will ship un-rewritten or be excluded): {sorted(sub_missing)}", file=sys.stderr)
        if sub_stale:
            print(f" in SUBPACKAGES but NOT in workspace/ (no-op, but misleading): {sorted(sub_stale)}", file=sys.stderr)
        print(" Edit scripts/build_runtime_package.py:SUBPACKAGES + EXCLUDE_DIRS to match.", file=sys.stderr)
        return 3

    pkg_dir = out / "molecule_runtime"
    print(f"[build] source: {src}")
    print(f"[build] output: {out}")
    print(f"[build] package: {pkg_dir}")

    if out.exists():
        shutil.rmtree(out)
    out.mkdir(parents=True)

    py_files = copy_tree_filtered(src, pkg_dir)
    print(f"[build] copied {len(py_files)} .py files")

    # Install plugins_registry/ at the wheel TOP LEVEL so that plugin adapter
    # code (workspace-template-*) can use bare `from plugins_registry import ...`.
    # The molecule-runtime package (molecule_runtime/) also ships it at
    # molecule_runtime/plugins_registry/ (satisfies the rewritten
    # `from molecule_runtime.plugins_registry import ...` in adapter_base.py).
    # Both copies coexist: they serve different import namespaces.
    plugins_src = src / "plugins_registry"
    plugins_dst = out / "plugins_registry"
    if plugins_src.is_dir():
        shutil.copytree(plugins_src, plugins_dst)
        print("[build] installed plugins_registry/ at top level (bare-import shim)")

    # All text I/O below is pinned to UTF-8: the copied sources and
    # README_TEMPLATE contain non-ASCII characters, and the platform default
    # encoding is not guaranteed to be UTF-8.

    # Ensure top-level package marker exists. workspace/ doesn't have one
    # (it's not a package in monorepo), but the published artifact must.
    init = pkg_dir / "__init__.py"
    if not init.exists():
        init.write_text('"""Molecule AI workspace runtime."""\n', encoding="utf-8")

    # Touch py.typed so type-checkers in adapter consumers see the package
    # as typed. Empty file is the convention.
    (pkg_dir / "py.typed").touch()

    # Rewrite imports in every .py file we copied + the new __init__.py.
    regex = build_import_rewriter()
    rewrites = 0
    for f in [*py_files, init]:
        original = f.read_text(encoding="utf-8")
        rewritten = rewrite_imports(original, regex)
        if rewritten != original:
            f.write_text(rewritten, encoding="utf-8")
            rewrites += 1
    print(f"[build] rewrote imports in {rewrites} files")

    # Emit pyproject.toml + README at build root.
    (out / "pyproject.toml").write_text(PYPROJECT_TEMPLATE.format(version=args.version), encoding="utf-8")
    (out / "README.md").write_text(README_TEMPLATE, encoding="utf-8")

    print("[build] done. To publish:")
    print(f" cd {out}")
    print(" python -m build")
    print(" python -m twine upload dist/*")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -1,95 +0,0 @@
|
||||
#!/usr/bin/env bash
# check-cascade-list-vs-manifest.sh — structural drift gate for the
# publish-runtime cascade list vs manifest.json workspace_templates.
#
# WHY: PR #2536 pruned the manifest to 4 supported runtimes; PR #2556
# realigned the cascade list to match. The underlying drift hazard
# (cascade-list ≠ manifest) was unguarded — the data fix didn't prevent
# recurrence. This script is the structural gate that does.
#
# Behavior-based per project pattern: derives the expected set from
# manifest.json and the actual set from the workflow YAML, fails on
# any divergence in either direction.
#
# missing-from-cascade → templates in manifest that publish-runtime.yml
# won't auto-rebuild on a new wheel publish
# (the codex-stuck-on-stale-runtime bug class)
# extra-in-cascade → cascade dispatches to deprecated templates
# (the wasted-API-calls + dead-CI-noise class)
#
# Suffix mapping: manifest names map to GHCR repos via
# {name without -default suffix} → molecule-ai-workspace-template-<suffix>
# That's the same map publish-runtime.yml's TEMPLATES variable iterates.
#
# Usage:
#   check-cascade-list-vs-manifest.sh [MANIFEST] [WORKFLOW]
#   MANIFEST defaults to manifest.json
#   WORKFLOW defaults to .github/workflows/publish-runtime.yml
#
# Exit:
# 0 cascade matches manifest exactly
# 1 drift detected (script prints the diff)
# 2 bad usage / missing inputs (missing file, jq unavailable or failing,
#   no template names in the manifest, TEMPLATES line not found)

set -eu

MANIFEST="${1:-manifest.json}"
WORKFLOW="${2:-.github/workflows/publish-runtime.yml}"

if [ ! -f "$MANIFEST" ]; then
  echo "::error::manifest not found: $MANIFEST" >&2
  exit 2
fi
if [ ! -f "$WORKFLOW" ]; then
  echo "::error::workflow not found: $WORKFLOW" >&2
  exit 2
fi
if ! command -v jq >/dev/null 2>&1; then
  echo "::error::jq is required but was not found on PATH" >&2
  exit 2
fi

# Expected cascade entries: manifest workspace_templates → suffix-only
# (strip -default tail, e.g. claude-code-default → claude-code, since
# publish-runtime.yml's TEMPLATES uses suffixes that match the
# molecule-ai-workspace-template-<suffix> repo naming).
#
# jq runs on its own rather than at the head of the pipeline: a
# pipeline's exit status is that of its LAST command, so a jq failure
# (malformed JSON, missing key) would otherwise be masked by `sort`
# and surface as a bogus "everything is extra" drift report.
if ! MANIFEST_NAMES=$(jq -r '.workspace_templates[].name' "$MANIFEST"); then
  echo "::error::could not read .workspace_templates[].name from $MANIFEST — is it valid JSON with a workspace_templates array?" >&2
  exit 2
fi
EXPECTED=$(printf '%s\n' "$MANIFEST_NAMES" \
  | sed 's/-default$//' \
  | grep -v '^$' \
  | sort -u)

if [ -z "$EXPECTED" ]; then
  echo "::error::no workspace_templates names found in $MANIFEST" >&2
  exit 2
fi

# Actual cascade entries: extract from the TEMPLATES="…" line. We look
# for the line, pull the contents between the quotes, and split into
# one-per-line. Single source of truth in the workflow itself, no
# parallel registry needed.
#
# The grep is anchored with ^ so it selects exactly the lines the
# anchored sed below can rewrite. Unanchored, a line such as
# `OTHER_TEMPLATES="…"` or a comment mentioning TEMPLATES=" could win
# `head -1`, pass through sed untouched, and be split into junk entries.
#
# Why not \s in the regex: BSD sed (macOS) doesn't recognize \s as
# whitespace — treats it as literal `s`. POSIX [[:space:]] works on
# both BSD and GNU sed. Same hazard nuked the original draft of this
# script: \s* matched empty-prefix-of-literal-s, then the leading
# whitespace stayed in the captured group.
ACTUAL=$(grep -E '^[[:space:]]*TEMPLATES="' "$WORKFLOW" \
  | head -1 \
  | sed -E 's/^[[:space:]]*TEMPLATES="([^"]*)".*$/\1/' \
  | tr ' ' '\n' \
  | grep -v '^$' \
  | sort -u)

if [ -z "$ACTUAL" ]; then
  echo "::error::could not extract TEMPLATES=\"…\" from $WORKFLOW — has the variable name or quoting changed?" >&2
  exit 2
fi

# comm needs both inputs sorted; both lists come out of `sort -u` above.
MISSING=$(comm -23 <(printf '%s\n' "$EXPECTED") <(printf '%s\n' "$ACTUAL"))
EXTRA=$(comm -13 <(printf '%s\n' "$EXPECTED") <(printf '%s\n' "$ACTUAL"))

if [ -z "$MISSING" ] && [ -z "$EXTRA" ]; then
  echo "✓ cascade list matches manifest workspace_templates ($(echo "$EXPECTED" | wc -l | tr -d ' ') entries)"
  exit 0
fi

echo "::error::cascade list drift detected between $MANIFEST and $WORKFLOW" >&2
echo "" >&2
if [ -n "$MISSING" ]; then
  echo " Templates in manifest but MISSING from cascade (won't auto-rebuild on wheel publish):" >&2
  echo "$MISSING" | sed 's/^/ - /' >&2
  echo "" >&2
fi
if [ -n "$EXTRA" ]; then
  echo " Templates in cascade but NOT in manifest (deprecated, wasting dispatch calls):" >&2
  echo "$EXTRA" | sed 's/^/ - /' >&2
  echo "" >&2
fi
echo " Fix: edit the TEMPLATES=\"…\" line in $WORKFLOW so the set matches" >&2
echo " manifest.json's workspace_templates (suffix-stripped). See PR #2556 for context." >&2
exit 1
|
||||
@@ -1,201 +0,0 @@
|
||||
"""Tests for scripts/build_runtime_package.py — the wheel-build import rewriter.
|
||||
|
||||
Run locally: ``python3 -m unittest scripts/test_build_runtime_package.py -v``
|
||||
|
||||
Why this exists: PR #2433 shipped ``import inbox as _inbox_module`` inside
|
||||
the workspace runtime, and the rewriter expanded it to
|
||||
``import molecule_runtime.inbox as inbox as _inbox_module`` — invalid
|
||||
Python. The wheel-smoke gate caught it post-merge but couldn't block
|
||||
the merge (not a required check yet — see PR #2439). PR #2436 added a
|
||||
build-time gate that raises ``ValueError`` on this pattern; this file
|
||||
locks the rewriter's documented contract under unit test so the gate
|
||||
itself can't silently regress.
|
||||
|
||||
Coverage:
|
||||
- ``import X`` → ``import molecule_runtime.X as X``
|
||||
- ``import X.sub`` → ``import molecule_runtime.X.sub``
|
||||
- ``import X`` + trailing comment is preserved
|
||||
- ``from X import Y`` → ``from molecule_runtime.X import Y``
|
||||
- ``from X.sub import Y`` → ``from molecule_runtime.X.sub import Y``
|
||||
- ``from X import Y, Z`` → ``from molecule_runtime.X import Y, Z``
|
||||
- ``import X as Y`` → raises ValueError (the rewriter would
|
||||
produce ``import molecule_runtime.X as X as Y``, syntax error)
|
||||
- non-allowlist module names → not rewritten (regex anchors on the closed set)
|
||||
- Indented imports (inside def/class) keep their indentation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
# scripts/build_runtime_package.py lives at scripts/ — add scripts/ to sys.path
|
||||
# so the import works whether unittest is invoked from repo root or scripts/.
|
||||
HERE = os.path.dirname(os.path.abspath(__file__))
|
||||
if HERE not in sys.path:
|
||||
sys.path.insert(0, HERE)
|
||||
|
||||
import build_runtime_package as M # noqa: E402
|
||||
|
||||
|
||||
def rewrite(text: str) -> str:
    """Rewrite the imports in ``text`` through the build script's own API.

    The pattern comes from ``build_import_rewriter`` and is handed to
    ``rewrite_imports`` on every call, so each test goes through both
    the regex compile step and the substitution step.
    """
    return M.rewrite_imports(text, M.build_import_rewriter())
|
||||
|
||||
|
||||
class TestBareImportRewriting(unittest.TestCase):
    """``import X`` statements for modules the rewriter recognises."""

    def test_plain_import_aliases_to_preserve_binding(self):
        # The `as inbox` alias keeps the original local name bound.
        rewritten = rewrite("import inbox\n")
        self.assertEqual(rewritten, "import molecule_runtime.inbox as inbox\n")

    def test_plain_import_with_trailing_comment_is_preserved(self):
        # Shape taken from a2a_mcp_server.py: the trailing comment and
        # the space in front of it have to come through untouched.
        rewritten = rewrite("import inbox # noqa: E402\n")
        self.assertEqual(
            rewritten,
            "import molecule_runtime.inbox as inbox # noqa: E402\n",
        )

    def test_import_dotted_keeps_dotted_form(self):
        # A dotted import must only gain the package prefix. Adding an
        # alias would yield `import molecule_runtime.X.sub as X.sub`,
        # which is not valid Python.
        rewritten = rewrite("import platform_tools.registry\n")
        self.assertEqual(
            rewritten,
            "import molecule_runtime.platform_tools.registry\n",
        )

    def test_indented_import_preserves_indentation(self):
        function_source = "def foo():\n import inbox\n return inbox.x\n"
        rewritten = rewrite(function_source)
        self.assertIn(" import molecule_runtime.inbox as inbox\n", rewritten)
|
||||
|
||||
|
||||
class TestFromImportRewriting(unittest.TestCase):
    """``from X import ...`` statements: only the module path is prefixed."""

    def test_from_module_import_simple(self):
        rewritten = rewrite("from inbox import InboxState\n")
        self.assertEqual(
            rewritten,
            "from molecule_runtime.inbox import InboxState\n",
        )

    def test_from_dotted_import(self):
        rewritten = rewrite("from platform_tools.registry import TOOLS\n")
        self.assertEqual(
            rewritten,
            "from molecule_runtime.platform_tools.registry import TOOLS\n",
        )

    def test_from_import_multiple_symbols(self):
        # Several names on one line: the imported names stay as written,
        # only the module prefix changes.
        rewritten = rewrite("from a2a_tools import (foo, bar, baz)\n")
        self.assertEqual(
            rewritten,
            "from molecule_runtime.a2a_tools import (foo, bar, baz)\n",
        )

    def test_from_import_block_form(self):
        block_import = "".join(
            [
                "from a2a_tools import (\n",
                " tool_check_task_status,\n",
                " tool_commit_memory,\n",
                ")\n",
            ]
        )
        rewritten = rewrite(block_import)
        self.assertIn("from molecule_runtime.a2a_tools import (\n", rewritten)
        # The continuation lines and the closing paren are untouched.
        self.assertIn(" tool_check_task_status,\n", rewritten)
        self.assertIn(")\n", rewritten)
|
||||
|
||||
|
||||
class TestImportAsAliasRejection(unittest.TestCase):
    """The key regression class — the failure mode that shipped in PR #2433."""

    def test_import_as_alias_raises_value_error(self):
        with self.assertRaises(ValueError) as raised:
            rewrite("import inbox as _inbox_module\n")
        message = str(raised.exception)
        # The message has to name the module, show the rejected alias
        # form, and mention the `from X import …` alternative.
        for fragment in ("inbox", "as <alias>", "from"):
            self.assertIn(fragment, message)

    def test_import_as_alias_indented_still_rejected(self):
        # Nesting the import inside a def does not make it acceptable.
        self.assertRaises(ValueError, rewrite, "def foo():\n import inbox as _x\n")

    def test_import_as_alias_with_trailing_comment_still_rejected(self):
        self.assertRaises(ValueError, rewrite, "import inbox as _x # comment\n")

    def test_plain_import_with_as_in_comment_does_not_trip(self):
        # "as inbox" appears only inside the comment here, so the line
        # must be rewritten normally instead of being rejected.
        rewritten = rewrite("import inbox # rewriter produces alias as inbox\n")
        self.assertEqual(
            rewritten,
            "import molecule_runtime.inbox as inbox # rewriter produces alias as inbox\n",
        )

    def test_import_followed_by_comma_is_not_an_alias(self):
        # `import inbox, os` has a comma after the module, not `as`, so
        # it must not be rejected. Only the rewrite of `inbox` is
        # asserted; how `os` is rendered is not checked here.
        rewritten = rewrite("import inbox, os\n")
        self.assertIn("import molecule_runtime.inbox as inbox", rewritten)
|
||||
|
||||
|
||||
class TestOutsideAllowlistModules(unittest.TestCase):
    """Imports of modules outside the rewriter's allowlist pass through as-is."""

    def test_third_party_imports_unchanged(self):
        # Neither stdlib nor third-party names (`httpx`, `os`, `re`) are
        # on the closed list, so the text must come back byte-identical.
        untouched = "import httpx\nimport os\nfrom re import match\n"
        self.assertEqual(rewrite(untouched), untouched)

    def test_short_name_collision_avoided(self):
        # The allowlist has names such as `a2a_tools` and `a2a_client`
        # but not bare `a2a`, so an import from the `a2a` package must
        # not be treated as a prefix match.
        untouched = "from a2a.server.routes import create_agent_card_routes\n"
        self.assertEqual(rewrite(untouched), untouched)
|
||||
|
||||
|
||||
class TestEndToEndShape(unittest.TestCase):
    """Reproduces the PR #2433 → #2436 incident shape."""

    def test_pr_2433_pattern_now_rejected(self):
        # These are the lines PR #2433 placed inside main(). Rewriting
        # them used to emit
        # `import molecule_runtime.inbox as inbox as _inbox_module`,
        # a syntax error in the published wheel.
        incident_lines = (
            " import inbox as _inbox_module\n"
            " _inbox_module.set_notification_callback(_on_inbox_message)\n"
        )
        with self.assertRaises(ValueError) as raised:
            rewrite(incident_lines)
        # The module name must appear in the error so the operator can
        # find the line to fix.
        self.assertIn("inbox", str(raised.exception))

    def test_pr_2436_fix_pattern_works(self):
        # Fix-forward layout from #2436: a module-level `import inbox`
        # with the callback wired up inside main().
        fixed_source = "".join(
            [
                "import inbox\n",
                "\n",
                "def main():\n",
                " inbox.set_notification_callback(cb)\n",
            ]
        )
        rewritten = rewrite(fixed_source)
        self.assertIn("import molecule_runtime.inbox as inbox\n", rewritten)
        # Attribute access on `inbox` is not an import, so the call site
        # stays as written and relies on the aliased binding.
        self.assertIn(" inbox.set_notification_callback(cb)\n", rewritten)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
+1
-1
@@ -9,7 +9,7 @@ This repo uses the standard monorepo testing convention: **unit tests live with
|
||||
| Go unit + integration (platform, CLI, handlers) | `workspace-server/**/*_test.go` — run with `cd workspace-server && go test -race ./...` |
|
||||
| TypeScript unit (canvas components, hooks, store) | `canvas/src/**/__tests__/` — run with `cd canvas && npm test -- --run` |
|
||||
| TypeScript unit (MCP server handlers) | `mcp-server/src/__tests__/` — run with `cd mcp-server && npx jest` |
|
||||
| Python unit (workspace runtime, adapters) | `workspace/tests/` — run with `cd workspace && python3 -m pytest` |
|
||||
| Python unit (workspace runtime, adapters) | `molecule-ai-workspace-runtime/tests/` in the standalone runtime repo |
|
||||
| Python unit (SDK: plugin + remote agent) | `sdk/python/tests/` — run with `cd sdk/python && python3 -m pytest` |
|
||||
| **Cross-component E2E** (spans platform + runtime + HTTP) | `tests/e2e/` ← **you are here** |
|
||||
|
||||
|
||||
+4
-1
@@ -33,7 +33,10 @@ e2e_mint_test_token() {
|
||||
return 2
|
||||
fi
|
||||
local body
|
||||
body=$(curl -s -w "\n%{http_code}" "$BASE/admin/workspaces/$wid/test-token")
|
||||
local admin_bearer="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
|
||||
local admin_auth=()
|
||||
[ -n "$admin_bearer" ] && admin_auth=(-H "Authorization: Bearer $admin_bearer")
|
||||
body=$(curl -s -w "\n%{http_code}" "$BASE/admin/workspaces/$wid/test-token" ${admin_auth[@]+"${admin_auth[@]}"})
|
||||
local code
|
||||
code=$(printf '%s' "$body" | tail -n1)
|
||||
local json
|
||||
|
||||
@@ -71,7 +71,7 @@ pv_assert_runtime() {
|
||||
set +e
|
||||
resp=$(curl -sS -X POST "$base_url/workspaces/$wid/mcp" \
|
||||
-H "Authorization: Bearer $wtok" \
|
||||
"${org_header[@]}" \
|
||||
${org_header[@]+"${org_header[@]}"} \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$PV_RPC_BODY" \
|
||||
-o /tmp/pv_mcp_body.json -w "%{http_code}" 2>/dev/null)
|
||||
|
||||
+23
-12
@@ -10,6 +10,10 @@ FAIL=0
|
||||
# as `Authorization: Bearer <token>`. Capture them here.
|
||||
ECHO_TOKEN=""
|
||||
SUM_TOKEN=""
|
||||
ECHO_AUTH=()
|
||||
SUM_AUTH=()
|
||||
ECHO_URL="https://example.com/echo-agent"
|
||||
SUM_URL="https://example.com/summarizer-agent"
|
||||
|
||||
# AdminAuth-gated calls need a bearer token once any workspace token
|
||||
# exists in the DB. ADMIN_TOKEN is populated after the first workspace
|
||||
@@ -54,8 +58,8 @@ R=$(acurl "$BASE/workspaces")
|
||||
check "GET /workspaces (empty)" '[]' "$R"
|
||||
|
||||
# Test 3: Create workspace A (AdminAuth fail-open — no tokens exist yet)
|
||||
R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Echo Agent","tier":1}')
|
||||
check "POST /workspaces (create echo)" '"status":"provisioning"' "$R"
|
||||
R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Echo Agent","tier":1,"runtime":"external","external":true}')
|
||||
check "POST /workspaces (create echo)" '"status":"awaiting_agent"' "$R"
|
||||
ECHO_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])")
|
||||
|
||||
# Mint a test token so all subsequent AdminAuth-gated calls succeed.
|
||||
@@ -72,8 +76,8 @@ else
|
||||
fi
|
||||
|
||||
# Test 4: Create workspace B (needs bearer — tokens now exist in DB)
|
||||
R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Summarizer Agent","tier":1}')
|
||||
check "POST /workspaces (create summarizer)" '"status":"provisioning"' "$R"
|
||||
R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Summarizer Agent","tier":1,"runtime":"external","external":true}')
|
||||
check "POST /workspaces (create summarizer)" '"status":"awaiting_agent"' "$R"
|
||||
SUM_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])")
|
||||
|
||||
# Test 5: List has 2
|
||||
@@ -90,9 +94,10 @@ check "GET /workspaces/:id (agent_card null)" '"agent_card":null' "$R"
|
||||
# endpoint), not the admin token. C18 requires a token issued TO THIS
|
||||
# workspace, not just any valid token.
|
||||
ECHO_WS_TOKEN=$(curl -s "$BASE/admin/workspaces/$ECHO_ID/test-token" | python3 -c "import sys,json; print(json.load(sys.stdin).get('auth_token',''))" 2>/dev/null || echo "")
|
||||
[ -n "$ECHO_WS_TOKEN" ] && ECHO_AUTH=(-H "Authorization: Bearer $ECHO_WS_TOKEN")
|
||||
R=$(curl -s -X POST "$BASE/registry/register" -H "Content-Type: application/json" \
|
||||
${ECHO_WS_TOKEN:+-H "Authorization: Bearer $ECHO_WS_TOKEN"} \
|
||||
-d "{\"id\":\"$ECHO_ID\",\"url\":\"http://localhost:8001\",\"agent_card\":{\"name\":\"Echo Agent\",\"skills\":[{\"id\":\"echo\",\"name\":\"Echo\"}]}}")
|
||||
"${ECHO_AUTH[@]}" \
|
||||
-d "{\"id\":\"$ECHO_ID\",\"url\":\"$ECHO_URL\",\"agent_card\":{\"name\":\"Echo Agent\",\"skills\":[{\"id\":\"echo\",\"name\":\"Echo\"}]}}")
|
||||
check "POST /registry/register (echo)" '"status":"registered"' "$R"
|
||||
# Extract token from register response; fall back to the test-token we
|
||||
# already minted (register may not return a new token on re-registration).
|
||||
@@ -101,9 +106,10 @@ if [ -z "$ECHO_TOKEN" ]; then ECHO_TOKEN="$ECHO_WS_TOKEN"; fi
|
||||
|
||||
# Test 8: Register summarizer — same pattern: workspace-specific token
|
||||
SUM_WS_TOKEN=$(curl -s "$BASE/admin/workspaces/$SUM_ID/test-token" | python3 -c "import sys,json; print(json.load(sys.stdin).get('auth_token',''))" 2>/dev/null || echo "")
|
||||
[ -n "$SUM_WS_TOKEN" ] && SUM_AUTH=(-H "Authorization: Bearer $SUM_WS_TOKEN")
|
||||
R=$(curl -s -X POST "$BASE/registry/register" -H "Content-Type: application/json" \
|
||||
${SUM_WS_TOKEN:+-H "Authorization: Bearer $SUM_WS_TOKEN"} \
|
||||
-d "{\"id\":\"$SUM_ID\",\"url\":\"http://localhost:8002\",\"agent_card\":{\"name\":\"Summarizer\",\"skills\":[{\"id\":\"summarize\",\"name\":\"Summarize\"}]}}")
|
||||
"${SUM_AUTH[@]}" \
|
||||
-d "{\"id\":\"$SUM_ID\",\"url\":\"$SUM_URL\",\"agent_card\":{\"name\":\"Summarizer\",\"skills\":[{\"id\":\"summarize\",\"name\":\"Summarize\"}]}}")
|
||||
check "POST /registry/register (summarizer)" '"status":"registered"' "$R"
|
||||
SUM_TOKEN=$(echo "$R" | e2e_extract_token)
|
||||
if [ -z "$SUM_TOKEN" ]; then SUM_TOKEN="$SUM_WS_TOKEN"; fi
|
||||
@@ -112,7 +118,7 @@ if [ -z "$SUM_TOKEN" ]; then SUM_TOKEN="$SUM_WS_TOKEN"; fi
|
||||
R=$(acurl "$BASE/workspaces/$ECHO_ID")
|
||||
check "Echo is online" '"status":"online"' "$R"
|
||||
check "Echo has agent_card" '"skills"' "$R"
|
||||
check "Echo has url" '"url":"http://localhost:8001"' "$R"
|
||||
check "Echo has url" "\"url\":\"$ECHO_URL\"" "$R"
|
||||
|
||||
# Test 10: Heartbeat
|
||||
R=$(curl -s -X POST "$BASE/registry/heartbeat" -H "Content-Type: application/json" -H "Authorization: Bearer $ECHO_TOKEN" \
|
||||
@@ -178,7 +184,7 @@ curl -s -X POST "$BASE/registry/heartbeat" -H "Content-Type: application/json" -
|
||||
# Re-register to force online status in case liveness expired
|
||||
curl -s -X POST "$BASE/registry/register" -H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer $ECHO_TOKEN" \
|
||||
-d "{\"id\":\"$ECHO_ID\",\"url\":\"http://localhost:8001\",\"agent_card\":{\"name\":\"Echo Agent v2\",\"skills\":[{\"id\":\"echo\",\"name\":\"Echo\"},{\"id\":\"repeat\",\"name\":\"Repeat\"}]}}" > /dev/null
|
||||
-d "{\"id\":\"$ECHO_ID\",\"url\":\"$ECHO_URL\",\"agent_card\":{\"name\":\"Echo Agent v2\",\"skills\":[{\"id\":\"echo\",\"name\":\"Echo\"},{\"id\":\"repeat\",\"name\":\"Repeat\"}]}}" > /dev/null
|
||||
|
||||
# Now send high error rate to trigger degraded
|
||||
R=$(curl -s -X POST "$BASE/registry/heartbeat" -H "Content-Type: application/json" -H "Authorization: Bearer $ECHO_TOKEN" \
|
||||
@@ -358,12 +364,17 @@ else
|
||||
fi
|
||||
|
||||
# Register the re-imported workspace to verify agent_card round-trips
|
||||
NEW_TOKEN=$(curl -s "$BASE/admin/workspaces/$NEW_ID/test-token" | python3 -c "import sys,json; print(json.load(sys.stdin).get('auth_token',''))" 2>/dev/null || echo "")
|
||||
NEW_AUTH=()
|
||||
[ -n "$NEW_TOKEN" ] && NEW_AUTH=(-H "Authorization: Bearer $NEW_TOKEN")
|
||||
R=$(curl -s -X POST "$BASE/registry/register" -H "Content-Type: application/json" \
|
||||
-d "{\"id\":\"$NEW_ID\",\"url\":\"http://localhost:8002\",\"agent_card\":{\"name\":\"Summarizer\",\"skills\":[{\"id\":\"summarize\",\"name\":\"Summarize\"}]}}")
|
||||
"${NEW_AUTH[@]}" \
|
||||
-d "{\"id\":\"$NEW_ID\",\"url\":\"$SUM_URL\",\"agent_card\":{\"name\":\"Summarizer\",\"skills\":[{\"id\":\"summarize\",\"name\":\"Summarize\"}]}}")
|
||||
check "Register re-imported workspace" '"status":"registered"' "$R"
|
||||
# Capture the fresh token issued to the re-imported workspace. SUM_TOKEN was
|
||||
# revoked when SUM_ID was deleted above — use this one for cleanup instead.
|
||||
NEW_TOKEN=$(echo "$R" | e2e_extract_token)
|
||||
REG_NEW_TOKEN=$(echo "$R" | e2e_extract_token)
|
||||
[ -n "$REG_NEW_TOKEN" ] && NEW_TOKEN="$REG_NEW_TOKEN"
|
||||
|
||||
# Re-export and verify agent_card survives the round-trip (#165 / PR #167 — admin-gated)
|
||||
REBUNDLE=$(curl -s "$BASE/bundles/export/$NEW_ID" -H "Authorization: Bearer $NEW_TOKEN")
|
||||
|
||||
@@ -24,7 +24,8 @@
|
||||
#
|
||||
# Only PROVISIONING differs from staging:
|
||||
# - staging: POST /cp/admin/orgs (cold EC2 tenant) + per-tenant admin
|
||||
# token + each workspace's auth_token from the POST /workspaces resp.
|
||||
# token + each workspace's MCP bearer from create response or an admin
|
||||
# token-mint fallback.
|
||||
# - local: POST /workspaces directly against the local stack
|
||||
# (BASE, default http://localhost:8080), MCP bearer minted via
|
||||
# GET /admin/workspaces/:id/test-token (e2e_mint_test_token —
|
||||
@@ -32,17 +33,22 @@
|
||||
# every other local E2E (test_priority_runtimes_e2e.sh,
|
||||
# test_api.sh) already uses; no new credential/provision flow.
|
||||
#
|
||||
# It is written to FAIL on today's broken Hermes/OpenClaw behavior and go
|
||||
# green only when the in-flight root-cause fixes (Hermes-401 #162,
|
||||
# OpenClaw-never-online/MCP-wiring #165) actually land — same gate
|
||||
# semantics + exit codes as the staging script. NON-required by design
|
||||
# until then (flip-to-required tracked at molecule-core#1296), and NOT
|
||||
# masked with continue-on-error (feedback_fix_root_not_symptom).
|
||||
# By default the local backend creates external-mode workspace rows and
|
||||
# drives the literal MCP path directly. That keeps the local peer-visibility
|
||||
# gate focused on platform auth + MCP list_peers semantics instead of local
|
||||
# template container boot/heartbeat. Set PV_LOCAL_PROVISION_MODE=container
|
||||
# for targeted runtime-boot debugging. NON-required by design until the
|
||||
# flip-to-required tracked at molecule-core#1296, and NOT masked with
|
||||
# continue-on-error (feedback_fix_root_not_symptom).
|
||||
#
|
||||
# Required env: none (local stack only).
|
||||
# Optional env:
|
||||
# BASE default http://localhost:8080
|
||||
# PV_RUNTIMES space list; default "hermes openclaw claude-code"
|
||||
# PV_LOCAL_PROVISION_MODE default external; set container to also require
|
||||
# local template containers to boot online
|
||||
# PV_PARENT_RUNTIME parent runtime; default claude-code when keyed,
|
||||
# otherwise first keyed runtime in PV_RUNTIMES
|
||||
# E2E_PROVISION_TIMEOUT_SECS per-workspace online budget; default 900
|
||||
# (hermes cold apt+uv is the slow path locally)
|
||||
# E2E_KEEP_WS 1 → skip teardown (local debugging only)
|
||||
@@ -68,6 +74,7 @@ source "$(dirname "$0")/_lib.sh"
|
||||
source "$(dirname "$0")/lib/peer_visibility_assert.sh"
|
||||
|
||||
PV_RUNTIMES="${PV_RUNTIMES:-hermes openclaw claude-code}"
|
||||
PV_LOCAL_PROVISION_MODE="${PV_LOCAL_PROVISION_MODE:-external}"
|
||||
PROVISION_TIMEOUT_SECS="${E2E_PROVISION_TIMEOUT_SECS:-900}"
|
||||
NAME_PREFIX="PV-Local-$$-$(date +%H%M%S)"
|
||||
|
||||
@@ -75,6 +82,9 @@ log() { echo "[$(date +%H:%M:%S)] $*"; }
|
||||
ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; }
|
||||
|
||||
CREATED_WSIDS=()
|
||||
ADMIN_BEARER="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
|
||||
ADMIN_AUTH=()
|
||||
[ -n "$ADMIN_BEARER" ] && ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_BEARER")
|
||||
|
||||
# ─── Scoped teardown ───────────────────────────────────────────────────
|
||||
# Deletes ONLY the workspaces THIS run created (tracked in CREATED_WSIDS),
|
||||
@@ -94,7 +104,7 @@ teardown() {
|
||||
log "[teardown] deleting ${#CREATED_WSIDS[@]} workspace(s) this run created (scoped)"
|
||||
for wid in ${CREATED_WSIDS[@]+"${CREATED_WSIDS[@]}"}; do
|
||||
[ -n "$wid" ] || continue
|
||||
curl -s -X DELETE "$BASE/workspaces/$wid?confirm=true" >/dev/null 2>&1 || true
|
||||
curl -s -X DELETE "$BASE/workspaces/$wid?confirm=true" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} >/dev/null 2>&1 || true
|
||||
done
|
||||
exit $rc
|
||||
}
|
||||
@@ -103,7 +113,7 @@ trap teardown EXIT INT TERM
|
||||
# Pre-sweep workspaces a prior crashed run of THIS script left behind
|
||||
# (name prefix match only — never a blanket delete). The trap fires on
|
||||
# normal exit, but a kill -9 / SIGPIPE can bypass it.
|
||||
PRIOR=$(curl -s "$BASE/workspaces" | python3 -c '
|
||||
PRIOR=$(curl -s "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} | python3 -c '
|
||||
import json, sys
|
||||
try:
|
||||
print(" ".join(w["id"] for w in json.load(sys.stdin) if w.get("name","").startswith("PV-Local-")))
|
||||
@@ -112,7 +122,7 @@ except Exception:
|
||||
' 2>/dev/null)
|
||||
for _wid in $PRIOR; do
|
||||
log "Pre-sweeping prior PV-Local workspace: $_wid"
|
||||
curl -s -X DELETE "$BASE/workspaces/$_wid?confirm=true" >/dev/null 2>&1 || true
|
||||
curl -s -X DELETE "$BASE/workspaces/$_wid?confirm=true" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} >/dev/null 2>&1 || true
|
||||
done
|
||||
|
||||
# ─── Local-stack preflight ─────────────────────────────────────────────
|
||||
@@ -123,10 +133,10 @@ if ! curl -fsS "$BASE/health" -m 5 >/dev/null 2>&1; then
|
||||
fi
|
||||
# admin/test-token is the local MCP-bearer mint path; it 404s in
|
||||
# production. If it is off, this gate cannot drive the literal call.
|
||||
if ! curl -fsS "$BASE/admin/workspaces/preflight-probe/test-token" -m 5 >/dev/null 2>&1; then
|
||||
if ! curl -fsS "$BASE/admin/workspaces/preflight-probe/test-token" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -m 5 >/dev/null 2>&1; then
|
||||
# A 404 here is EITHER "no such ws" (fine — endpoint is enabled) OR the
|
||||
# endpoint is disabled (MOLECULE_ENV=production). Distinguish by body.
|
||||
PROBE=$(curl -s "$BASE/admin/workspaces/preflight-probe/test-token" -m 5 2>/dev/null)
|
||||
PROBE=$(curl -s "$BASE/admin/workspaces/preflight-probe/test-token" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -m 5 2>/dev/null)
|
||||
if echo "$PROBE" | grep -qi 'production\|disabled\|not found.*endpoint'; then
|
||||
echo "::error::GET /admin/workspaces/:id/test-token disabled (MOLECULE_ENV=production?). Cannot mint a local MCP bearer." >&2
|
||||
exit 1
|
||||
@@ -164,6 +174,28 @@ runtime_secrets() {
|
||||
esac
|
||||
}
|
||||
|
||||
# Print the runtime to use for the parent workspace and return 0, or
# return 1 when no usable runtime is found.
#
# Selection order:
#   1. PV_PARENT_RUNTIME, when set — used only if runtime_secrets succeeds
#      for it; otherwise fail without trying anything else.
#   2. claude-code, if runtime_secrets succeeds for it.
#   3. The first entry of PV_RUNTIMES for which runtime_secrets succeeds.
choose_parent_runtime() {
  local candidate

  if [ -n "${PV_PARENT_RUNTIME:-}" ]; then
    # An explicit override is authoritative: no fallback on failure.
    if ! runtime_secrets "$PV_PARENT_RUNTIME" >/dev/null; then
      return 1
    fi
    echo "$PV_PARENT_RUNTIME"
    return 0
  fi

  # claude-code is tried first, then PV_RUNTIMES in its listed order
  # (deliberately unquoted so the space-separated list word-splits).
  for candidate in claude-code $PV_RUNTIMES; do
    if runtime_secrets "$candidate" >/dev/null; then
      echo "$candidate"
      return 0
    fi
  done

  return 1
}
|
||||
|
||||
# Block until $1 reaches one of $2 (space-separated), or $3 sec elapse.
|
||||
wait_for_status() {
|
||||
local wsid="$1" want="$2" budget="$3" start=$SECONDS last=""
|
||||
@@ -182,27 +214,42 @@ except Exception:
|
||||
return 1
|
||||
}
|
||||
|
||||
# ─── 1. Provision parent (claude-code) + one sibling per runtime ───────
|
||||
# Same topology as the staging script: a claude-code parent plus one
|
||||
# sibling per runtime under test, so each runtime should see all others.
|
||||
log "1/5 provisioning parent (claude-code) + one sibling per runtime under test..."
|
||||
|
||||
PARENT_SECRETS=$(runtime_secrets claude-code) || PARENT_SECRETS=""
|
||||
if [ -z "$PARENT_SECRETS" ]; then
|
||||
# Parent still needs to exist as a peer target even without an LLM key;
|
||||
# it never has to answer list_peers itself (it is excluded from the
|
||||
# caller set), so an empty-secrets claude-code shell is sufficient.
|
||||
# ─── 1. Provision parent + one sibling per runtime ──────────────────────
|
||||
# Same topology as the staging script: one parent plus one sibling per
|
||||
# runtime under test, so each runtime should see all others. The default
|
||||
# local backend uses external-mode rows because the literal MCP list_peers
|
||||
# path is platform-local and must not depend on local template boot/heartbeat.
|
||||
if [ "$PV_LOCAL_PROVISION_MODE" = "external" ]; then
|
||||
PARENT_RUNTIME="external"
|
||||
PARENT_SECRETS="{}"
|
||||
PARENT_EXTRA=',"external":true'
|
||||
else
|
||||
# Container mode is still available for local runtime-boot debugging.
|
||||
# Prefer a claude-code parent for staging parity, but local CI is
|
||||
# intentionally allowed to be partially keyed; an unkeyed parent can
|
||||
# never heartbeat.
|
||||
PARENT_RUNTIME=$(choose_parent_runtime) || {
|
||||
echo "::error::No keyed runtime available for parent — cannot run the local peer-visibility gate. Set CLAUDE_CODE_OAUTH_TOKEN and/or E2E_MINIMAX_API_KEY (or ANTHROPIC/OPENAI)." >&2
|
||||
exit 1
|
||||
}
|
||||
PARENT_SECRETS=$(runtime_secrets "$PARENT_RUNTIME") || PARENT_SECRETS=""
|
||||
if [ -z "$PARENT_SECRETS" ]; then
|
||||
echo "::error::parent runtime $PARENT_RUNTIME has no provider secrets" >&2
|
||||
exit 1
|
||||
fi
|
||||
PARENT_EXTRA=""
|
||||
fi
|
||||
P_RESP=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"${NAME_PREFIX}-parent\",\"runtime\":\"claude-code\",\"tier\":3,\"secrets\":$PARENT_SECRETS}")
|
||||
log "1/5 provisioning parent ($PARENT_RUNTIME, mode=$PV_LOCAL_PROVISION_MODE) + one sibling per runtime under test..."
|
||||
|
||||
P_RESP=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"${NAME_PREFIX}-parent\",\"runtime\":\"$PARENT_RUNTIME\",\"tier\":3$PARENT_EXTRA,\"secrets\":$PARENT_SECRETS}")
|
||||
PARENT_ID=$(echo "$P_RESP" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))' 2>/dev/null)
|
||||
if [ -z "$PARENT_ID" ]; then
|
||||
echo "::error::parent create failed: $(echo "$P_RESP" | head -c 300)" >&2
|
||||
exit 1
|
||||
fi
|
||||
CREATED_WSIDS+=("$PARENT_ID")
|
||||
log " PARENT_ID=$PARENT_ID"
|
||||
log " PARENT_ID=$PARENT_ID runtime=$PARENT_RUNTIME"
|
||||
|
||||
# NOTE: no `declare -A` — this script must also run on a local macOS dev
|
||||
# box (bash 3.2, no associative arrays) per feedback_local_must_mimic_
|
||||
@@ -231,13 +278,21 @@ _map_get() { # _map_get <mapvarname> <key> -> stdout value (empty if absent)
|
||||
ALL_WS_IDS="$PARENT_ID"
|
||||
ACTIVE_RUNTIMES=""
|
||||
for rt in $PV_RUNTIMES; do
|
||||
SEC=$(runtime_secrets "$rt") || SEC=""
|
||||
if [ -z "$SEC" ]; then
|
||||
log " SKIP $rt — no provider key in env (partially-keyed local env; not a failure)"
|
||||
continue
|
||||
if [ "$PV_LOCAL_PROVISION_MODE" = "external" ]; then
|
||||
SEC="{}"
|
||||
CREATE_RUNTIME="external"
|
||||
CREATE_EXTRA=',"external":true'
|
||||
else
|
||||
SEC=$(runtime_secrets "$rt") || SEC=""
|
||||
if [ -z "$SEC" ]; then
|
||||
log " SKIP $rt — no provider key in env (partially-keyed local env; not a failure)"
|
||||
continue
|
||||
fi
|
||||
CREATE_RUNTIME="$rt"
|
||||
CREATE_EXTRA=""
|
||||
fi
|
||||
R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"${NAME_PREFIX}-$rt\",\"runtime\":\"$rt\",\"tier\":2,\"parent_id\":\"$PARENT_ID\",\"secrets\":$SEC}")
|
||||
R=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"${NAME_PREFIX}-$rt\",\"runtime\":\"$CREATE_RUNTIME\",\"tier\":2,\"parent_id\":\"$PARENT_ID\"$CREATE_EXTRA,\"secrets\":$SEC}")
|
||||
WID=$(echo "$R" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))' 2>/dev/null)
|
||||
if [ -z "$WID" ]; then
|
||||
echo "::error::$rt workspace create failed: $(echo "$R" | head -c 300)" >&2
|
||||
@@ -257,32 +312,40 @@ if [ -z "$ACTIVE_RUNTIMES" ]; then
|
||||
fi
|
||||
|
||||
# ─── 2. Wait for the parent online (it is a peer target) ───────────────
|
||||
log "2/5 waiting for parent online (peer target)..."
|
||||
PF=$(wait_for_status "$PARENT_ID" "online" "$PROVISION_TIMEOUT_SECS") || true
|
||||
if [ "$PF" != "online" ]; then
|
||||
echo "::error::parent ($PARENT_ID) never reached online (last=$PF) within ${PROVISION_TIMEOUT_SECS}s" >&2
|
||||
exit 3
|
||||
fi
|
||||
ok " parent online"
|
||||
|
||||
# ─── 3. Wait for every sibling online ──────────────────────────────────
|
||||
# A runtime that never comes online locally is itself a finding: it
|
||||
# reproduces the openclaw-never-online class (#165) on the local stack.
|
||||
log "3/5 waiting for all siblings online (up to ${PROVISION_TIMEOUT_SECS}s each — cold boot)..."
|
||||
REGRESSED=0
|
||||
ONLINE_RUNTIMES=""
|
||||
for rt in $ACTIVE_RUNTIMES; do
|
||||
wid="$(_map_get WS_IDS_MAP "$rt")"
|
||||
S=$(wait_for_status "$wid" "online" "$PROVISION_TIMEOUT_SECS") || true
|
||||
if [ "$S" != "online" ]; then
|
||||
echo " ✗ $rt ($wid): never reached online (last=$S) — reproduces the never-online class locally"
|
||||
_map_set VERDICT_MAP "$rt" "FAIL(never-online:last=$S)"
|
||||
REGRESSED=1
|
||||
continue
|
||||
if [ "$PV_LOCAL_PROVISION_MODE" = "external" ]; then
|
||||
log "2/5 external-mode local backend: parent is awaiting_agent; no container-online wait needed"
|
||||
ok " parent created"
|
||||
log "3/5 external-mode local backend: siblings are awaiting_agent; driving MCP directly"
|
||||
ONLINE_RUNTIMES="$ACTIVE_RUNTIMES"
|
||||
else
|
||||
log "2/5 waiting for parent online (peer target)..."
|
||||
PF=$(wait_for_status "$PARENT_ID" "online" "$PROVISION_TIMEOUT_SECS") || true
|
||||
if [ "$PF" != "online" ]; then
|
||||
echo "::error::parent ($PARENT_ID) never reached online (last=$PF) within ${PROVISION_TIMEOUT_SECS}s" >&2
|
||||
exit 3
|
||||
fi
|
||||
ok " $rt online"
|
||||
ONLINE_RUNTIMES="$ONLINE_RUNTIMES $rt"
|
||||
done
|
||||
ok " parent online"
|
||||
|
||||
# ─── 3. Wait for every sibling online ──────────────────────────────────
|
||||
# A runtime that never comes online locally is itself a finding in
|
||||
# container mode. The default external mode keeps this gate focused on
|
||||
# literal MCP peer visibility.
|
||||
log "3/5 waiting for all siblings online (up to ${PROVISION_TIMEOUT_SECS}s each — cold boot)..."
|
||||
for rt in $ACTIVE_RUNTIMES; do
|
||||
wid="$(_map_get WS_IDS_MAP "$rt")"
|
||||
S=$(wait_for_status "$wid" "online" "$PROVISION_TIMEOUT_SECS") || true
|
||||
if [ "$S" != "online" ]; then
|
||||
echo " ✗ $rt ($wid): never reached online (last=$S) — reproduces the never-online class locally"
|
||||
_map_set VERDICT_MAP "$rt" "FAIL(never-online:last=$S)"
|
||||
REGRESSED=1
|
||||
continue
|
||||
fi
|
||||
ok " $rt online"
|
||||
ONLINE_RUNTIMES="$ONLINE_RUNTIMES $rt"
|
||||
done
|
||||
fi
|
||||
|
||||
# ─── 4. THE GATE — literal mcp_molecule_list_peers via POST /:id/mcp ────
|
||||
# Shared, byte-identical assertion. Local passes "" for the org id (the
|
||||
|
||||
@@ -40,8 +40,10 @@
|
||||
# drives: POST /cp/admin/orgs (provision), GET
|
||||
# /cp/admin/orgs/:slug/admin-token (per-tenant token), DELETE
|
||||
# /cp/admin/tenants/:slug (teardown). The per-tenant admin token drives
|
||||
# tenant workspace creation; each workspace's OWN auth_token (returned by
|
||||
# POST /workspaces) drives its MCP call.
|
||||
# tenant workspace creation; each workspace's OWN auth_token drives its
|
||||
# MCP call. External-like runtimes may return the token in POST
|
||||
# /workspaces; managed container runtimes usually require the admin token
|
||||
# mint fallback below.
|
||||
#
|
||||
# Required env:
|
||||
# MOLECULE_ADMIN_TOKEN CP admin bearer — Railway staging CP_ADMIN_API_TOKEN
|
||||
@@ -104,6 +106,46 @@ tenant_call() {
|
||||
-H "Content-Type: application/json" "$@"
|
||||
}
|
||||
|
||||
# tenant_call_capture <method> <path> <out-file> [extra curl args...]
#
# Issues an authenticated request against the tenant ($TENANT_URL$path),
# writes the response body to <out-file> and prints only the HTTP status
# code on stdout (curl -w "%{http_code}"). Any remaining arguments are
# forwarded to curl unchanged, after the standard headers.
tenant_call_capture() {
  local method="$1"
  local path="$2"
  local out="$3"
  shift 3

  # Same three headers, in the same order, on every tenant call.
  local -a std_headers=(
    -H "Authorization: Bearer $TENANT_TOKEN"
    -H "X-Molecule-Org-Id: $ORG_ID"
    -H "Content-Type: application/json"
  )

  curl -sS -o "$out" -w "%{http_code}" -X "$method" "$TENANT_URL$path" \
    "${std_headers[@]}" "$@"
}
|
||||
|
||||
# redact_token_body
#
# Reads a response body on stdin and prints a diagnostic-safe version of it,
# truncated to 500 characters:
#   * JSON input: the value of every object key whose name contains "token"
#     (case-insensitive) is replaced with "<redacted>", recursively through
#     nested objects and lists; output is compact JSON.
#   * Non-JSON input: every <name>token=<value> pair has its value replaced
#     with <redacted>; the value runs up to the next "&" or whitespace.
#
# The Python program is passed in single quotes, so it must not contain a
# single-quote character and must stay at column 0.
redact_token_body() {
  python3 -c '
import json, re, sys

raw = sys.stdin.read()
try:
    data = json.loads(raw)
except Exception:
    # Best-effort fallback for non-JSON bodies (form/query style text).
    # The value class is [^&\s]: inside a raw string a doubled backslash
    # would mean "literal backslash or the letter s", which stops the match
    # at the first s in the token and leaks the remainder.
    print(re.sub(r"(?i)([a-z0-9_]*token)=([^&\s]+)", r"\1=<redacted>", raw)[:500])
    raise SystemExit(0)


def scrub(v):
    # Walk the decoded JSON and blank out every token-like value.
    if isinstance(v, dict):
        return {k: ("<redacted>" if "token" in k.lower() else scrub(val)) for k, val in v.items()}
    if isinstance(v, list):
        return [scrub(x) for x in v]
    return v


print(json.dumps(scrub(data), separators=(",", ":"))[:500])
'
}
|
||||
|
||||
# extract_auth_token
#
# Reads a JSON response on stdin and prints the workspace bearer token:
# the top-level auth_token if present and non-empty, otherwise
# connection.auth_token, otherwise an empty line.
#
# Always exits 0. Unparseable input, JSON that is not an object, and a
# null / non-object "connection" all yield an empty result instead of an
# uncaught AttributeError (whose traceback would be hidden by 2>/dev/null
# and whose non-zero status could abort a caller running under set -e).
#
# The Python program is passed in double quotes, so it must not contain
# double quotes, dollar signs, backticks or backslashes.
extract_auth_token() {
  python3 -c "
import sys, json
try:
    d = json.load(sys.stdin)
except Exception:
    print(''); sys.exit(0)
if not isinstance(d, dict):
    # Valid JSON but not an object (list, string, number, null).
    print(''); sys.exit(0)
conn = d.get('connection')
nested = conn.get('auth_token') if isinstance(conn, dict) else None
print(d.get('auth_token') or nested or '')
" 2>/dev/null
}
|
||||
|
||||
# ─── Scoped teardown ───────────────────────────────────────────────────
|
||||
# Deletes ONLY the org this run created (DELETE /cp/admin/tenants/$SLUG
|
||||
# with the {"confirm":$SLUG} fat-finger guard). Never a cluster-wide
|
||||
@@ -218,16 +260,31 @@ for rt in $PV_RUNTIMES; do
|
||||
R=$(tenant_call POST /workspaces \
|
||||
-d "{\"name\":\"pv-$rt\",\"runtime\":\"$rt\",\"tier\":2,\"parent_id\":\"$PARENT_ID\",\"secrets\":$SECRETS_JSON}")
|
||||
WID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null)
|
||||
# auth_token is top-level for container runtimes; external-like nest it
|
||||
# under connection.auth_token (verified vs staging response shape).
|
||||
WTOK=$(echo "$R" | python3 -c "
|
||||
import sys, json
|
||||
try: d = json.load(sys.stdin)
|
||||
except Exception: print(''); sys.exit(0)
|
||||
print(d.get('auth_token') or d.get('connection', {}).get('auth_token') or '')
|
||||
" 2>/dev/null)
|
||||
# External-like runtimes may return connection.auth_token on create.
|
||||
# Managed container runtimes usually return only id/status here, then
|
||||
# receive their bearer through registry/bootstrap; for this literal MCP
|
||||
# driver we mint an admin test token below.
|
||||
WTOK=$(echo "$R" | extract_auth_token)
|
||||
[ -n "$WID" ] || fail "$rt workspace create failed: $(echo "$R" | head -c 300)"
|
||||
[ -n "$WTOK" ] || fail "$rt workspace did not return an auth_token — cannot drive its MCP call (resp: $(echo "$R" | head -c 300))"
|
||||
TOKEN_DIAG=""
|
||||
if [ -z "$WTOK" ]; then
|
||||
TTOK_FILE=$(mktemp)
|
||||
TTOK_CODE=$(tenant_call_capture POST "/admin/workspaces/$WID/tokens" "$TTOK_FILE" 2>/dev/null || echo "curl_error")
|
||||
TTOK_RESP=$(cat "$TTOK_FILE" 2>/dev/null || true)
|
||||
WTOK=$(echo "$TTOK_RESP" | extract_auth_token)
|
||||
TOKEN_DIAG="POST /admin/workspaces/$WID/tokens -> HTTP $TTOK_CODE body: $(echo "$TTOK_RESP" | redact_token_body)"
|
||||
rm -f "$TTOK_FILE"
|
||||
fi
|
||||
if [ -z "$WTOK" ]; then
|
||||
TTOK_FILE=$(mktemp)
|
||||
TTOK_CODE=$(tenant_call_capture GET "/admin/workspaces/$WID/test-token" "$TTOK_FILE" 2>/dev/null || echo "curl_error")
|
||||
TTOK_RESP=$(cat "$TTOK_FILE" 2>/dev/null || true)
|
||||
WTOK=$(echo "$TTOK_RESP" | extract_auth_token)
|
||||
TOKEN_DIAG="${TOKEN_DIAG}
|
||||
GET /admin/workspaces/$WID/test-token -> HTTP $TTOK_CODE body: $(echo "$TTOK_RESP" | redact_token_body)"
|
||||
rm -f "$TTOK_FILE"
|
||||
fi
|
||||
[ -n "$WTOK" ] || fail "$rt workspace did not return or mint an auth_token — cannot drive its MCP call (create_resp: $(echo "$R" | redact_token_body); token_fallbacks: $TOKEN_DIAG)"
|
||||
WS_IDS[$rt]="$WID"
|
||||
WS_TOKENS[$rt]="$WTOK"
|
||||
ALL_WS_IDS="$ALL_WS_IDS $WID"
|
||||
|
||||
@@ -179,8 +179,14 @@ echo "--- Phase 3.5: Python parser classifies real server response (#2967) ---"
|
||||
PARSE_RESULT=$(WORKSPACE_ID="00000000-0000-0000-0000-000000000001" \
|
||||
python3 -c "
|
||||
import json, sys
|
||||
sys.path.insert(0, '$(cd "$(dirname "$0")/../../workspace" && pwd)')
|
||||
import a2a_response
|
||||
try:
|
||||
from molecule_runtime import a2a_response
|
||||
except ModuleNotFoundError as exc:
|
||||
raise SystemExit(
|
||||
'molecule-ai-workspace-runtime is required for poll-mode parser '
|
||||
'coverage; install it from the Gitea package registry before running '
|
||||
'this E2E'
|
||||
) from exc
|
||||
data = json.loads(r'''$A2A_RESP''')
|
||||
v = a2a_response.parse(data)
|
||||
print(type(v).__name__)
|
||||
|
||||
@@ -25,6 +25,13 @@ source "$(dirname "$0")/_lib.sh" # sets BASE default + helpers
|
||||
PASS=0
|
||||
FAIL=0
|
||||
TIMEOUT="${E2E_TIMEOUT:-60}"
|
||||
ADMIN_BEARER="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
|
||||
ADMIN_AUTH=()
|
||||
[ -n "$ADMIN_BEARER" ] && ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_BEARER")
|
||||
WS_A_TOKEN=""
|
||||
WS_A_AUTH=()
|
||||
WS_B_TOKEN=""
|
||||
WS_B_AUTH=()
|
||||
|
||||
check() {
|
||||
local desc="$1" expected="$2" actual="$3"
|
||||
@@ -75,15 +82,26 @@ echo "--- A. Per-workspace MCP server-name slug uniqueness ---"
|
||||
WS_A_NAME="e2e-cov-alpha-$$"
|
||||
WS_B_NAME="e2e-cov-beta-$$"
|
||||
|
||||
R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"$WS_A_NAME\",\"tier\":1}")
|
||||
check "POST /workspaces (alpha)" '"status":"provisioning"' "$R"
|
||||
R=$(curl -s -X POST "$BASE/workspaces" "${ADMIN_AUTH[@]}" -H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"$WS_A_NAME\",\"runtime\":\"external\",\"external\":true,\"tier\":1}")
|
||||
check "POST /workspaces (alpha)" '"status":"awaiting_agent"' "$R"
|
||||
WS_A_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))")
|
||||
if [ -n "$WS_A_ID" ]; then
|
||||
WS_A_TOKEN=$(e2e_mint_test_token "$WS_A_ID" 2>/dev/null || true)
|
||||
[ -n "$WS_A_TOKEN" ] && WS_A_AUTH=(-H "Authorization: Bearer $WS_A_TOKEN")
|
||||
if [ -z "$ADMIN_BEARER" ] && [ -n "$WS_A_TOKEN" ]; then
|
||||
ADMIN_AUTH=(-H "Authorization: Bearer $WS_A_TOKEN")
|
||||
fi
|
||||
fi
|
||||
|
||||
R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"$WS_B_NAME\",\"tier\":1}")
|
||||
check "POST /workspaces (beta)" '"status":"provisioning"' "$R"
|
||||
R=$(curl -s -X POST "$BASE/workspaces" "${ADMIN_AUTH[@]}" -H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"$WS_B_NAME\",\"runtime\":\"external\",\"external\":true,\"tier\":1}")
|
||||
check "POST /workspaces (beta)" '"status":"awaiting_agent"' "$R"
|
||||
WS_B_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))")
|
||||
if [ -n "$WS_B_ID" ]; then
|
||||
WS_B_TOKEN=$(e2e_mint_test_token "$WS_B_ID" 2>/dev/null || true)
|
||||
[ -n "$WS_B_TOKEN" ] && WS_B_AUTH=(-H "Authorization: Bearer $WS_B_TOKEN")
|
||||
fi
|
||||
|
||||
# external/connection returns the install-snippet. The per-workspace
|
||||
# fix (mc#1535) derives the MCP name as molecule-<slug>; mc#1536 extends
|
||||
@@ -91,8 +109,10 @@ WS_B_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).ge
|
||||
# grep the `claude mcp add` line, and assert the names differ.
|
||||
if [ -n "$WS_A_ID" ] && [ -n "$WS_B_ID" ]; then
|
||||
SNIPPET_A=$(curl -s --max-time "$TIMEOUT" \
|
||||
"${WS_A_AUTH[@]}" \
|
||||
"$BASE/workspaces/$WS_A_ID/external/connection")
|
||||
SNIPPET_B=$(curl -s --max-time "$TIMEOUT" \
|
||||
"${WS_B_AUTH[@]}" \
|
||||
"$BASE/workspaces/$WS_B_ID/external/connection")
|
||||
|
||||
MCP_A=$(echo "$SNIPPET_A" | python3 -c "
|
||||
@@ -151,7 +171,11 @@ import sys, json, re
|
||||
d=json.load(sys.stdin)
|
||||
def find(o):
|
||||
if isinstance(o,str):
|
||||
m=re.search(r'\[mcp_servers\.([^\]]+)\]',o); return m.group(1) if m else None
|
||||
for m in re.finditer(r'\[mcp_servers\.([^\]]+)\]',o):
|
||||
name=m.group(1)
|
||||
if name.startswith('molecule-') and '<' not in name:
|
||||
return name
|
||||
return None
|
||||
if isinstance(o,dict):
|
||||
for v in o.values():
|
||||
r=find(v)
|
||||
@@ -168,7 +192,11 @@ import sys, json, re
|
||||
d=json.load(sys.stdin)
|
||||
def find(o):
|
||||
if isinstance(o,str):
|
||||
m=re.search(r'\[mcp_servers\.([^\]]+)\]',o); return m.group(1) if m else None
|
||||
for m in re.finditer(r'\[mcp_servers\.([^\]]+)\]',o):
|
||||
name=m.group(1)
|
||||
if name.startswith('molecule-') and '<' not in name:
|
||||
return name
|
||||
return None
|
||||
if isinstance(o,dict):
|
||||
for v in o.values():
|
||||
r=find(v)
|
||||
@@ -212,7 +240,7 @@ echo "--- B. GIT_ASKPASS + GIT_HTTP_* env injection (mc#1525 + mc#1542) ---"
|
||||
if [ -n "${WS_A_ID:-}" ]; then
|
||||
# Wait briefly for provisioning to expose the container.
|
||||
for _ in 1 2 3 4 5 6 7 8 9 10; do
|
||||
R=$(curl -s "$BASE/workspaces/$WS_A_ID")
|
||||
R=$(curl -s "${ADMIN_AUTH[@]}" "$BASE/workspaces/$WS_A_ID")
|
||||
STATUS=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))" 2>/dev/null)
|
||||
[ "$STATUS" = "online" ] && break
|
||||
sleep 1
|
||||
@@ -225,7 +253,7 @@ if [ -n "${WS_A_ID:-}" ]; then
|
||||
# acceptable for the dev platform). The point is that the KEYS are
|
||||
# propagated by the post-#1542 provisioner — pre-#1542 these keys
|
||||
# were absent entirely.
|
||||
DEBUG=$(curl -s "$BASE/admin/workspaces/$WS_A_ID/debug" 2>/dev/null || true)
|
||||
DEBUG=$(curl -s "${ADMIN_AUTH[@]}" "$BASE/admin/workspaces/$WS_A_ID/debug" 2>/dev/null || true)
|
||||
if [ -n "$DEBUG" ] && echo "$DEBUG" | grep -q "workspace_secrets"; then
|
||||
# Presence-only check: KEY in the secrets map, value MAY be empty
|
||||
# in dev where no persona is bound.
|
||||
@@ -261,6 +289,7 @@ if [ -n "${WS_A_ID:-}" ]; then
|
||||
# The expected response shape post-fix is a structured failure (HTTP
|
||||
# 4xx or success:false JSON) — NOT a queued task that round-trips.
|
||||
R=$(curl -s --max-time 10 -X POST "$BASE/workspaces/$WS_A_ID/delegate" \
|
||||
"${WS_A_AUTH[@]}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"target_workspace_id\":\"$WS_A_ID\",\"task\":\"self-echo-test\"}" 2>&1)
|
||||
# Either the API gate (delegation.go) rejects, OR the inbox guard
|
||||
@@ -281,7 +310,7 @@ if [ -n "${WS_A_ID:-}" ]; then
|
||||
# an inboxable peer_agent kind. The /activity endpoint is the inbox
|
||||
# poller's source-of-truth.
|
||||
sleep 2
|
||||
AL=$(curl -s "$BASE/workspaces/$WS_A_ID/activity" 2>/dev/null || echo '[]')
|
||||
AL=$(curl -s "${WS_A_AUTH[@]}" "$BASE/workspaces/$WS_A_ID/activity" 2>/dev/null || echo '[]')
|
||||
# Count rows where source_id == workspace_id AND method != "delegate_result".
|
||||
ECHO_COUNT=$(echo "$AL" | python3 -c "
|
||||
import sys, json
|
||||
@@ -315,7 +344,15 @@ echo
|
||||
echo "--- Cleanup ---"
|
||||
for wid in "${WS_A_ID:-}" "${WS_B_ID:-}"; do
|
||||
[ -n "$wid" ] || continue
|
||||
curl -s -X DELETE "$BASE/workspaces/$wid?confirm=true" > /dev/null || true
|
||||
DELETE_AUTH=("${ADMIN_AUTH[@]}")
|
||||
if [ -z "$ADMIN_BEARER" ]; then
|
||||
if [ "$wid" = "${WS_A_ID:-}" ]; then
|
||||
DELETE_AUTH=("${WS_A_AUTH[@]}")
|
||||
elif [ "$wid" = "${WS_B_ID:-}" ]; then
|
||||
DELETE_AUTH=("${WS_B_AUTH[@]}")
|
||||
fi
|
||||
fi
|
||||
curl -s -X DELETE "$BASE/workspaces/$wid?confirm=true" "${DELETE_AUTH[@]}" > /dev/null || true
|
||||
echo "deleted $wid"
|
||||
done
|
||||
|
||||
|
||||
@@ -0,0 +1,193 @@
|
||||
"""Tests for `.gitea/scripts/detect-changes.py`."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
SCRIPT = REPO_ROOT / ".gitea" / "scripts" / "detect-changes.py"
|
||||
|
||||
|
||||
def load_module():
    """Load the script at ``SCRIPT`` as a module named ``detect_changes``.

    The script is executed from its file path through an importlib spec and
    the resulting module object is returned. A fresh module is built on every
    call.
    """
    module_spec = importlib.util.spec_from_file_location("detect_changes", SCRIPT)
    assert module_spec is not None

    detect_changes = importlib.util.module_from_spec(module_spec)
    assert module_spec.loader is not None

    # Run the script body so its functions and constants become attributes.
    module_spec.loader.exec_module(detect_changes)
    return detect_changes
|
||||
|
||||
|
||||
def test_ci_profile_classifies_surfaces():
    """The ``ci`` profile flags exactly the surface a changed path belongs to.

    Each case lists changed paths and the single surface expected to be true;
    ``None`` means no surface may trigger (workflow/doc-only changes).
    """
    detector = load_module()
    surfaces = ("platform", "canvas", "python", "scripts")
    cases = (
        (["workspace-server/internal/handlers/a2a_proxy.go"], "platform"),
        (["canvas/src/app/page.tsx"], "canvas"),
        (["tests/e2e/test_model_slug.sh"], "scripts"),
        ([".gitea/workflows/ci.yml", "README.md"], None),
    )

    for changed, hit in cases:
        expected = {surface: surface == hit for surface in surfaces}
        assert detector.classify("ci", changed) == expected
|
||||
|
||||
|
||||
def test_handlers_postgres_profile_is_narrower_than_workspace_server():
    """``handlers-postgres`` fires for handler code but not for all of workspace-server."""
    detector = load_module()
    expectations = (
        ("workspace-server/internal/handlers/a2a_proxy.go", True),
        ("workspace-server/internal/provisioner/provisioner.go", False),
    )

    for path, triggered in expectations:
        outputs = detector.classify("handlers-postgres", [path])
        assert outputs == {"handlers": triggered}
|
||||
|
||||
|
||||
def test_e2e_api_profile_covers_api_inputs():
    """``e2e-api`` fires for server handlers and the API E2E script, not for canvas."""
    detector = load_module()
    expectations = (
        ("workspace-server/internal/handlers/workspace.go", True),
        ("tests/e2e/test_api.sh", True),
        ("canvas/src/app/page.tsx", False),
    )

    for path, triggered in expectations:
        assert detector.classify("e2e-api", [path]) == {"api": triggered}
|
||||
|
||||
|
||||
def test_fail_open_all_true_for_missing_base():
    """``all_true`` returns every output of the ``ci`` profile set to True."""
    detector = load_module()

    every_surface_on = dict.fromkeys(("platform", "canvas", "python", "scripts"), True)
    assert detector.all_true("ci") == every_surface_on
|
||||
|
||||
|
||||
def test_fetch_base_prefers_advertised_base_ref(monkeypatch):
    """``fetch_base`` fetches the advertised base ref and stops once the base exists.

    ``base_exists`` is stubbed to report the base SHA as present on every
    call, so only the base-ref fetch should be issued and no fallback fetch
    of the SHA itself.
    """
    mod = load_module()
    calls: list[list[str]] = []

    def fake_base_exists(base: str) -> bool:
        # Always present. The previous stub kept a call counter and returned
        # `counter >= 1` after incrementing it, which is always true; the
        # counter was dead state and has been removed.
        return True

    class Result:
        # Minimal stand-in for a successful git invocation result.
        returncode = 0
        stdout = ""
        stderr = ""

    def fake_run_git(args: list[str], *, timeout: int = 30):
        calls.append(args)
        return Result()

    monkeypatch.setattr(mod, "base_exists", fake_base_exists)
    monkeypatch.setattr(mod, "run_git", fake_run_git)

    mod.fetch_base("abc123", "main")

    assert calls == [["fetch", "--depth=1", "origin", "main"]]
|
||||
|
||||
|
||||
def test_fetch_base_falls_back_to_sha_when_ref_fetch_does_not_materialize(monkeypatch):
    """When the base never materializes, ``fetch_base`` also fetches the SHA directly.

    ``base_exists`` is stubbed to always report the base as missing, so the
    base-ref fetch must be followed by a fetch of the SHA.
    """
    detector = load_module()
    git_invocations: list[list[str]] = []

    class _GitOk:
        # Minimal stand-in for a successful git invocation result.
        returncode = 0
        stdout = ""
        stderr = ""

    def record_git(args: list[str], *, timeout: int = 30):
        git_invocations.append(args)
        return _GitOk()

    monkeypatch.setattr(detector, "base_exists", lambda _base: False)
    monkeypatch.setattr(detector, "run_git", record_git)

    detector.fetch_base("abc123", "main")

    expected_sequence = [
        ["fetch", "--depth=1", "origin", "main"],
        ["fetch", "--depth=1", "origin", "abc123"],
    ]
    assert git_invocations == expected_sequence
|
||||
|
||||
|
||||
def test_changed_paths_uses_merge_base_for_pull_request(monkeypatch):
    """With ``use_merge_base=True`` the diff runs against the merge-base, not the raw base.

    The fake git answers ``merge-base`` with ``merge123`` and every other
    command with a single changed path.
    """
    detector = load_module()
    git_invocations: list[list[str]] = []

    def record_git(args: list[str], *, timeout: int = 30):
        git_invocations.append(args)
        output = "merge123\n" if args[0] == "merge-base" else "workspace/agent.py\n"

        class _GitOk:
            returncode = 0
            stdout = output
            stderr = ""

        return _GitOk()

    monkeypatch.setattr(detector, "run_git", record_git)

    paths = detector.changed_paths("base123", use_merge_base=True)

    assert paths == ["workspace/agent.py"]
    assert git_invocations == [
        ["merge-base", "base123", "HEAD"],
        ["diff", "--name-only", "merge123", "HEAD"],
    ]
|
||||
|
||||
|
||||
def test_detect_deepens_base_ref_when_pr_merge_base_missing(monkeypatch):
    """A PR whose merge-base is initially missing triggers one deepen, then a diff.

    ``merge_base`` is stubbed to return ``None`` on its first call and
    ``merge123`` afterwards; the recorded events must show the base ref being
    deepened before changed paths are computed with ``use_merge_base=True``.
    """
    detector = load_module()
    events: list[tuple[str, str | None]] = []
    # First lookup yields None; every later lookup falls through to the default.
    first_lookup = iter([None])

    def fake_merge_base(base: str):
        return next(first_lookup, "merge123")

    def fake_deepen_base_ref(base_ref: str):
        events.append(("deepen", base_ref))

    def fake_changed_paths(base: str, *, use_merge_base: bool):
        events.append(("changed", str(use_merge_base)))
        return [".gitea/workflows/ci.yml"]

    monkeypatch.setattr(detector, "base_exists", lambda _base: True)
    for attr, stub in (
        ("merge_base", fake_merge_base),
        ("deepen_base_ref", fake_deepen_base_ref),
        ("changed_paths", fake_changed_paths),
    ):
        monkeypatch.setattr(detector, attr, stub)

    outputs = detector.detect("ci", "pull_request", "base123", "", "main")

    assert outputs == dict.fromkeys(("platform", "canvas", "python", "scripts"), False)
    assert events == [("deepen", "main"), ("changed", "True")]
|
||||
@@ -26,9 +26,11 @@ import re
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
|
||||
import pytest # noqa: F401 (declares the dep)
|
||||
import yaml
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
SCRIPT = REPO_ROOT / ".gitea" / "scripts" / "lint-workflow-yaml.py"
|
||||
@@ -616,16 +618,24 @@ def test_rule10_docker_info_head_in_separate_step_without_pipefail_passes(tmp_pa
|
||||
|
||||
CI_WORKFLOW = REPO_ROOT / ".gitea" / "workflows" / "ci.yml"
|
||||
CI_SURFACES = ("platform", "canvas", "python", "scripts")
|
||||
DETECT_CHANGES_SCRIPT = REPO_ROOT / ".gitea" / "scripts" / "detect-changes.py"
|
||||
|
||||
|
||||
def _load_detect_changes():
    """Load ``DETECT_CHANGES_SCRIPT`` from its path as a module named ``detect_changes``."""
    module_spec = importlib.util.spec_from_file_location(
        "detect_changes", DETECT_CHANGES_SCRIPT
    )
    assert module_spec is not None

    loaded = importlib.util.module_from_spec(module_spec)
    assert module_spec.loader is not None

    # Execute the script so its module-level names (e.g. PROFILES) are populated.
    module_spec.loader.exec_module(loaded)
    return loaded
|
||||
|
||||
|
||||
def _ci_change_patterns() -> dict[str, re.Pattern[str]]:
    """Return the compiled path regex for each CI surface.

    Patterns come from the ``ci`` entry of ``PROFILES`` in the detect-changes
    script, so the tests exercise the same expressions the helper uses.
    The earlier scrape of ``grep -qE`` patterns out of the workflow text was
    computed and then overwritten, so it has been removed.

    Raises:
        AssertionError: if the profile's surfaces differ from ``CI_SURFACES``.
    """
    detect_changes = _load_detect_changes()
    patterns = {
        surface: re.compile(pattern)
        for surface, pattern in detect_changes.PROFILES["ci"].items()
    }
    # Guard against the profile and the test's surface list drifting apart.
    assert set(patterns) == set(CI_SURFACES)
    return patterns
|
||||
|
||||
@@ -693,3 +703,58 @@ def test_ci_change_detector_docs_and_meta_scripts_do_not_trigger_surfaces():
|
||||
"python": False,
|
||||
"scripts": False,
|
||||
}
|
||||
|
||||
|
||||
def test_ci_platform_go_pr_steps_are_path_scoped():
    """Every costly ``platform-build`` step is gated on the ``platform`` change output.

    A step counts as costly when it uses an action, its command starts with
    ``go `` or it mentions ``golangci-lint``.
    """
    workflow = yaml.safe_load(CI_WORKFLOW.read_text(encoding="utf-8"))
    job = workflow["jobs"]["platform-build"]
    assert job.get("needs") == "changes"

    def is_costly(step) -> bool:
        if step.get("uses"):
            return True
        command = step.get("run", "")
        return command.startswith("go ") or "golangci-lint" in command

    gated_steps = [step for step in job["steps"] if is_costly(step)]
    assert gated_steps

    required_clauses = (
        "github.event_name != 'pull_request'",
        "needs.changes.outputs.platform == 'true'",
    )
    for step in gated_steps:
        condition = step.get("if", "")
        for clause in required_clauses:
            assert clause in condition
|
||||
|
||||
|
||||
def test_ci_canvas_nextjs_pr_steps_are_path_scoped():
    """Every costly ``canvas-build`` step is gated on the ``canvas`` change output.

    A step counts as costly when it uses an action or its command starts
    with ``npm `` or ``npx ``.
    """
    workflow = yaml.safe_load(CI_WORKFLOW.read_text(encoding="utf-8"))
    job = workflow["jobs"]["canvas-build"]
    assert job.get("needs") == "changes"

    gated_steps = []
    for step in job["steps"]:
        command = step.get("run", "")
        if step.get("uses") or command.startswith("npm ") or command.startswith("npx "):
            gated_steps.append(step)
    assert gated_steps

    for step in gated_steps:
        condition = step.get("if", "")
        assert "github.event_name != 'pull_request'" in condition
        assert "needs.changes.outputs.canvas == 'true'" in condition
|
||||
|
||||
|
||||
def test_ci_shellcheck_pr_steps_are_path_scoped():
    """Every costly ``shellcheck`` step is gated on the ``scripts`` change output.

    A step counts as costly when it uses an action or its command starts
    with ``bash ``, ``find `` or ``shellcheck ``.
    """
    workflow = yaml.safe_load(CI_WORKFLOW.read_text(encoding="utf-8"))
    job = workflow["jobs"]["shellcheck"]
    assert job.get("needs") == "changes"

    costly_prefixes = ("bash ", "find ", "shellcheck ")

    def is_costly(step) -> bool:
        return bool(step.get("uses") or step.get("run", "").startswith(costly_prefixes))

    gated_steps = list(filter(is_costly, job["steps"]))
    assert gated_steps

    for step in gated_steps:
        condition = step.get("if", "")
        assert "github.event_name != 'pull_request'" in condition
        assert "needs.changes.outputs.scripts == 'true'" in condition
|
||||
|
||||
@@ -56,6 +56,21 @@ SCRIPT_PATH = (
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _stub_time_sleep(monkeypatch):
    """Autouse: replace ``time.sleep`` with a no-op for every test.

    run_once() waits RECHECK_DELAY_SECS (default 90s) through time.sleep().
    Left unpatched, each integration-style test that calls run_once() would
    block for that long: the suite went from ~0.1s to more than 4 minutes
    (task #394 review evidence). Patching here keeps the suite fast and
    deterministic, and no individual red-path test has to remember to do it.
    """

    def _return_immediately(s):
        return None

    monkeypatch.setattr("time.sleep", _return_immediately)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def wd_module():
|
||||
"""Import the script as a module under a known env."""
|
||||
@@ -809,3 +824,214 @@ def test_require_runtime_env_exits_when_missing(wd_module, monkeypatch):
|
||||
with pytest.raises(SystemExit) as excinfo:
|
||||
wd_module._require_runtime_env()
|
||||
assert excinfo.value.code == 2
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Action-run status filter + HEAD-recheck (task #394, mc#1597..1630)
|
||||
#
|
||||
# The existing cancel-cascade filter matched description=='Has been
|
||||
# cancelled' EXACTLY, but a 7-day DB sweep on 2026-05-20 showed that
|
||||
# only 76/702 (~11%) of action_run.status=3 (Cancelled) entries carry
|
||||
# that string — 89% are written as 'Failing after Ns', indistinguishable
|
||||
# from real action_run.status=2 (Failure) at the commit_status layer.
|
||||
#
|
||||
# Gitea 1.22.6 has NO REST endpoint exposing action_run.status, so the
|
||||
# canonical filter (status=2 only) cannot run from a Gitea Actions
|
||||
# runner. The next-best signal is the HEAD-recheck: re-fetch HEAD SHA
|
||||
# (or its combined status) right before filing. If HEAD moved on or
|
||||
# combined state recovered, the prior "red" was a transient
|
||||
# cancel-cascade and we skip-file.
|
||||
#
|
||||
# References:
|
||||
# - reference_chronic_red_sweep_cancelled_vs_failed_filter
|
||||
# - feedback_gitea_status_enum_use_helper_not_raw_int
|
||||
# - reference_gitea_action_status_enum_corrected_2026_05_19
|
||||
# - triage evidence 2026-05-21 04:55 (6 cancellation + 1 emission
|
||||
# artifact across mc#1597,1605,1609,1613,1626,1627,1630)
|
||||
# --------------------------------------------------------------------------
|
||||
def test_head_recheck_skips_file_when_head_moved(wd_module, monkeypatch, capsys):
    """No issue is filed when HEAD moves off the red SHA between the two branch reads.

    The first tick sees a failing combined status at SHA_A; the recheck then
    finds HEAD at SHA_B because another commit landed mid-tick. The watchdog
    must skip filing and leave evaluation of the new SHA to the next cron tick.

    REGRESSION CLASS: guards mc#1597..#1630, where 7 false positives were
    filed in 24h because cancel-cascade wrote commit_status=failure rows on
    SHAs already superseded by new merges.
    """
    SHA_A = SHA_RED
    SHA_B = SHA_GREEN
    failed_ctx = [
        {
            "context": "ci/test",
            "status": "failure",
            "target_url": "/r/runs/100/jobs/0",
            "description": "Failing after 12s",
        },
    ]
    # Branch reads in order: the initial read sees SHA_A, the recheck sees SHA_B.
    branches_responses = iter(
        [
            (200, _branches_response(SHA_A)),
            (200, _branches_response(SHA_B)),
        ]
    )

    def fake_api(method, path, *, body=None, query=None, expect_json=True):
        route = (method, path)
        if route == ("GET", "/repos/owner/repo/branches/main"):
            return next(branches_responses)
        if route == ("GET", f"/repos/owner/repo/commits/{SHA_A}/status"):
            return (200, _combined_status("failure", failed_ctx))
        if route == ("POST", "/repos/owner/repo/issues"):
            raise AssertionError(
                "watchdog filed a phantom issue despite HEAD moving away "
                "from the red SHA (regression: mc#1597..1630)"
            )
        if route == ("GET", "/repos/owner/repo/issues"):
            return (200, [])
        raise AssertionError(f"unexpected api call: {method} {path}")

    # The settling delay costs nothing: the autouse fixture stubs time.sleep.
    monkeypatch.setattr(wd_module, "api", fake_api)
    wd_module.run_once(dry_run=False)

    captured = capsys.readouterr()
    printed = captured.out.lower()
    assert any(phrase in printed for phrase in ("head drift", "head moved")), (
        f"expected a notice about HEAD drift, got: {captured.out!r}"
    )
|
||||
|
||||
|
||||
def test_head_recheck_skips_file_when_recheck_status_recovered(
    wd_module, monkeypatch, capsys,
):
    """Skip filing when the same SHA turns green between two status reads.

    The branch endpoint answers SHA_RED on every read, so HEAD never
    moves. The combined-status endpoint answers ``failure`` on the first
    read and ``success`` on the second, modelling a transient
    cancel-cascade that rolled forward to success on retry. The fake API
    raises if an issue is POSTed, and the run must print a notice that
    mentions "recovered" or "settled".

    This catches the mid-flight cancel-cascade window — the second
    largest false-positive cluster in mc#1597..1630.
    """

    def ci_test_row(status, description):
        # One commit-status row for the ci/test context.
        return {
            "context": "ci/test",
            "status": status,
            "target_url": "/r/runs/100/jobs/0",
            "description": description,
        }

    # Consumed in order by successive combined-status reads: red, then green.
    status_replies = iter([
        (200, _combined_status(
            "failure", [ci_test_row("failure", "Failing after 12s")])),
        (200, _combined_status(
            "success", [ci_test_row("success", "Successful in 30s")])),
    ])

    def fake_api(method, path, *, body=None, query=None, expect_json=True):
        call = (method, path)
        if call == ("GET", "/repos/owner/repo/branches/main"):
            return (200, _branches_response(SHA_RED))
        if call == ("GET", f"/repos/owner/repo/commits/{SHA_RED}/status"):
            return next(status_replies)
        if call == ("POST", "/repos/owner/repo/issues"):
            raise AssertionError(
                "watchdog filed a phantom issue despite combined status "
                "recovering on recheck (mid-flight cancel-cascade window)"
            )
        if call == ("GET", "/repos/owner/repo/issues"):
            return (200, [])
        raise AssertionError(f"unexpected api call: {method} {path}")

    monkeypatch.setattr(wd_module, "api", fake_api)
    wd_module.run_once(dry_run=False)

    captured = capsys.readouterr()
    printed = captured.out.lower()
    assert any(word in printed for word in ("recovered", "settled")), (
        f"expected a notice about post-settling recovery, got: {captured.out!r}"
    )
|
||||
|
||||
|
||||
def test_head_recheck_files_when_still_red_after_settling(
    wd_module, monkeypatch,
):
    """File the issue when the SHA is red on every status read.

    The branch endpoint always answers SHA_RED and the combined-status
    endpoint always answers ``failure``, so both the initial detection
    and the post-settling recheck see the same red SHA. This is the
    genuine-failure path the watchdog is designed to surface.

    Locks the over-filter: a future change that always-skips after
    recheck would dismiss real failures.
    """
    red_rows = [
        {
            "context": "ci/test",
            "status": "failure",
            "target_url": "/r/runs/100/jobs/0",
            "description": "Failing after 12s",
        },
    ]
    # One entry is appended for every issue-creation POST the watchdog makes.
    issue_posts = []

    def fake_api(method, path, *, body=None, query=None, expect_json=True):
        call = (method, path)
        if call == ("GET", "/repos/owner/repo/branches/main"):
            return (200, _branches_response(SHA_RED))
        if call == ("GET", f"/repos/owner/repo/commits/{SHA_RED}/status"):
            return (200, _combined_status("failure", red_rows))
        if call == ("GET", "/repos/owner/repo/issues"):
            return (200, [])
        if call == ("GET", "/repos/owner/repo/labels"):
            return (200, [{"id": 9, "name": "tier:high"}])
        if call == ("POST", "/repos/owner/repo/issues"):
            issue_posts.append(True)
            return (201, {"number": 999})
        if call == ("POST", "/repos/owner/repo/issues/999/labels"):
            return (200, [])
        raise AssertionError(f"unexpected api call: {method} {path}")

    monkeypatch.setattr(wd_module, "api", fake_api)
    wd_module.run_once(dry_run=False)

    assert issue_posts, (
        "genuine-failure path was skip-filed — head-recheck over-filter "
        "regression (would suppress all real main-red alarms)"
    )
|
||||
|
||||
|
||||
def test_head_recheck_skips_when_initial_was_only_cancel_cascade(
    wd_module, monkeypatch,
):
    """Cancel-cascade-only failures must never reach issue filing.

    Belt-and-braces: a combined-status failure whose only row carries
    description='Has been cancelled' should still be filtered by the
    EXISTING cancel-cascade filter — head-recheck must not accidentally
    bypass it. Regression guard for the existing mc#1564 fix.

    The test passes when ``run_once`` returns without the fake API
    raising, i.e. no issue was POSTed and no unexpected call was made.
    """
    cancelled_rows = [
        {
            "context": "ci/test",
            "status": "failure",
            "description": "Has been cancelled",
        },
    ]

    def fake_api(method, path, *, body=None, query=None, expect_json=True):
        call = (method, path)
        if call == ("GET", "/repos/owner/repo/branches/main"):
            return (200, _branches_response(SHA_RED))
        if call == ("GET", f"/repos/owner/repo/commits/{SHA_RED}/status"):
            return (200, _combined_status("failure", cancelled_rows))
        if call == ("POST", "/repos/owner/repo/issues"):
            raise AssertionError(
                "cancel-cascade-only entry must be filtered before any "
                "head-recheck logic runs"
            )
        if call == ("GET", "/repos/owner/repo/issues"):
            return (200, [])
        # Anything else is outside the calls this scenario is expected to make.
        raise AssertionError(f"unexpected api call: {method} {path}")

    monkeypatch.setattr(wd_module, "api", fake_api)
    # Success criterion: returns normally — no AssertionError, no POST.
    wd_module.run_once(dry_run=False)
|
||||
|
||||
|
||||
def test_resolve_action_run_status_returns_none_on_no_endpoint(wd_module):
    """The resolver returns None when no action_run.status endpoint exists.

    The action_run.status REST endpoint does NOT exist in Gitea 1.22.6
    (verified empirically 2026-05-20 — /api/v1/.../actions/runs/N
    returns HTTP 404 across all probe variants). The resolver must
    return None gracefully so callers fall back to the description-
    string + head-recheck heuristics.

    This pins the extensibility hook: when a future Gitea release (or
    an op-host proxy) exposes the endpoint, the resolver implementation
    can be swapped in without touching the caller contract.
    """
    # The hook must exist on the module before it can be exercised.
    assert hasattr(wd_module, "_resolve_action_run_status")

    # A typical target_url shape from real Gitea commit_status rows.
    resolved = wd_module._resolve_action_run_status(
        "/molecule-ai/molecule-core/actions/runs/75020/jobs/0"
    )

    assert resolved is None, (
        "resolver must return None when the action_run.status endpoint "
        "isn't reachable — callers depend on the None-fallback path"
    )
|
||||
|
||||
@@ -442,6 +442,46 @@ def test_reap_preserves_real_push(sr_module, monkeypatch):
|
||||
assert calls == [] # NO POST
|
||||
|
||||
|
||||
def test_reap_compensates_cancelled_real_push_status(sr_module, monkeypatch):
    """A cancelled real-push status is compensated with a success status.

    Gitea 1.22.6 maps cancelled push runs to failure statuses. A real
    push workflow with description exactly "Has been cancelled" is
    cancel-cascade noise, not a defect signal. Status-reaper should
    compensate it even though the workflow has a push trigger.

    The fake API records every call; exactly one POST to the commit's
    statuses endpoint, carrying the same context and state "success",
    is expected.
    """
    recorded = []

    def fake_api(method, path, *, body=None, query=None, expect_json=True):
        recorded.append((method, path, body))
        return (201, {})

    monkeypatch.setattr(sr_module, "api", fake_api)

    cancelled_row = {
        "context": "ci / test (push)",
        "status": "failure",
        "description": "Has been cancelled",
        "target_url": "https://example.test/actions/runs/1",
    }
    combined = {"state": "failure", "statuses": [cancelled_row]}

    counters = sr_module.reap({"ci": True}, combined, SHA, dry_run=False)

    assert counters["compensated"] == 1
    assert counters["compensated_cancelled_push"] == 1
    assert counters["preserved_real_push"] == 0
    assert len(recorded) == 1

    method, path, body = recorded[0]
    assert method == "POST"
    assert path == f"/repos/owner/repo/statuses/{SHA}"
    assert body["context"] == "ci / test (push)"
    assert body["state"] == "success"
|
||||
|
||||
|
||||
def test_reap_preserves_unknown_workflow(sr_module, monkeypatch, capsys):
|
||||
"""Workflow not in map → ::notice:: + skip (conservative)."""
|
||||
monkeypatch.setattr(
|
||||
|
||||
@@ -686,11 +686,22 @@ func (h *WorkspaceHandler) resolveAgentURL(ctx context.Context, workspaceID stri
|
||||
_ = db.CacheURL(ctx, workspaceID, agentURL)
|
||||
}
|
||||
|
||||
// When the platform runs inside Docker, 127.0.0.1:{host_port} is
|
||||
// unreachable (it's the platform container's own localhost, not the
|
||||
// Docker host). Rewrite to the container's Docker-bridge hostname.
|
||||
// When the platform runs inside Docker, a managed workspace's
|
||||
// 127.0.0.1:{host_port} URL points at the Docker host and must be
|
||||
// rewritten to the workspace container's Docker-bridge hostname.
|
||||
// External runtimes are not managed containers; their local test/runtime
|
||||
// URL is the target and must not be synthesized into ws-<id>:8000.
|
||||
if strings.HasPrefix(agentURL, "http://127.0.0.1:") && h.provisioner != nil && platformInDocker {
|
||||
agentURL = provisioner.InternalURL(workspaceID)
|
||||
var wsRuntime string
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT COALESCE(runtime, 'langgraph') FROM workspaces WHERE id = $1`,
|
||||
workspaceID,
|
||||
).Scan(&wsRuntime); err != nil {
|
||||
log.Printf("ProxyA2A: runtime lookup before Docker URL rewrite failed for %s: %v", workspaceID, err)
|
||||
}
|
||||
if !isExternalLikeRuntime(wsRuntime) {
|
||||
agentURL = provisioner.InternalURL(workspaceID)
|
||||
}
|
||||
}
|
||||
// SSRF defence: reject private/metadata URLs before making outbound call.
|
||||
if err := isSafeURL(agentURL); err != nil {
|
||||
|
||||
@@ -1511,6 +1511,35 @@ func TestResolveAgentURL_DockerRewrite(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveAgentURL_ExternalRuntimeLoopbackNotRewrittenInDocker(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mr := setupTestRedis(t)
|
||||
allowLoopbackForTest(t)
|
||||
handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
|
||||
waitForHandlerAsyncBeforeDBCleanup(t, handler)
|
||||
handler.provisioner = &stubLocalProv{}
|
||||
|
||||
restore := setPlatformInDockerForTest(true)
|
||||
defer restore()
|
||||
|
||||
agentURL := "http://127.0.0.1:55555"
|
||||
mr.Set("ws:ws-external:url", agentURL)
|
||||
mock.ExpectQuery("SELECT COALESCE\\(runtime").
|
||||
WithArgs("ws-external").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("external"))
|
||||
|
||||
url, perr := handler.resolveAgentURL(context.Background(), "ws-external")
|
||||
if perr != nil {
|
||||
t.Fatalf("unexpected error: %+v", perr)
|
||||
}
|
||||
if url != agentURL {
|
||||
t.Errorf("external runtime loopback URL must not be rewritten; got %q want %q", url, agentURL)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// --- dispatchA2A direct unit tests ---
|
||||
|
||||
func TestDispatchA2A_BuildRequestError(t *testing.T) {
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
package handlers
|
||||
|
||||
import (
	"database/sql"
	"errors"
	"fmt"
	"log"
	"net/http"

	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
	"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
	"github.com/gin-gonic/gin"
)
|
||||
|
||||
// AdminWorkspaceTokenHandler lets tenant admins mint the first workspace
// bearer for managed SaaS workspaces whose runtime receives its token later
// through registry registration. The type carries no fields of its own.
type AdminWorkspaceTokenHandler struct{}

// NewAdminWorkspaceTokenHandler returns a ready-to-use handler.
func NewAdminWorkspaceTokenHandler() *AdminWorkspaceTokenHandler {
	return new(AdminWorkspaceTokenHandler)
}
|
||||
|
||||
// Create handles POST /admin/workspaces/:id/tokens. The route must be mounted
|
||||
// behind AdminAuth; the plaintext token is returned exactly once.
|
||||
func (h *AdminWorkspaceTokenHandler) Create(c *gin.Context) {
|
||||
workspaceID := c.Param("id")
|
||||
if !validWorkspaceID(workspaceID) {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace id"})
|
||||
return
|
||||
}
|
||||
|
||||
var existing string
|
||||
err := db.DB.QueryRowContext(c.Request.Context(),
|
||||
`SELECT id FROM workspaces WHERE id = $1 AND status <> 'removed'`,
|
||||
workspaceID).Scan(&existing)
|
||||
if err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
}
|
||||
log.Printf("admin workspace tokens: workspace lookup failed for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "workspace lookup failed"})
|
||||
return
|
||||
}
|
||||
|
||||
var count int
|
||||
if err := db.DB.QueryRowContext(c.Request.Context(),
|
||||
`SELECT COUNT(*) FROM workspace_auth_tokens WHERE workspace_id = $1 AND revoked_at IS NULL`,
|
||||
workspaceID).Scan(&count); err != nil {
|
||||
log.Printf("admin workspace tokens: count failed for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to count tokens"})
|
||||
return
|
||||
}
|
||||
if count >= maxTokensPerWorkspace {
|
||||
c.JSON(http.StatusTooManyRequests, gin.H{"error": fmt.Sprintf("maximum %d active tokens per workspace", maxTokensPerWorkspace)})
|
||||
return
|
||||
}
|
||||
|
||||
token, err := wsauth.IssueToken(c.Request.Context(), db.DB, workspaceID)
|
||||
if err != nil {
|
||||
log.Printf("admin workspace tokens: issue failed for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create token"})
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("admin workspace tokens: issued token for workspace %s", workspaceID)
|
||||
c.JSON(http.StatusCreated, gin.H{
|
||||
"auth_token": token,
|
||||
"workspace_id": workspaceID,
|
||||
"message": "Save this token now — it cannot be retrieved again.",
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"net/http"
|
||||
"testing"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func TestAdminWorkspaceTokenHandler_Create_HappyPath(t *testing.T) {
|
||||
mock, cleanup := withMockDB(t)
|
||||
defer cleanup()
|
||||
|
||||
mock.ExpectQuery(`SELECT id FROM workspaces WHERE id = \$1 AND status <> 'removed'`).
|
||||
WithArgs(wsUUID1).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow(wsUUID1))
|
||||
mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens`).
|
||||
WithArgs(wsUUID1).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
|
||||
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
|
||||
WithArgs(wsUUID1, sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
w := makeReq(t, NewAdminWorkspaceTokenHandler().Create, "POST",
|
||||
"/admin/workspaces/"+wsUUID1+"/tokens", gin.Params{{Key: "id", Value: wsUUID1}})
|
||||
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var body struct {
|
||||
AuthToken string `json:"auth_token"`
|
||||
WorkspaceID string `json:"workspace_id"`
|
||||
}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
|
||||
t.Fatalf("decode: %v", err)
|
||||
}
|
||||
if body.AuthToken == "" || body.WorkspaceID != wsUUID1 {
|
||||
t.Fatalf("unexpected body: %+v", body)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminWorkspaceTokenHandler_Create_MissingWorkspace(t *testing.T) {
|
||||
mock, cleanup := withMockDB(t)
|
||||
defer cleanup()
|
||||
|
||||
mock.ExpectQuery(`SELECT id FROM workspaces WHERE id = \$1 AND status <> 'removed'`).
|
||||
WithArgs(wsUUID1).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}))
|
||||
|
||||
w := makeReq(t, NewAdminWorkspaceTokenHandler().Create, "POST",
|
||||
"/admin/workspaces/"+wsUUID1+"/tokens", gin.Params{{Key: "id", Value: wsUUID1}})
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Fatalf("expected 404, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminWorkspaceTokenHandler_Create_RateLimited(t *testing.T) {
|
||||
mock, cleanup := withMockDB(t)
|
||||
defer cleanup()
|
||||
|
||||
mock.ExpectQuery(`SELECT id FROM workspaces WHERE id = \$1 AND status <> 'removed'`).
|
||||
WithArgs(wsUUID1).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow(wsUUID1))
|
||||
mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens`).
|
||||
WithArgs(wsUUID1).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(maxTokensPerWorkspace))
|
||||
|
||||
w := makeReq(t, NewAdminWorkspaceTokenHandler().Create, "POST",
|
||||
"/admin/workspaces/"+wsUUID1+"/tokens", gin.Params{{Key: "id", Value: wsUUID1}})
|
||||
|
||||
if w.Code != http.StatusTooManyRequests {
|
||||
t.Fatalf("expected 429, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdminWorkspaceTokenHandler_Create_IssueFails(t *testing.T) {
|
||||
mock, cleanup := withMockDB(t)
|
||||
defer cleanup()
|
||||
|
||||
mock.ExpectQuery(`SELECT id FROM workspaces WHERE id = \$1 AND status <> 'removed'`).
|
||||
WithArgs(wsUUID1).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow(wsUUID1))
|
||||
mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens`).
|
||||
WithArgs(wsUUID1).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
|
||||
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
|
||||
WillReturnError(errors.New("disk full"))
|
||||
|
||||
w := makeReq(t, NewAdminWorkspaceTokenHandler().Create, "POST",
|
||||
"/admin/workspaces/"+wsUUID1+"/tokens", gin.Params{{Key: "id", Value: wsUUID1}})
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Fatalf("expected 500, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
@@ -283,7 +283,7 @@ claude --dangerously-load-development-channels \
|
||||
|
||||
// externalUniversalMcpTemplate — runtime-agnostic standalone path.
|
||||
// Ships as the `molecule-mcp` console script in the
|
||||
// molecule-ai-workspace-runtime PyPI wheel (workspace/mcp_cli.py).
|
||||
// molecule-ai-workspace-runtime wheel published to the Gitea package registry.
|
||||
// Any MCP-aware runtime (Claude Code, hermes, codex, third-party)
|
||||
// registers it once and gets the same 8 universal tools that
|
||||
// container-bound runtimes use today: delegate_task, list_peers,
|
||||
@@ -322,7 +322,7 @@ const externalUniversalMcpTemplate = `# Universal MCP — standalone register +
|
||||
|
||||
# 1. Install the workspace runtime wheel (once per machine — safe to
|
||||
# re-run; subsequent workspaces share the same wheel):
|
||||
pip install molecule-ai-workspace-runtime
|
||||
pip install --index-url https://git.moleculesai.app/api/packages/molecule-ai/pypi/simple/ molecule-ai-workspace-runtime
|
||||
|
||||
# 2. Wire molecule-mcp into your agent's MCP config. Claude Code:
|
||||
# NOTE the server name is workspace-specific ("{{MCP_SERVER_NAME}}") so
|
||||
@@ -344,7 +344,7 @@ claude mcp add {{MCP_SERVER_NAME}} -s user -- env \
|
||||
# needed when calling tools through the MCP server.
|
||||
|
||||
# Need help?
|
||||
# Where to install: https://pypi.org/project/molecule-ai-workspace-runtime/
|
||||
# Where to install: https://git.moleculesai.app/api/packages/molecule-ai/pypi/simple/molecule-ai-workspace-runtime/
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/mcp-server-setup
|
||||
# Common errors:
|
||||
# • "Tools not appearing in your agent" — run ` + "`claude mcp list`" + ` (or
|
||||
@@ -359,8 +359,8 @@ claude mcp add {{MCP_SERVER_NAME}} -s user -- env \
|
||||
`
|
||||
|
||||
// externalPythonTemplate uses molecule-sdk-python's RemoteAgentClient +
|
||||
// A2AServer (PR #13 in that repo). Until the SDK cuts a v0.y release
|
||||
// to PyPI the snippet pins git+main.
|
||||
// A2AServer. Until the SDK is published to the Gitea package registry the
|
||||
// snippet pins git+main.
|
||||
const externalPythonTemplate = `# pip install 'git+https://git.moleculesai.app/molecule-ai/molecule-sdk-python.git@main'
|
||||
|
||||
import asyncio
|
||||
@@ -396,7 +396,7 @@ if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
|
||||
# Need help?
|
||||
# Where to install: https://pypi.org/project/molecule-ai-workspace-runtime/
|
||||
# Where to install: https://git.moleculesai.app/api/packages/molecule-ai/pypi/simple/molecule-ai-workspace-runtime/
|
||||
# Documentation: https://doc.moleculesai.app/docs/guides/external-agent-registration
|
||||
# Common errors:
|
||||
# • 401 from /heartbeat — AUTH_TOKEN expired or wrong workspace_id.
|
||||
@@ -445,7 +445,7 @@ const externalHermesChannelTemplate = `# Hermes channel — bridges this workspa
|
||||
# also supported via the plugin's dual-mode fallback.
|
||||
#
|
||||
# 1. Install the runtime + plugin:
|
||||
pip install molecule-ai-workspace-runtime
|
||||
pip install --index-url https://git.moleculesai.app/api/packages/molecule-ai/pypi/simple/ molecule-ai-workspace-runtime
|
||||
pip install 'git+https://git.moleculesai.app/molecule-ai/hermes-channel-molecule.git'
|
||||
|
||||
# 2. Export the workspace credentials:
|
||||
@@ -528,7 +528,7 @@ const externalCodexTemplate = `# Codex external setup — outbound tools (MCP) +
|
||||
|
||||
# 1. Install codex CLI, the workspace runtime, and the bridge daemon:
|
||||
npm install -g @openai/codex@latest
|
||||
pip install molecule-ai-workspace-runtime
|
||||
pip install --index-url https://git.moleculesai.app/api/packages/molecule-ai/pypi/simple/ molecule-ai-workspace-runtime
|
||||
pip install codex-channel-molecule
|
||||
|
||||
# 2. Wire the molecule MCP server into codex's config.toml — this is
|
||||
@@ -620,7 +620,7 @@ const externalKimiTemplate = `# Kimi CLI external setup — register + heartbeat
|
||||
# No public URL needed; runs behind NAT in poll mode.
|
||||
|
||||
# 1. Install the workspace runtime wheel (provides HTTP client):
|
||||
pip install molecule-ai-workspace-runtime
|
||||
pip install --index-url https://git.moleculesai.app/api/packages/molecule-ai/pypi/simple/ molecule-ai-workspace-runtime
|
||||
|
||||
# 2. Save credentials and the bridge script:
|
||||
mkdir -p ~/.molecule-ai/kimi-{{MCP_SERVER_NAME}}
|
||||
@@ -779,7 +779,7 @@ const externalOpenClawTemplate = `# OpenClaw MCP config — outbound tool path.
|
||||
# (register-on-startup + 20s heartbeat). Older versions only ship
|
||||
# a2a_mcp_server which does not heartbeat.
|
||||
npm install -g openclaw@latest
|
||||
pip install "molecule-ai-workspace-runtime>=0.1.999"
|
||||
pip install --index-url https://git.moleculesai.app/api/packages/molecule-ai/pypi/simple/ "molecule-ai-workspace-runtime>=0.1.999"
|
||||
|
||||
# 2. Onboard openclaw against your model provider (one-time setup).
|
||||
# --non-interactive needs an explicit --provider + --model so it
|
||||
|
||||
@@ -84,6 +84,7 @@ type mcpTool struct {
|
||||
type MCPHandler struct {
|
||||
database *sql.DB
|
||||
broadcaster *events.Broadcaster
|
||||
a2aProxy func(ctx context.Context, workspaceID string, body []byte, callerID string, logActivity bool) (int, []byte, error)
|
||||
|
||||
// memv2 is the v2 memory plugin wiring (RFC #2728). nil-safe:
|
||||
// every v2 tool calls memoryV2Available() first and returns a
|
||||
@@ -98,6 +99,14 @@ func NewMCPHandler(database *sql.DB, broadcaster *events.Broadcaster) *MCPHandle
|
||||
return &MCPHandler{database: database, broadcaster: broadcaster}
|
||||
}
|
||||
|
||||
func (h *MCPHandler) proxyA2ARequest(ctx context.Context, workspaceID string, body []byte, callerID string, logActivity bool) (int, []byte, error) {
|
||||
if h.a2aProxy != nil {
|
||||
return h.a2aProxy(ctx, workspaceID, body, callerID, logActivity)
|
||||
}
|
||||
wh := NewWorkspaceHandler(h.broadcaster, nil, "", "")
|
||||
return wh.ProxyA2ARequest(ctx, workspaceID, body, callerID, logActivity)
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Tool definitions (mirrors workspace/a2a_mcp_server.py TOOLS list)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -53,6 +53,15 @@ func mcpPost(t *testing.T, h *MCPHandler, workspaceID string, body interface{})
|
||||
return w
|
||||
}
|
||||
|
||||
func expectCanCommunicateSiblings(mock sqlmock.Sqlmock, callerID, targetID, parentID string) {
|
||||
mock.ExpectQuery(`SELECT id, parent_id FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(callerID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(callerID, parentID))
|
||||
mock.ExpectQuery(`SELECT id, parent_id FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(targetID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(targetID, parentID))
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// initialize
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
@@ -178,6 +187,98 @@ func TestMCPHandler_ToolsList_ContainsExpectedTools(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestMCPHandler_DelegateTask_RoutesThroughPlatformA2AProxy(t *testing.T) {
|
||||
h, mock := newMCPHandler(t)
|
||||
callerID := "11111111-1111-1111-1111-111111111111"
|
||||
targetID := "22222222-2222-2222-2222-222222222222"
|
||||
parentID := "33333333-3333-3333-3333-333333333333"
|
||||
|
||||
expectCanCommunicateSiblings(mock, callerID, targetID, parentID)
|
||||
mock.ExpectExec(`(?s)INSERT INTO activity_logs.*'delegation'.*'delegate'`).
|
||||
WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
var gotTarget, gotCaller string
|
||||
h.a2aProxy = func(ctx context.Context, workspaceID string, body []byte, callerID string, logActivity bool) (int, []byte, error) {
|
||||
gotTarget = workspaceID
|
||||
gotCaller = callerID
|
||||
if !logActivity {
|
||||
t.Fatal("delegate_task should log through platform A2A proxy")
|
||||
}
|
||||
if !strings.Contains(string(body), "do work") {
|
||||
t.Fatalf("A2A body missing task text: %s", string(body))
|
||||
}
|
||||
return 200, []byte(`{"result":{"message":{"parts":[{"text":"done"}]}}}`), nil
|
||||
}
|
||||
|
||||
out, err := h.toolDelegateTask(context.Background(), callerID, map[string]interface{}{
|
||||
"workspace_id": targetID,
|
||||
"task": "do work",
|
||||
}, mcpCallTimeout)
|
||||
if err != nil {
|
||||
t.Fatalf("delegate_task returned error: %v", err)
|
||||
}
|
||||
if out != "done" {
|
||||
t.Fatalf("delegate_task response = %q, want done", out)
|
||||
}
|
||||
if gotTarget != targetID || gotCaller != callerID {
|
||||
t.Fatalf("proxy called with target=%q caller=%q, want target=%q caller=%q", gotTarget, gotCaller, targetID, callerID)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMCPHandler_DelegateTaskAsync_RoutesThroughPlatformA2AProxy(t *testing.T) {
|
||||
h, mock := newMCPHandler(t)
|
||||
callerID := "11111111-1111-1111-1111-111111111111"
|
||||
targetID := "22222222-2222-2222-2222-222222222222"
|
||||
parentID := "33333333-3333-3333-3333-333333333333"
|
||||
|
||||
expectCanCommunicateSiblings(mock, callerID, targetID, parentID)
|
||||
mock.ExpectExec(`(?s)INSERT INTO activity_logs.*'delegation'.*'delegate'`).
|
||||
WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
called := make(chan struct{}, 1)
|
||||
h.a2aProxy = func(ctx context.Context, workspaceID string, body []byte, proxyCallerID string, logActivity bool) (int, []byte, error) {
|
||||
if workspaceID != targetID || proxyCallerID != callerID {
|
||||
t.Fatalf("unexpected proxy route target=%q caller=%q", workspaceID, proxyCallerID)
|
||||
}
|
||||
if !strings.Contains(string(body), "async work") {
|
||||
t.Fatalf("A2A body missing task text: %s", string(body))
|
||||
}
|
||||
called <- struct{}{}
|
||||
return 200, []byte(`{"result":{"message":{"parts":[{"text":"accepted"}]}}}`), nil
|
||||
}
|
||||
|
||||
out, err := h.toolDelegateTaskAsync(context.Background(), callerID, map[string]interface{}{
|
||||
"workspace_id": targetID,
|
||||
"task": "async work",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("delegate_task_async returned error: %v", err)
|
||||
}
|
||||
if !strings.Contains(out, `"status":"dispatched"`) {
|
||||
t.Fatalf("delegate_task_async response = %s", out)
|
||||
}
|
||||
waitGlobalAsyncForTest()
|
||||
select {
|
||||
case <-called:
|
||||
default:
|
||||
t.Fatal("async delegate did not call platform A2A proxy")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// notifications/initialized
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -7,24 +7,19 @@ package handlers
|
||||
// and A2A response parsing helpers.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// insertMCPDelegationRow writes a delegation activity row so the canvas
|
||||
// Agent Comms tab can show the task text for MCP-initiated delegations.
|
||||
// Mirrors insertDelegationRow (delegation.go) for the MCP tool path.
|
||||
@@ -190,15 +185,6 @@ func (h *MCPHandler) toolDelegateTask(ctx context.Context, callerID string, args
|
||||
// Non-fatal: still make the A2A call even if activity log write fails.
|
||||
}
|
||||
|
||||
agentURL, err := mcpResolveURL(ctx, h.database, targetID)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// SSRF defence: reject private/metadata URLs before making outbound call.
|
||||
if err := isSafeURL(agentURL); err != nil {
|
||||
return "", fmt.Errorf("invalid workspace URL: %w", err)
|
||||
}
|
||||
|
||||
a2aBody, err := json.Marshal(map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
"id": uuid.New().String(),
|
||||
@@ -218,36 +204,17 @@ func (h *MCPHandler) toolDelegateTask(ctx context.Context, callerID string, args
|
||||
reqCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(reqCtx, "POST", agentURL+"/a2a", bytes.NewReader(a2aBody))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
// X-Workspace-ID identifies this caller to the A2A proxy. The /workspaces/:id/a2a
|
||||
// endpoint is intentionally outside WorkspaceAuth (agents do not hold bearer tokens
|
||||
// to peer workspaces). Access control is enforced by CanCommunicate above, which
|
||||
// already validated callerID → targetID before this request is constructed.
|
||||
// callerID was authenticated by WorkspaceAuth on the MCP bridge entry point,
|
||||
// so this header reflects a verified caller identity, not a spoofable value.
|
||||
httpReq.Header.Set("X-Workspace-ID", callerID)
|
||||
|
||||
resp, err := http.DefaultClient.Do(httpReq)
|
||||
status, body, err := h.proxyA2ARequest(reqCtx, targetID, a2aBody, callerID, true)
|
||||
if err != nil {
|
||||
updateMCPDelegationStatus(ctx, h.database, callerID, delegationID, "failed", err.Error())
|
||||
return "", fmt.Errorf("A2A call failed: %w", err)
|
||||
return "", fmt.Errorf("A2A proxy failed: %w", err)
|
||||
}
|
||||
if status < 200 || status >= 300 {
|
||||
updateMCPDelegationStatus(ctx, h.database, callerID, delegationID, "failed", fmt.Sprintf("A2A proxy returned status %d", status))
|
||||
return "", fmt.Errorf("A2A proxy returned status %d", status)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
// A 200/500 from the peer still means the call was dispatched — only
|
||||
// network errors are truly "failed". Status 'dispatched' is correct for
|
||||
// any HTTP response (peer's A2A layer handles the actual processing).
|
||||
updateMCPDelegationStatus(ctx, h.database, callerID, delegationID, "dispatched", "")
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to read response: %w", err)
|
||||
}
|
||||
|
||||
return extractA2AText(body), nil
|
||||
}
|
||||
|
||||
@@ -278,24 +245,13 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
|
||||
|
||||
// Fire and forget in a detached goroutine. Use a background context so
|
||||
// the call is not cancelled when the HTTP request completes.
|
||||
// RFC internal#524 Layer 1: globalGoAsync — the detached call reads
|
||||
// db.DB (mcpResolveURL + updateMCPDelegationStatus) and must be
|
||||
// drained by drainTestAsync before any t.Cleanup-driven db.DB swap.
|
||||
// RFC internal#524 Layer 1: globalGoAsync — the detached call reads db.DB
|
||||
// through the platform A2A proxy and must be drained by drainTestAsync
|
||||
// before any t.Cleanup-driven db.DB swap.
|
||||
globalGoAsync(func() {
|
||||
bgCtx, cancel := context.WithTimeout(context.Background(), mcpAsyncCallTimeout)
|
||||
defer cancel()
|
||||
|
||||
agentURL, err := mcpResolveURL(bgCtx, h.database, targetID)
|
||||
if err != nil {
|
||||
log.Printf("MCPHandler.delegate_task_async: resolve URL for %s: %v", targetID, err)
|
||||
return
|
||||
}
|
||||
// SSRF defence: reject private/metadata URLs before making outbound call.
|
||||
if err := isSafeURL(agentURL); err != nil {
|
||||
log.Printf("MCPHandler.delegate_task_async: unsafe URL for %s: %v", targetID, err)
|
||||
return
|
||||
}
|
||||
|
||||
a2aBody, _ := json.Marshal(map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
"id": delegationID,
|
||||
@@ -309,22 +265,15 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
|
||||
},
|
||||
})
|
||||
|
||||
httpReq, err := http.NewRequestWithContext(bgCtx, "POST", agentURL+"/a2a", bytes.NewReader(a2aBody))
|
||||
if err != nil {
|
||||
log.Printf("MCPHandler.delegate_task_async: create request: %v", err)
|
||||
status, _, err := h.proxyA2ARequest(bgCtx, targetID, a2aBody, callerID, true)
|
||||
if err != nil || status < 200 || status >= 300 {
|
||||
if err != nil {
|
||||
log.Printf("MCPHandler.delegate_task_async: A2A proxy to %s: %v", targetID, err)
|
||||
} else {
|
||||
log.Printf("MCPHandler.delegate_task_async: A2A proxy to %s returned status %d", targetID, status)
|
||||
}
|
||||
return
|
||||
}
|
||||
httpReq.Header.Set("Content-Type", "application/json")
|
||||
httpReq.Header.Set("X-Workspace-ID", callerID)
|
||||
|
||||
resp, err := http.DefaultClient.Do(httpReq)
|
||||
if err != nil {
|
||||
log.Printf("MCPHandler.delegate_task_async: A2A call to %s: %v", targetID, err)
|
||||
return
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
// Drain response so the connection can be reused.
|
||||
_, _ = io.Copy(io.Discard, resp.Body)
|
||||
})
|
||||
|
||||
return fmt.Sprintf(`{"task_id":%q,"status":"dispatched","target_id":%q}`, delegationID, targetID), nil
|
||||
@@ -405,7 +354,6 @@ func (h *MCPHandler) toolSendMessageToUser(ctx context.Context, workspaceID stri
|
||||
return "Message sent.", nil
|
||||
}
|
||||
|
||||
|
||||
func (h *MCPHandler) toolCommitMemory(ctx context.Context, workspaceID string, args map[string]interface{}) (string, error) {
|
||||
// PR-6 (RFC #2728) compat shim: when the v2 plugin is wired
|
||||
// (MEMORY_PLUGIN_URL set), translate legacy scope→namespace and
|
||||
@@ -534,56 +482,6 @@ func (h *MCPHandler) toolRecallMemory(ctx context.Context, workspaceID string, a
|
||||
// Helpers
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
// mcpResolveURL returns a routable URL for a workspace's A2A server.
|
||||
//
|
||||
// Resolution order:
|
||||
// 1. Docker-internal URL cache (set by provisioner; correct when platform is in Docker)
|
||||
// 2. Redis URL cache
|
||||
// 3. DB `url` column fallback, with 127.0.0.1→Docker bridge rewrite when in Docker
|
||||
//
|
||||
// SECURITY (F1083 / #1130): all three paths run the returned URL through
|
||||
// validateAgentURL to block SSRF targets (private IPs, loopback, cloud metadata).
|
||||
func mcpResolveURL(ctx context.Context, database *sql.DB, workspaceID string) (string, error) {
|
||||
if platformInDocker {
|
||||
if url, err := db.GetCachedInternalURL(ctx, workspaceID); err == nil && url != "" {
|
||||
if err := validateAgentURL(url); err != nil {
|
||||
return "", fmt.Errorf("workspace %s: forbidden URL from internal cache: %w", workspaceID, err)
|
||||
}
|
||||
return url, nil
|
||||
}
|
||||
}
|
||||
if url, err := db.GetCachedURL(ctx, workspaceID); err == nil && url != "" {
|
||||
if platformInDocker && strings.HasPrefix(url, "http://127.0.0.1:") {
|
||||
return provisioner.InternalURL(workspaceID), nil
|
||||
}
|
||||
if err := validateAgentURL(url); err != nil {
|
||||
return "", fmt.Errorf("workspace %s: forbidden URL from Redis cache: %w", workspaceID, err)
|
||||
}
|
||||
return url, nil
|
||||
}
|
||||
|
||||
var urlStr sql.NullString
|
||||
var status string
|
||||
if err := database.QueryRowContext(ctx,
|
||||
`SELECT url, status FROM workspaces WHERE id = $1`, workspaceID,
|
||||
).Scan(&urlStr, &status); err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
return "", fmt.Errorf("workspace %s not found", workspaceID)
|
||||
}
|
||||
return "", fmt.Errorf("workspace lookup failed: %w", err)
|
||||
}
|
||||
if !urlStr.Valid || urlStr.String == "" {
|
||||
return "", fmt.Errorf("workspace %s has no URL (status: %s)", workspaceID, status)
|
||||
}
|
||||
if platformInDocker && strings.HasPrefix(urlStr.String, "http://127.0.0.1:") {
|
||||
return provisioner.InternalURL(workspaceID), nil
|
||||
}
|
||||
if err := validateAgentURL(urlStr.String); err != nil {
|
||||
return "", fmt.Errorf("workspace %s: forbidden URL from DB: %w", workspaceID, err)
|
||||
}
|
||||
return urlStr.String, nil
|
||||
}
|
||||
|
||||
// extractA2AText extracts human-readable text from an A2A JSON-RPC response body.
|
||||
// Falls back to the raw JSON when no text part can be found.
|
||||
func extractA2AText(body []byte) string {
|
||||
@@ -632,4 +530,3 @@ func extractA2AText(body []byte) string {
|
||||
b, _ := json.Marshal(result)
|
||||
return string(b)
|
||||
}
|
||||
|
||||
|
||||
@@ -112,7 +112,7 @@ func (h *RegistryHandler) SetQueueDrainFunc(f QueueDrainFunc) {
|
||||
// Go's net.ParseIP.To4() before Contains() runs, so the IPv4 rules above
|
||||
// catch those without a separate entry.
|
||||
//
|
||||
// F1083/#1130 (SSRF on mcpResolveURL / a2a_proxy resolveAgentURL): in
|
||||
// F1083/#1130 (SSRF on direct A2A URL resolution): in
|
||||
// addition to blocking IP literals, DNS names are now resolved and each
|
||||
// returned IP is checked against the blocklist. This closes the gap where
|
||||
// an attacker could register agent.example.com pointing to 169.254.169.254.
|
||||
|
||||
@@ -397,6 +397,8 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
|
||||
wsAuth.GET("/tokens", tokh.List)
|
||||
wsAuth.POST("/tokens", tokh.Create)
|
||||
wsAuth.DELETE("/tokens/:tokenId", tokh.Revoke)
|
||||
adminTokH := handlers.NewAdminWorkspaceTokenHandler()
|
||||
r.POST("/admin/workspaces/:id/tokens", middleware.AdminAuth(db.DB), adminTokH.Create)
|
||||
|
||||
// Memory
|
||||
memh := handlers.NewMemoryHandler()
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
# coverage.py config — consumed by `pytest --cov` via the pytest-cov
|
||||
# plugin. Lives here (not in pytest.ini) because coverage.py only reads
|
||||
# .coveragerc / setup.cfg / tox.ini / pyproject.toml — the [coverage:*]
|
||||
# sections in pytest.ini are silently ignored. See issue #1817.
|
||||
[run]
|
||||
omit =
|
||||
*/tests/*
|
||||
*/__init__.py
|
||||
plugins_registry/*
|
||||
|
||||
[report]
|
||||
# Skip files at 100% in the term-missing output to keep CI logs readable.
|
||||
skip_covered = True
|
||||
@@ -1,104 +0,0 @@
|
||||
FROM python:3.11-slim@sha256:e78299e55776ca065dcb769f80161f48465ad352014240eb5fe4712e22505e9b
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install Node.js, git, gh CLI in a single layer to minimize image size
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends curl git ca-certificates && \
|
||||
# Node.js 22
|
||||
curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \
|
||||
apt-get install -y --no-install-recommends nodejs && \
|
||||
# GitHub CLI
|
||||
curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
|
||||
| dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg && \
|
||||
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
|
||||
> /etc/apt/sources.list.d/github-cli.list && \
|
||||
apt-get update && apt-get install -y --no-install-recommends gh && \
|
||||
# Cleanup apt caches and temp files
|
||||
apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
# Create non-root user (claude --dangerously-skip-permissions refuses root)
|
||||
RUN useradd -m -s /bin/bash agent
|
||||
|
||||
# Install base Python dependencies (A2A SDK + HTTP only)
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy runtime code (adapters/ has been removed — adapters now live in standalone
|
||||
# template repos and install molecule-ai-workspace-runtime from PyPI)
|
||||
COPY *.py ./
|
||||
COPY entrypoint.sh ./
|
||||
COPY skill_loader/ ./skill_loader/
|
||||
COPY builtin_tools/ ./builtin_tools/
|
||||
COPY plugins_registry/ ./plugins_registry/
|
||||
COPY policies/ ./policies/
|
||||
|
||||
# Create CLI aliases
|
||||
RUN ln -s /app/a2a_cli.py /usr/local/bin/a2a && chmod +x /app/a2a_cli.py /app/a2a_mcp_server.py && \
|
||||
ln -s /app/molecule_ai_status.py /usr/local/bin/molecule-monorepo-status && chmod +x /app/molecule_ai_status.py
|
||||
|
||||
# gh wrapper — auto-prefixes PR / issue titles with the agent role + appends
|
||||
# a body footer. Every agent in the template shares one GitHub PAT so plain
|
||||
# `gh pr list` can't distinguish workspaces; the wrapper reads GIT_AUTHOR_NAME
|
||||
# (set by the platform provisioner, "Molecule AI <Role>") and rewrites the
|
||||
# title/body accordingly. Fails open when the env is missing. Anything that
|
||||
# isn't `gh pr create` or `gh issue create` passes through untouched.
|
||||
# /usr/local/bin is earlier in PATH than /usr/bin/gh so this shadows the
|
||||
# real binary without renaming it.
|
||||
COPY scripts/gh-wrapper.sh /usr/local/bin/gh
|
||||
RUN chmod +x /usr/local/bin/gh
|
||||
|
||||
# Copy the git credential helper so entrypoint.sh can register it at boot.
|
||||
# molecule-git-token-helper.sh fetches a fresh GitHub App installation token
|
||||
# from the platform on every git push/fetch, preventing stale-token failures
|
||||
# after the ~60 min GitHub App token TTL (issue #613 / #547).
|
||||
COPY scripts/molecule-git-token-helper.sh ./scripts/
|
||||
RUN chmod +x ./scripts/molecule-git-token-helper.sh
|
||||
|
||||
# Copy the background token refresh daemon. Runs as a background process
|
||||
# started by entrypoint.sh — refreshes gh CLI auth and the credential
|
||||
# helper cache every 45 min so tokens never expire mid-operation.
|
||||
COPY scripts/molecule-gh-token-refresh.sh ./scripts/
|
||||
RUN chmod +x ./scripts/molecule-gh-token-refresh.sh
|
||||
|
||||
# Generic GIT_ASKPASS helper. Reads HTTPS Basic-Auth credentials from env
|
||||
# vars (GIT_HTTP_USERNAME / GIT_HTTP_PASSWORD, with GITEA_USER / GITEA_TOKEN
|
||||
# as fallback) and emits them on the git credential-prompt protocol so
|
||||
# container-side `git` can authenticate to any private HTTPS remote
|
||||
# without on-disk .gitconfig / .git-credentials mutation. The platform
|
||||
# provisioner sets GIT_ASKPASS=/usr/local/bin/molecule-askpass via
|
||||
# applyAgentGitIdentity (workspace-server/internal/handlers/agent_git_identity.go).
|
||||
# Filename is the only project-specific marker; the script body contains
|
||||
# no vendor literals and is identical to the script shipped in each
|
||||
# open-source workspace template (scripts/git-askpass.sh).
|
||||
COPY scripts/molecule-askpass /usr/local/bin/molecule-askpass
|
||||
RUN chmod +x /usr/local/bin/molecule-askpass
|
||||
|
||||
# Dirs and permissions
|
||||
RUN mkdir -p /workspace /plugins /home/agent/.claude /home/agent/.config /home/agent/.local \
|
||||
/home/agent/.molecule-token-cache && \
|
||||
chown -R agent:agent /app /home/agent /workspace
|
||||
|
||||
# Install gosu for clean root → agent user handoff in entrypoint.
|
||||
# The entrypoint starts as root to fix volume ownership, then exec's
|
||||
# as the agent user so Claude Code's --dangerously-skip-permissions works.
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends gosu && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
VOLUME /configs
|
||||
VOLUME /workspace
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
# HEALTHCHECK: probe the A2A agent-card endpoint so orchestrators and
|
||||
# container runtimes can detect a live, responsive workspace agent.
|
||||
# Uses curl (installed by the apt-get layer above) against the uvicorn server.
|
||||
# PORT is injected at runtime via the molecule-runtime entrypoint; the
|
||||
# default matches EXPOSE.
|
||||
HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
|
||||
CMD curl -sf http://localhost:${PORT:-8000}/agent/card >/dev/null || exit 1
|
||||
|
||||
RUN chmod +x /app/entrypoint.sh
|
||||
# Start as root — entrypoint fixes volume permissions then drops to agent
|
||||
CMD ["./entrypoint.sh"]
|
||||
@@ -1 +0,0 @@
|
||||
# trigger autobump for python-multipart pin (PDF P0 cure)
|
||||
@@ -1,105 +0,0 @@
|
||||
"""OFFSEC-003: A2A peer-result sanitization — shared across delegation tools.
|
||||
|
||||
This module is intentionally a LEAF (no imports from the molecule-runtime
|
||||
package) to avoid circular dependency cycles. Both ``a2a_tools_delegation``
|
||||
and ``a2a_tools`` can import from here without creating import loops.
|
||||
|
||||
Trust-boundary design (OFFSEC-003):
|
||||
A2A peer responses are untrusted third-party content. Before passing
|
||||
them to the agent context, they MUST be wrapped in a trust-boundary
|
||||
marker pair so the calling agent knows the content is external.
|
||||
|
||||
Boundary markers:
|
||||
- _A2A_BOUNDARY_START = "[A2A_RESULT_FROM_PEER]"
|
||||
- _A2A_BOUNDARY_END = "[/A2A_RESULT_FROM_PEER]"
|
||||
|
||||
The boundary is the PRIMARY security control. A peer that sends
|
||||
"[A2A_RESULT_FROM_PEER]evil[/A2A_RESULT_FROM_PEER]safe" can make "safe"
|
||||
appear inside the trusted context unless the markers themselves are
|
||||
escaped before wrapping — see _escape_boundary_markers() below.
|
||||
|
||||
Defense-in-depth (secondary):
|
||||
Known prompt-injection control-words are also escaped so that even
|
||||
if a calling agent ignores the boundary marker, embedded attack
|
||||
patterns (SYSTEM:, OVERRIDE:, etc.) lose their special meaning.
|
||||
This is not a complete injection sanitizer — do not rely on it as
|
||||
the primary control.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
# ── Trust-boundary markers ────────────────────────────────────────────────────
|
||||
|
||||
_A2A_BOUNDARY_START = "[A2A_RESULT_FROM_PEER]"
_A2A_BOUNDARY_END = "[/A2A_RESULT_FROM_PEER]"

# ── Boundary-marker escaping ─────────────────────────────────────────────────
# A peer that sends "[/A2A_RESULT_FROM_PEER]evil" can make "evil" appear
# outside the wrapped region. Both markers are therefore rewritten in the raw
# text before any wrapping happens, so the peer can neither close the boundary
# early nor open a fake one.
# "[/ " is the escape prefix — visually distinct from either real marker.
_A2A_BOUNDARY_START_ESCAPED = "[/ A2A_RESULT_FROM_PEER]"
_A2A_BOUNDARY_END_ESCAPED = "[/ /A2A_RESULT_FROM_PEER]"


def _escape_boundary_markers(text: str) -> str:
    """Return *text* with every literal boundary marker defused.

    Each occurrence of the start marker, then each occurrence of the end
    marker, is replaced by its escaped form. Neither escaped form contains
    a real marker, so the two passes cannot re-introduce one.
    """
    without_openers = text.replace(_A2A_BOUNDARY_START, _A2A_BOUNDARY_START_ESCAPED)
    return without_openers.replace(_A2A_BOUNDARY_END, _A2A_BOUNDARY_END_ESCAPED)


# ── Defense-in-depth: injection pattern escaping ─────────────────────────────
# These patterns cover common prompt-injection phrasings. They are NOT a
# complete sanitizer; the boundary marker is the primary control and these
# are purely defense-in-depth.
#
# Every rule captures the character (or start-of-string) in front of the
# keyword as group 1 and writes it back with ``\1``, so the text around the
# keyword is left exactly as the peer sent it.
_INJECTION_PATTERNS = [
    # Keyword rules: the leading ``(^|[^\w])`` and trailing ``\b`` stop a
    # match inside a longer word (e.g. "SYSTEM" in "mySYSTEMatic").
    (re.compile(r"(^|[^\w])SYSTEM\b", re.IGNORECASE), r"\1[ESCAPED_SYSTEM]"),
    (re.compile(r"(^|[^\w])OVERRIDE\b", re.IGNORECASE), r"\1[ESCAPED_OVERRIDE]"),
    # "INSTRUCTIONS" is only escaped at the start of the string or directly
    # after a newline. The captured newline is written back so that two
    # lines are not merged into one.
    (re.compile(r"(^|\n)INSTRUCTIONS?\b", re.IGNORECASE), r"\1[ESCAPED_INSTRUCTIONS]"),
    (re.compile(r"(^|[^\w])IGNORE\s+ALL\b", re.IGNORECASE), r"\1[ESCAPED_IGNORE_ALL]"),
    (re.compile(r"(^|[^\w])YOU\s+ARE\s+NOW\b", re.IGNORECASE), r"\1[ESCAPED_YOU_ARE_NOW]"),
]


def sanitize_a2a_result(text: str) -> str:
    """Sanitize untrusted text from an A2A peer (OFFSEC-003).

    Order of operations:
    1. Escape boundary markers in the raw text (prevents injection).
    2. Escape known injection patterns (defense-in-depth).

    Returns the input unchanged if it is empty/None.

    Note: this function does NOT add boundary wrappers — callers that need
    to establish a trust boundary should wrap the sanitized result with
    ``[A2A_RESULT_FROM_PEER]\\n{sanitized}\\n[/A2A_RESULT_FROM_PEER]``.
    See ``a2a_tools_delegation.py:tool_delegate_task`` for the canonical
    wrapping pattern.
    """
    if not text:
        return text

    # 1. Defuse boundary markers so a malicious peer cannot break the trust
    #    boundary from inside its own response.
    escaped = _escape_boundary_markers(text)

    # 2. Escape known injection control-words (defense-in-depth only).
    for pattern, replacement in _INJECTION_PATTERNS:
        escaped = pattern.sub(replacement, escaped)

    return escaped
|
||||
@@ -1,251 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""A2A CLI — command-line tools for inter-workspace communication.
|
||||
|
||||
Supports both synchronous and asynchronous delegation:
|
||||
a2a delegate <id> <task> — Send task, wait for response (sync)
|
||||
a2a delegate --async <id> <task> — Send task, return task ID immediately
|
||||
a2a status <task_id> — Check task status / get result
|
||||
a2a peers — List available peers
|
||||
a2a info — Show this workspace's info
|
||||
|
||||
Environment variables:
|
||||
WORKSPACE_ID — this workspace's ID
|
||||
PLATFORM_URL — platform API base URL
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
|
||||
import httpx
|
||||
|
||||
_WORKSPACE_ID_raw = os.environ.get("WORKSPACE_ID")
|
||||
if not _WORKSPACE_ID_raw:
|
||||
raise RuntimeError("WORKSPACE_ID environment variable is required but not set")
|
||||
WORKSPACE_ID = _WORKSPACE_ID_raw
|
||||
# Platform URL: defaults to host.docker.internal inside containers (override with PLATFORM_URL). The platform API
|
||||
# is only reachable via the Docker network mesh from inside a workspace
|
||||
# container regardless of the runtime environment (Docker/host).
|
||||
PLATFORM_URL = os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")
|
||||
|
||||
|
||||
async def discover(target_id: str) -> dict | None:
    """Look up a peer workspace in the platform registry.

    Sends a GET to ``{PLATFORM_URL}/registry/discover/{target_id}`` with this
    workspace's ID in the ``X-Workspace-ID`` header.

    Returns the decoded JSON body when the platform answers 200, and
    ``None`` for any other status code.
    """
    endpoint = f"{PLATFORM_URL}/registry/discover/{target_id}"
    caller_headers = {"X-Workspace-ID": WORKSPACE_ID}
    async with httpx.AsyncClient(timeout=30.0) as http:
        reply = await http.get(endpoint, headers=caller_headers)
        if reply.status_code != 200:
            return None
        return reply.json()
|
||||
|
||||
|
||||
async def delegate(target_id: str, task: str, async_mode: bool = False):
    """Delegate a task to another workspace over A2A JSON-RPC.

    The peer is resolved through ``discover``; the process exits with
    status 1 when the peer cannot be resolved or its record has no URL.

    With ``async_mode`` set, one ``message/send`` request is posted with a
    10 s timeout. A JSON record with status ``"submitted"`` is printed to
    stdout when the request returns, or one with status ``"uncertain"`` is
    printed to stderr when it times out.

    Otherwise the request is posted with a 300 s timeout, up to three times.
    A timeout, an empty result, or an error whose message contains "rate" or
    "overloaded" triggers a retry after 5 s, then 10 s. The first non-empty
    text part is printed to stdout. An error reply, an undecodable reply, or
    a timeout on the last attempt exits with status 1.
    """
    peer = await discover(target_id)
    if not peer:
        print(f"Error: cannot reach workspace {target_id} (access denied or offline)", file=sys.stderr)
        sys.exit(1)

    target_url = peer.get("url", "")
    if not target_url:
        print(f"Error: workspace {target_id} has no URL", file=sys.stderr)
        sys.exit(1)

    task_id = str(uuid.uuid4())

    def build_request() -> dict:
        # Called once per POST so that every attempt carries a fresh
        # messageId while the JSON-RPC id (task_id) stays the same.
        return {
            "jsonrpc": "2.0",
            "id": task_id,
            "method": "message/send",
            "params": {
                "message": {
                    "role": "user",
                    "messageId": str(uuid.uuid4()),
                    "parts": [{"kind": "text", "text": task}],
                }
            },
        }

    if async_mode:
        # Short timeout: only wait long enough to see the request return.
        async with httpx.AsyncClient(timeout=10.0) as client:
            try:
                await client.post(target_url, json=build_request())
            except httpx.TimeoutException:
                # No reply arrived in time, so delivery is unconfirmed.
                print(json.dumps({
                    "task_id": task_id,
                    "target": target_id,
                    "status": "uncertain",
                    "note": "Request sent but response timed out — delivery unconfirmed. Use 'a2a status' to check.",
                }), file=sys.stderr)
            else:
                print(json.dumps({
                    "task_id": task_id,
                    "target": target_id,
                    "status": "submitted",
                    "target_url": target_url,
                }))
        return

    # Sync: wait for the full response, backing off between attempts.
    max_retries = 3
    for attempt in range(max_retries):
        attempts_left = attempt < max_retries - 1
        backoff = 5 * (2 ** attempt)
        async with httpx.AsyncClient(timeout=300.0) as client:
            try:
                resp = await client.post(target_url, json=build_request())
            except httpx.TimeoutException:
                if not attempts_left:
                    print("Error: request timed out after retries", file=sys.stderr)
                    sys.exit(1)
                print(f"(timeout, retrying in {backoff}s...)", file=sys.stderr)
                await asyncio.sleep(backoff)
                continue

            try:
                data = resp.json()
            except Exception:
                print(f"Error: invalid JSON response (status {resp.status_code})", file=sys.stderr)
                sys.exit(1)

            if "result" in data:
                parts = data["result"].get("parts", [])
                text = parts[0].get("text", "") if parts else ""
                if text and text != "(no response generated)":
                    print(text)
                    return
                # Empty or placeholder reply — might be rate limited, retry.
                if attempts_left:
                    print(f"(empty response, retrying in {backoff}s...)", file=sys.stderr)
                    await asyncio.sleep(backoff)
                    continue
                print(text or "(no response after retries)")
            elif "error" in data:
                error_msg = data['error'].get('message', 'unknown')
                lowered = error_msg.lower()
                if ("rate" in lowered or "overloaded" in lowered) and attempts_left:
                    print(f"(rate limited, retrying in {backoff}s...)", file=sys.stderr)
                    await asyncio.sleep(backoff)
                    continue
                print(f"Error: {error_msg}", file=sys.stderr)
                sys.exit(1)
            return
|
||||
|
||||
|
||||
async def check_status(target_id: str, task_id: str):
    """Check the status of an async task on a peer workspace.

    Resolves the peer through ``discover`` and posts a JSON-RPC ``tasks/get``
    request for ``task_id`` to the peer's URL with a 30 s timeout.

    Prints ``Status: <state>`` and, when the state is ``completed``, every
    non-empty text part of every artifact. An ``error`` reply is printed as
    ``Error: <message>``.

    Exits with status 1 when the peer cannot be resolved, when its record
    has no URL, or when the reply is not valid JSON.
    """
    peer = await discover(target_id)
    if not peer:
        print(f"Error: cannot reach workspace {target_id}", file=sys.stderr)
        sys.exit(1)

    target_url = peer.get("url", "")
    if not target_url:
        # Same guard as delegate(): without it an empty URL reaches
        # client.post() and surfaces as a traceback instead of a message.
        print(f"Error: workspace {target_id} has no URL", file=sys.stderr)
        sys.exit(1)

    async with httpx.AsyncClient(timeout=30.0) as client:
        resp = await client.post(
            target_url,
            json={
                "jsonrpc": "2.0",
                "id": str(uuid.uuid4()),
                "method": "tasks/get",
                "params": {"id": task_id},
            },
        )
        try:
            data = resp.json()
        except ValueError:
            # A non-JSON body (e.g. an HTML error page) is reported the same
            # way delegate() reports it rather than escaping as a traceback.
            print(f"Error: invalid JSON response (status {resp.status_code})", file=sys.stderr)
            sys.exit(1)

        if "result" in data:
            task = data["result"]
            status = task.get("status", {}).get("state", "unknown")
            print(f"Status: {status}")
            if status == "completed":
                for artifact in task.get("artifacts", []):
                    for part in artifact.get("parts", []):
                        if part.get("text"):
                            print(part["text"])
        elif "error" in data:
            print(f"Error: {data['error'].get('message', 'unknown')}")
|
||||
|
||||
|
||||
async def peers():
    """Print one line per peer listed by the platform for this workspace.

    Fetches ``{PLATFORM_URL}/registry/{WORKSPACE_ID}/peers`` with a 10 s
    timeout. Any status other than 200 prints an error to stderr and exits
    with status 1. Each printed line holds the peer's id, name, status
    (``?`` when absent) and role (empty when absent).
    """
    async with httpx.AsyncClient(timeout=10.0) as http:
        reply = await http.get(f"{PLATFORM_URL}/registry/{WORKSPACE_ID}/peers")
        if reply.status_code != 200:
            print("Error: could not fetch peers", file=sys.stderr)
            sys.exit(1)
        for peer in reply.json():
            peer_status = peer.get("status", "?")
            peer_role = peer.get("role", "")
            print(f"{peer['id']} {peer['name']:30s} {peer_status:10s} {peer_role}")
|
||||
|
||||
|
||||
async def info():
    """Print this workspace's record as returned by the platform.

    Fetches ``{PLATFORM_URL}/workspaces/{WORKSPACE_ID}`` with a 10 s timeout
    and, on a 200 reply, prints the id, name, role, tier, status and parent
    fields one per line. Any other status code prints nothing.
    """
    async with httpx.AsyncClient(timeout=10.0) as http:
        reply = await http.get(f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}")
        if reply.status_code != 200:
            return
        record = reply.json()
        print(f"ID: {record['id']}")
        print(f"Name: {record['name']}")
        # role and parent_id are optional in the record; the rest are required.
        print(f"Role: {record.get('role', '')}")
        print(f"Tier: {record['tier']}")
        print(f"Status: {record['status']}")
        print(f"Parent: {record.get('parent_id', '(root)')}")
|
||||
|
||||
|
||||
def main():
    """Entry point for the ``a2a`` command line.

    Dispatches on ``sys.argv[1]`` to ``delegate``, ``status``, ``peers`` or
    ``info``. With no command the usage summary is printed to stdout; a
    command with too few arguments, or an unknown command, prints a message
    to stderr. All of those cases exit with status 1.
    """
    if len(sys.argv) < 2:
        usage_lines = (
            "Usage: a2a <command> [args]",
            "Commands:",
            " delegate <workspace_id> <task> — Send task, wait for response",
            " delegate --async <workspace_id> <task> — Send task, return immediately",
            " status <workspace_id> <task_id> — Check async task status",
            " peers — List available peers",
            " info — Show workspace info",
        )
        for line in usage_lines:
            print(line)
        sys.exit(1)

    cmd = sys.argv[1]

    if cmd == "peers":
        asyncio.run(peers())
        return

    if cmd == "info":
        asyncio.run(info())
        return

    if cmd == "status":
        if len(sys.argv) < 4:
            print("Usage: a2a status <workspace_id> <task_id>", file=sys.stderr)
            sys.exit(1)
        asyncio.run(check_status(sys.argv[2], sys.argv[3]))
        return

    if cmd == "delegate":
        # --async may appear anywhere on the command line; it is stripped
        # before the positional arguments are counted.
        async_mode = "--async" in sys.argv
        positional = [arg for arg in sys.argv[2:] if arg != "--async"]
        if len(positional) < 2:
            print("Usage: a2a delegate [--async] <workspace_id> <task>", file=sys.stderr)
            sys.exit(1)
        # Everything after the workspace id is joined into one task string.
        asyncio.run(delegate(positional[0], " ".join(positional[1:]), async_mode))
        return

    print(f"Unknown command: {cmd}", file=sys.stderr)
    sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__": # pragma: no cover
|
||||
main()
|
||||
@@ -1,803 +0,0 @@
|
||||
"""A2A protocol client — peer discovery, messaging, and workspace info.
|
||||
|
||||
Shared constants (WORKSPACE_ID, PLATFORM_URL) live here so that
|
||||
a2a_tools and a2a_mcp_server can import them from a single place.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from collections import OrderedDict
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
import httpx
|
||||
|
||||
import a2a_response
|
||||
from platform_auth import auth_headers, self_source_headers
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_WORKSPACE_ID_raw = os.environ.get("WORKSPACE_ID")
|
||||
if not _WORKSPACE_ID_raw:
|
||||
raise RuntimeError("WORKSPACE_ID environment variable is required but not set")
|
||||
WORKSPACE_ID = _WORKSPACE_ID_raw
|
||||
# Platform URL: defaults to host.docker.internal inside containers (override with PLATFORM_URL). The platform API
|
||||
# is only reachable via the Docker network mesh from inside a workspace
|
||||
# container regardless of the runtime environment (Docker/host).
|
||||
PLATFORM_URL = os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")
|
||||
|
||||
# Cache workspace ID → name mappings (populated by list_peers calls)
|
||||
_peer_names: dict[str, str] = {}
|
||||
|
||||
# Cache: peer workspace_id → the source workspace_id whose registry
|
||||
# returned that peer. Populated by ``a2a_tools.tool_list_peers`` whenever
|
||||
# it queries a specific workspace's peers — so a later
|
||||
# ``tool_delegate_task(target)`` can auto-route through the correct
|
||||
# source workspace without the agent having to specify
|
||||
# ``source_workspace_id`` explicitly.
|
||||
#
|
||||
# Single-workspace mode: dict stays empty, all delegations fall through
|
||||
# to the module-level WORKSPACE_ID (existing behavior).
|
||||
#
|
||||
# Multi-workspace mode: as the agent calls list_peers, this map is
|
||||
# populated with each peer's source. Subsequent delegate_task calls
|
||||
# auto-route. If a peer is registered under multiple sources (rare —
|
||||
# e.g. an org-wide capability) the LAST observed source wins; the agent
|
||||
# can override by passing ``source_workspace_id`` explicitly.
|
||||
_peer_to_source: dict[str, str] = {}
|
||||
|
||||
# Cache workspace ID → full peer record (id, name, role, status, url, ...).
|
||||
# Populated by tool_list_peers and by the lazy registry lookup in
|
||||
# enrich_peer_metadata. The notification-callback path (channel envelope
|
||||
# enrichment) reads this cache on every inbound peer_agent push, so the
|
||||
# read shape stays a dict-like ``__getitem__`` lookup; entries carry
|
||||
# their fetched-at timestamp so TTL eviction is in-line with the
|
||||
# lookup. ``None`` as the record is the negative-cache sentinel:
|
||||
# registry failure is cached for one TTL window so we don't re-fire
|
||||
# the 2s-bounded GET on every push from a flaky peer.
|
||||
#
|
||||
# OrderedDict + maxsize bound (#2482): pre-fix this was an unbounded
|
||||
# ``dict``, so a workspace receiving from N distinct peers across its
|
||||
# lifetime accumulated ~100 bytes/entry × N indefinitely. At 10K peers
|
||||
# that's ~1 MB; at 100K (a chatty platform-wide router) ~10 MB; not
|
||||
# crash-class but unbounded. The LRU bound caps memory + the TTL caps
|
||||
# per-entry staleness — both gates are needed because a runaway poller
|
||||
# touching N new peer_ids per push could grow within a single TTL
|
||||
# window.
|
||||
#
|
||||
# All reads / writes go through ``_peer_metadata_get`` /
|
||||
# ``_peer_metadata_set`` so the LRU move-to-end + size-trim invariants
|
||||
# stay co-located. Direct mutation is allowed only in test fixtures
|
||||
# (clearing for isolation); production code path uses the helpers.
|
||||
_PEER_METADATA_MAXSIZE = 1024
|
||||
_peer_metadata: "OrderedDict[str, tuple[float, dict | None]]" = OrderedDict()
|
||||
_peer_metadata_lock = threading.Lock()
|
||||
|
||||
# How long an entry in ``_peer_metadata`` is treated as fresh. 5 minutes
|
||||
# is the same window we use for delegation routing — long enough that a
|
||||
# busy agent receiving repeated pushes from one peer doesn't hit the
|
||||
# registry on every push, short enough that role/name renames propagate
|
||||
# within a single agent session.
|
||||
_PEER_METADATA_TTL_SECONDS = 300.0
|
||||
|
||||
|
||||
def _peer_metadata_get(canon: str) -> tuple[float, dict | None] | None:
|
||||
"""Read with LRU touch — moves the entry to the most-recently-used
|
||||
position so steady-state pushes from a busy peer don't get evicted
|
||||
by a cold-start burst from new peers. Returns the raw tuple shape
|
||||
callers expect; TTL eviction stays at the call site.
|
||||
"""
|
||||
with _peer_metadata_lock:
|
||||
entry = _peer_metadata.get(canon)
|
||||
if entry is not None:
|
||||
_peer_metadata.move_to_end(canon)
|
||||
return entry
|
||||
|
||||
|
||||
def _peer_metadata_set(canon: str, value: tuple[float, dict | None]) -> None:
|
||||
"""Write + evict-if-over-maxsize. The eviction is in-process and
|
||||
cheap (popitem(last=False) on an OrderedDict is O(1)). Holding the
|
||||
lock across the trim keeps the size invariant stable under concurrent
|
||||
writes from background enrichment workers.
|
||||
"""
|
||||
with _peer_metadata_lock:
|
||||
_peer_metadata[canon] = value
|
||||
_peer_metadata.move_to_end(canon)
|
||||
# Trim the oldest entries until at-or-below maxsize. The bound
|
||||
# is a soft cap — a single overrun (set called when at maxsize)
|
||||
# evicts the LRU entry before returning, never letting size
|
||||
# exceed maxsize.
|
||||
while len(_peer_metadata) > _PEER_METADATA_MAXSIZE:
|
||||
_peer_metadata.popitem(last=False)
|
||||
|
||||
|
||||
# Background-fetch executor for enrich_peer_metadata_nonblocking (#2484).
|
||||
# A small pool — peers are highly TTL-cached, so the steady-state load
|
||||
# is "one fetch per peer per 5 minutes." Two workers handle the cold-
|
||||
# start burst when an agent starts receiving pushes from a new peer for
|
||||
# the first time without backing up the inbox poller. Daemon threads:
|
||||
# the executor must NOT block process exit if the inbox shuts down.
|
||||
_enrich_executor: ThreadPoolExecutor | None = None
|
||||
_enrich_executor_lock = threading.Lock()
|
||||
|
||||
# In-flight peer IDs — guards against a single peer's repeated pushes
|
||||
# scheduling N concurrent registry fetches before the first one fills
|
||||
# the cache. Set membership is "a worker is currently fetching this
|
||||
# peer; subsequent calls should NOT schedule another."
|
||||
_enrich_in_flight: set[str] = set()
|
||||
_enrich_in_flight_lock = threading.Lock()
|
||||
|
||||
|
||||
def _get_enrich_executor() -> ThreadPoolExecutor:
|
||||
"""Lazy-init the enrichment worker pool. Lazy because most test
|
||||
fixtures and short-lived CLI invocations don't need it; only the
|
||||
long-running molecule-mcp / inbox-poller path actually schedules
|
||||
background fetches.
|
||||
"""
|
||||
global _enrich_executor
|
||||
if _enrich_executor is not None:
|
||||
return _enrich_executor
|
||||
with _enrich_executor_lock:
|
||||
if _enrich_executor is None:
|
||||
_enrich_executor = ThreadPoolExecutor(
|
||||
max_workers=2,
|
||||
thread_name_prefix="enrich-peer",
|
||||
)
|
||||
return _enrich_executor
|
||||
|
||||
|
||||
def enrich_peer_metadata_nonblocking(
    peer_id: str,
    source_workspace_id: str | None = None,
) -> dict | None:
    """Cache-only lookup of peer metadata that never waits on the registry.

    Outcomes:
      - ``peer_id`` fails validation: return ``None``.
      - Fresh cache entry: return its record (which is ``None`` for a
        negatively-cached peer).
      - Missing or expired entry: hand the fetch to the background pool and
        return ``None`` so the caller renders the bare peer id. A later call
        for the same peer finds the warm cache.

    Why this exists (#2484): the inbox poller's notification callback in
    molecule-mcp used to call the synchronous ``enrich_peer_metadata`` on
    every push, blocking the poller for up to 2s × N uncached peers per
    batch. With the fetch moved to a worker, push delivery is bounded by the
    inbox poll interval rather than registry round-trip time.

    Trade-off: the first push from a new peer arrives without name/role.
    That is accepted because channel-envelope enrichment is a UX nicety, the
    cold window is one push per peer, and the 5-minute TTL keeps an active
    conversation warm.
    """
    canon = _validate_peer_id(peer_id)
    if canon is None:
        return None

    # Hot path: a known peer with a fresh entry is answered straight from
    # the cache without touching the worker pool.
    started = time.monotonic()
    entry = _peer_metadata_get(canon)
    if entry is not None:
        fetched_at, record = entry
        if started - fetched_at < _PEER_METADATA_TTL_SECONDS:
            return record

    # Miss or expired. Claim the in-flight slot so a flurry of pushes from
    # one chatty peer schedules a single fetch instead of N parallel GETs.
    with _enrich_in_flight_lock:
        already_scheduled = canon in _enrich_in_flight
        if not already_scheduled:
            _enrich_in_flight.add(canon)
    if already_scheduled:
        return None

    try:
        _get_enrich_executor().submit(
            _enrich_peer_metadata_worker, canon, source_workspace_id
        )
    except RuntimeError:
        # Executor was shut down (process exit path) — drop the request and
        # release the slot; the caller renders the bare peer_id.
        with _enrich_in_flight_lock:
            _enrich_in_flight.discard(canon)
    return None
|
||||
|
||||
|
||||
def _enrich_peer_metadata_worker(
    canon: str, source_workspace_id: str | None
) -> None:
    """Pool-thread body scheduled by ``enrich_peer_metadata_nonblocking``.

    Calls the synchronous ``enrich_peer_metadata`` purely for its cache
    write; the returned record is ignored. The in-flight marker for
    ``canon`` is removed on every exit path so a later cache miss can
    schedule a new fetch.
    """
    try:
        try:
            enrich_peer_metadata(canon, source_workspace_id)
        except Exception as err:  # noqa: BLE001
            # A failing fetch must not take the worker down — log and move
            # on. enrich_peer_metadata negative-caches its own failures, so
            # retries are rate-limited by the TTL.
            logger.debug("_enrich_peer_metadata_worker: %s failed: %s", canon, err)
    finally:
        with _enrich_in_flight_lock:
            _enrich_in_flight.discard(canon)
|
||||
|
||||
|
||||
def _wait_for_enrichment_inflight_for_testing(timeout: float = 2.0) -> None:
    """Wait until no enrichment fetch is in flight, or ``timeout`` elapses.

    Test-only helper. Production callers never wait for the cache to fill;
    tests use this to synchronise with the worker before asserting on the
    cache, instead of sleeping for a guessed duration.

    Polls ``_enrich_in_flight`` every 10 ms under ``_enrich_in_flight_lock``.
    Returns ``None`` in both cases — it does not signal whether the set
    drained or the deadline was reached.
    """
    give_up_at = time.monotonic() + timeout
    while True:
        if not (time.monotonic() < give_up_at):
            return
        with _enrich_in_flight_lock:
            drained = not _enrich_in_flight
        if drained:
            return
        time.sleep(0.01)
|
||||
|
||||
|
||||
def _peer_in_flight_clear_for_testing() -> None:
    """Test-only helper: forget every in-flight enrichment marker.

    Empties ``_enrich_in_flight`` while holding ``_enrich_in_flight_lock``.
    """
    with _enrich_in_flight_lock:
        _enrich_in_flight.clear()
|
||||
|
||||
|
||||
def enrich_peer_metadata(
    peer_id: str,
    source_workspace_id: str | None = None,
    *,
    now: float | None = None,
) -> dict | None:
    """Return metadata for ``peer_id`` from the cache or the registry.

    Synchronous, so it can be called from the inbox poller's non-async
    notification callback. A fresh cache entry is returned as-is; otherwise
    ``/registry/discover/<peer_id>`` is fetched with a 2.0 s httpx timeout.

    Args:
        peer_id: Peer workspace id; must pass ``_validate_peer_id``.
        source_workspace_id: Workspace on whose behalf the request is made
            (``X-Workspace-ID`` header and auth headers). Blank or ``None``
            falls back to the module-level ``WORKSPACE_ID``.
        now: Clock reading to use instead of ``time.monotonic()``.

    Returns:
        The registry's JSON object, or ``None`` when the id is invalid, the
        request raised, the status was not 200, or the body was not a JSON
        object. Callers fall back to the raw ``peer_id`` in that case.

    Negative caching: every failure after validation is stored as
    ``(current, None)`` and treated as fresh for one TTL window, so a peer
    with a flaky or missing registry record does not trigger a GET on every
    push.

    The fetched dict is stored unmodified, so any field the platform exposes
    is available to callers without a change here.
    """
    canon = _validate_peer_id(peer_id)
    if canon is None:
        return None

    current = time.monotonic() if now is None else now
    cached = _peer_metadata_get(canon)
    if cached is not None:
        fetched_at, record = cached
        if current - fetched_at < _PEER_METADATA_TTL_SECONDS:
            # Fresh entry. ``None`` here is the negative-cache sentinel and
            # is handled by the caller exactly like a registry miss.
            return record

    def _remember(outcome: dict | None) -> dict | None:
        # Single place that stamps an outcome (success or failure) into the
        # cache with this call's clock reading.
        _peer_metadata_set(canon, (current, outcome))
        return outcome

    src = (source_workspace_id or "").strip() or WORKSPACE_ID
    url = f"{PLATFORM_URL}/registry/discover/{canon}"
    try:
        with httpx.Client(timeout=2.0) as client:
            resp = client.get(url, headers={"X-Workspace-ID": src, **auth_headers(src)})
    except Exception as exc:  # noqa: BLE001
        logger.debug("enrich_peer_metadata: GET %s failed: %s", url, exc)
        return _remember(None)

    if resp.status_code != 200:
        logger.debug(
            "enrich_peer_metadata: %s returned HTTP %d", url, resp.status_code
        )
        return _remember(None)

    try:
        data = resp.json()
    except Exception:  # noqa: BLE001
        return _remember(None)
    if not isinstance(data, dict):
        return _remember(None)

    _remember(data)
    # Keep the id -> display-name map in step when the registry gave a name.
    if name := data.get("name"):
        _peer_names[canon] = name
    return data
|
||||
|
||||
|
||||
def _agent_card_url_for(peer_id: str) -> str:
    """Build the registry discovery URL that serves as ``peer_id``'s agent card.

    Returns ``""`` when ``peer_id`` does not pass ``_validate_peer_id``. The
    id is validated before interpolation so path-traversal input such as
    ``../../foo`` is never placed into a URL that gets reflected back to the
    receiving agent.

    The URL points at ``/registry/discover/<id>``, the same discovery path
    used elsewhere in this module, so the receiver has one endpoint to query
    for the sender's details.
    """
    safe_id = _validate_peer_id(peer_id)
    return "" if safe_id is None else f"{PLATFORM_URL}/registry/discover/{safe_id}"
|
||||
|
||||
# Sentinel prefix for errors originating from send_a2a_message / child agents.
# Used by delegate_task to distinguish real errors from normal response text.
# The trailing space is part of the sentinel.
_A2A_ERROR_PREFIX = "[A2A_ERROR] "
|
||||
|
||||
# Sentinel prefix for queued-for-poll-mode-peer outcomes (#2967).
# When the target workspace is registered as delivery_mode=poll (no
# public URL — typical for external molecule-mcp standalone runtimes),
# the platform's a2a_proxy.go:402 short-circuit returns a synthetic
# {"status":"queued","delivery_mode":"poll","method":"..."} envelope
# instead of dispatching over HTTP. The message IS delivered (written
# to the platform's inbox queue); there's just no synchronous reply
# to relay. Pre-#2967 the client treated this as "unexpected response
# shape" → caller saw DELEGATION FAILED → retried → recipient saw
# duplicates. The Queued prefix lets callers branch on this outcome
# explicitly: "delivered async, no synchronous reply expected" is
# different from both success-with-text and failure.
# The trailing space is part of the sentinel.
_A2A_QUEUED_PREFIX = "[A2A_QUEUED] "
|
||||
|
||||
# Workspace IDs are UUIDs everywhere we generate them (platform's
|
||||
# workspaces.id column, /registry/discover/:id route param, etc.) but
|
||||
# the agent-facing tool surface receives them as free-form strings via
|
||||
# tool args. ``_validate_peer_id`` enforces UUID-shape at the
|
||||
# trust boundary so we never interpolate `..` or `/` into a URL path,
|
||||
# never silently coerce malformed input into a 404, and surface a
|
||||
# clear error to the agent rather than letting an HTTP 4xx bubble up
|
||||
# from the platform with a generic error message.
|
||||
#
|
||||
# Lenient on case + whitespace because real-world peer-id strings
|
||||
# come from list_peers/discover_peer responses (canonical lowercase)
|
||||
# or hand-typed agent input (mixed-case acceptable). Strict on
|
||||
# everything else.
|
||||
_UUID_RE = re.compile(
|
||||
r"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
|
||||
)
|
||||
|
||||
|
||||
def _validate_peer_id(peer_id: str) -> str | None:
|
||||
"""Return the canonicalised peer_id if valid, else None.
|
||||
|
||||
Returning None instead of raising so callers in tool surfaces can
|
||||
convert to a friendly agent-facing string ("workspace_id is not a
|
||||
valid UUID") rather than crashing with a stack trace.
|
||||
"""
|
||||
if not isinstance(peer_id, str):
|
||||
return None
|
||||
pid = peer_id.strip()
|
||||
if not _UUID_RE.match(pid):
|
||||
return None
|
||||
return pid.lower()
|
||||
|
||||
|
||||
async def discover_peer(target_id: str, source_workspace_id: str | None = None) -> dict | None:
    """Look up a peer workspace in the platform registry.

    ``target_id`` is validated with ``_validate_peer_id`` before the URL is
    built, so a malformed id neither costs a round-trip nor puts
    path-traversal characters into the request path.

    Args:
        target_id: Peer workspace id (UUID text; case and surrounding
            whitespace are tolerated).
        source_workspace_id: The registered workspace asking the question.
            Both the ``X-Workspace-ID`` header and the auth headers are
            derived from it; they must come from the same workspace or the
            platform's TenantGuard rejects the request. Blank or ``None``
            falls back to the module-level ``WORKSPACE_ID`` for
            single-workspace callers.

    Returns:
        The decoded JSON body on HTTP 200; ``None`` for an invalid id, any
        other status, or any exception raised while requesting or decoding.
    """
    safe_id = _validate_peer_id(target_id)
    if safe_id is None:
        return None
    src = (source_workspace_id or "").strip() or WORKSPACE_ID
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            resp = await client.get(
                f"{PLATFORM_URL}/registry/discover/{safe_id}",
                headers={"X-Workspace-ID": src, **auth_headers(src)},
            )
            if resp.status_code == 200:
                return resp.json()
            return None
        except Exception as e:
            # Lazy %-style arguments: the message is only formatted if the
            # record is actually emitted. Emitted text is unchanged.
            logger.error("Discovery failed for %s: %s", target_id, e)
            return None
|
||||
|
||||
|
||||
# httpx exception classes that indicate a transient transport-layer
|
||||
# failure worth retrying — the request never produced an application
|
||||
# response, so a fresh attempt has a real chance of succeeding. Any
|
||||
# error not in this tuple is treated as deterministic (HTTP-status,
|
||||
# JSON parse, runtime-returned JSON-RPC error, etc.) and surfaced to
|
||||
# the caller on the first try.
|
||||
#
|
||||
# Why each one belongs here:
|
||||
# - ConnectError / ConnectTimeout: peer's listening socket wasn't
|
||||
# ready (mid-restart, not yet bound). Fast failure, fast recovery.
|
||||
# - RemoteProtocolError: peer closed the TCP connection without
|
||||
# writing a response — observed on 2026-04-27 when a peer's prior
|
||||
# in-flight Claude SDK session aborted and the new request's
|
||||
# connection was reset mid-handler.
|
||||
# - ReadError / WriteError: TCP read/write socket error mid-flight,
|
||||
# typically a network blip on the Docker bridge or a peer worker
|
||||
# crash.
|
||||
# - ReadTimeout: peer didn't write ANY response bytes within the
|
||||
# 300s read budget. Distinct from "peer is slow but progressing"
|
||||
# (which httpx surfaces as a successful read with chunked bytes).
|
||||
# Retry budget caps the worst case — see _DELEGATE_TOTAL_BUDGET_S.
|
||||
# Caught as a group by the retry loop in send_a2a_message; any exception
# outside this tuple is surfaced to the caller without a retry.
_TRANSIENT_HTTP_ERRORS: tuple[type[Exception], ...] = (
    httpx.ConnectError,
    httpx.ConnectTimeout,
    httpx.ReadError,
    httpx.WriteError,
    httpx.RemoteProtocolError,
    httpx.ReadTimeout,
)
|
||||
|
||||
# Retry budget. Up to 5 attempts (1 initial + 4 retries) with
|
||||
# exponential backoff (1, 2, 4, 8 seconds), each backoff jittered ±25%
|
||||
# to prevent synchronized retry storms across siblings if a peer flaps.
|
||||
# _DELEGATE_TOTAL_BUDGET_S caps cumulative wall-clock so a string of
|
||||
# ReadTimeouts can't make the caller wait 25 minutes — once the
|
||||
# deadline elapses we stop retrying even if attempts remain. 600s = 10
|
||||
# minutes is the agreed worst case the caller can tolerate before
|
||||
# falling back to "peer unavailable" handling in tool_delegate_task.
|
||||
# Total tries per send, including the first one.
_DELEGATE_MAX_ATTEMPTS = 5
# Un-jittered delay before the first retry; doubles for each later retry.
_DELEGATE_BACKOFF_BASE_S = 1.0
# Ceiling applied to the un-jittered delay.
_DELEGATE_BACKOFF_CAP_S = 16.0
# Wall-clock budget across all attempts of one send.
_DELEGATE_TOTAL_BUDGET_S = 600.0


def _delegate_backoff_seconds(attempt_zero_indexed: int) -> float:
    """Compute the jittered wait before the retry that follows an attempt.

    ``attempt_zero_indexed`` is the index of the attempt that just failed
    (0 = the wait before retry #1). The un-jittered delay doubles per
    attempt from ``_DELEGATE_BACKOFF_BASE_S`` and is capped at
    ``_DELEGATE_BACKOFF_CAP_S``; symmetric jitter of up to ±25% of that
    value is then applied, and the result is floored at 0.

    Kept free of sleeping so the schedule can be unit-tested without
    patching ``asyncio.sleep``.
    """
    exponential = _DELEGATE_BACKOFF_BASE_S * (2 ** attempt_zero_indexed)
    capped = min(exponential, _DELEGATE_BACKOFF_CAP_S)
    # random.random() is in [0, 1), so the factor lies in [-0.25, +0.25).
    offset = capped * (0.5 * random.random() - 0.25)
    return max(0.0, capped + offset)
|
||||
|
||||
|
||||
def _format_a2a_error(exc: BaseException, target_url: str) -> str:
    """Render ``exc`` as an ``[A2A_ERROR]`` string that is never empty of detail.

    Some httpx exceptions stringify to ``""`` (RemoteProtocolError,
    connection-reset variants), which would otherwise leave the operator
    with a bare prefix. The result therefore always carries the exception
    class name and ``target_url``, so the activity log and Agent Comms
    panel are actionable without a trip through container logs.
    """
    type_name = type(exc).__name__
    msg = str(exc).strip()
    # Anchored at the start on purpose: a message that merely mentions some
    # other class mid-string (e.g. "got OSError on read") must still get
    # this exception's own type prefix.
    if msg.startswith((f"{type_name}:", f"{type_name} ")):
        detail = msg
    elif msg:
        detail = f"{type_name}: {msg}"
    else:
        detail = f"{type_name} (no message — likely connection reset or silent timeout)"
    return f"{_A2A_ERROR_PREFIX}{detail} [target={target_url}]"
|
||||
|
||||
|
||||
def _a2a_variant_to_reply(variant, safe_id: str, target_url: str) -> str:
    """Turn a parsed ``a2a_response`` variant into send_a2a_message's reply string."""
    if isinstance(variant, a2a_response.Result):
        # Legacy semantics that callers (test_a2a_client) assert on:
        #   parts present -> the variant's text, which may be ""
        #   parts empty   -> "(no response)"
        text = variant.text if variant.parts else "(no response)"
        # An agent-side bug can surface as "Agent error: <traceback>" inside
        # a JSON-RPC success envelope; tag it so callers detect it reliably.
        if text.startswith("Agent error:"):
            return f"{_A2A_ERROR_PREFIX}{text}"
        return text

    if isinstance(variant, a2a_response.Queued):
        # Poll-mode peer: the message was accepted into the inbox queue and
        # the target fetches it by polling. Not a failure, so return the
        # queued sentinel rather than an error.
        logger.info(
            "send_a2a_message: queued for poll-mode peer (target=%s method=%s)",
            target_url,
            variant.method,
        )
        return f"{_A2A_QUEUED_PREFIX}target={safe_id} method={variant.method}"

    if isinstance(variant, a2a_response.Error):
        msg = variant.message
        code = variant.code
        detail = msg if msg else "JSON-RPC error with no message"
        if code is not None:
            detail = f"{detail} (code={code})"
        if variant.restarting:
            # Surface restart-in-progress explicitly so the caller can show
            # a softer "agent is restarting" message.
            retry = (
                f", retry_after={variant.retry_after}s"
                if variant.retry_after is not None
                else ""
            )
            detail = f"{detail} (restarting{retry})"
        return f"{_A2A_ERROR_PREFIX}{detail} [target={target_url}]"

    # Malformed: log loudly and return an error so an operator notices a
    # server-side shape change.
    raw_text = str(variant.raw)
    logger.warning(
        "send_a2a_message: malformed response (target=%s body=%.200s)",
        target_url,
        raw_text,
    )
    return (
        f"{_A2A_ERROR_PREFIX}unexpected response shape "
        f"(no result, error, or queued envelope): "
        f"{raw_text[:200]} [target={target_url}]"
    )


async def send_a2a_message(peer_id: str, message: str, source_workspace_id: str | None = None) -> str:
    """POST an A2A ``message/send`` to a peer through the platform proxy.

    The request goes to ``${PLATFORM_URL}/workspaces/{peer_id}/a2a``. Routing
    through the platform's A2A proxy is the only path that works for both
    in-container and external runtimes — see a2a_tools.tool_delegate_task.

    Args:
        peer_id: Target workspace id; must pass ``_validate_peer_id``.
        message: Text sent as the single text part of the message.
        source_workspace_id: The SENDING workspace, used for the headers
            built by ``self_source_headers``. Blank or ``None`` falls back
            to the module-level ``WORKSPACE_ID``.

    Returns:
        The peer's reply text; an ``_A2A_QUEUED_PREFIX`` string when the
        message was queued for a poll-mode peer; or an ``_A2A_ERROR_PREFIX``
        string describing the failure. Failures are returned, not raised.

    Transport errors listed in ``_TRANSIENT_HTTP_ERRORS`` are retried up to
    ``_DELEGATE_MAX_ATTEMPTS`` attempts in total, with jittered exponential
    backoff, and no new attempt starts once ``_DELEGATE_TOTAL_BUDGET_S`` has
    elapsed. Anything else (bad JSON, JSON-RPC error, unexpected exception)
    is deterministic and is reported after the first try.
    """
    safe_id = _validate_peer_id(peer_id)
    if safe_id is None:
        return f"{_A2A_ERROR_PREFIX}invalid peer_id (expected UUID): {peer_id!r}"
    src = (source_workspace_id or "").strip() or WORKSPACE_ID
    target_url = f"{PLATFORM_URL}/workspaces/{safe_id}/a2a"

    # Fix F (Cycle 5 / H2): timeout=None let a hung upstream block the agent
    # indefinitely. Bounded but generous: 30s connect, 300s read (slow LLM
    # responses), 30s write / pool.
    timeout_cfg = httpx.Timeout(connect=30.0, read=300.0, write=30.0, pool=30.0)
    deadline = time.monotonic() + _DELEGATE_TOTAL_BUDGET_S
    last_exc: BaseException | None = None

    for attempt in range(_DELEGATE_MAX_ATTEMPTS):
        async with httpx.AsyncClient(timeout=timeout_cfg) as client:
            try:
                # self_source_headers() includes X-Workspace-ID so the
                # platform's a2a_receive logger records the sender as
                # source_id. Without it peer messages are logged with
                # source_id=NULL and show up in the recipient's My Chat tab
                # as user-typed input.
                resp = await client.post(
                    target_url,
                    headers=self_source_headers(src),
                    json={
                        "jsonrpc": "2.0",
                        "id": str(uuid.uuid4()),
                        "method": "message/send",
                        "params": {
                            "message": {
                                "role": "user",
                                "messageId": str(uuid.uuid4()),
                                "parts": [{"kind": "text", "text": message}],
                            }
                        },
                    },
                )
                # All response-shape detection lives in a2a_response.parse;
                # the helper above only renders the resulting variant.
                return _a2a_variant_to_reply(
                    a2a_response.parse(resp.json()), safe_id, target_url
                )
            except _TRANSIENT_HTTP_ERRORS as e:
                last_exc = e
                out_of_attempts = _DELEGATE_MAX_ATTEMPTS - (attempt + 1) <= 0
                if out_of_attempts or time.monotonic() >= deadline:
                    # No attempts or no budget left — report the last error.
                    break
                delay = _delegate_backoff_seconds(attempt)
                # Never sleep past the overall deadline.
                remaining = deadline - time.monotonic()
                if delay > remaining:
                    delay = max(0.0, remaining)
                logger.warning(
                    "send_a2a_message: transient %s on attempt %d/%d, retrying in %.1fs (target=%s)",
                    type(e).__name__,
                    attempt + 1,
                    _DELEGATE_MAX_ATTEMPTS,
                    delay,
                    target_url,
                )
                await asyncio.sleep(delay)
            except Exception as e:
                # Non-transient (JSON parse, etc.) — don't retry.
                return _format_a2a_error(e, target_url)

    # Reached only via ``break`` (or an exhausted loop), both of which
    # follow an assignment to last_exc.
    assert last_exc is not None  # noqa: S101
    return _format_a2a_error(last_exc, target_url)
|
||||
|
||||
|
||||
async def get_peers_with_diagnostic(source_workspace_id: str | None = None) -> tuple[list[dict], str | None]:
    """Fetch a workspace's peers and explain any failure.

    Args:
        source_workspace_id: Registered workspace whose peers are listed and
            whose auth headers are sent. Blank or ``None`` falls back to the
            module-level ``WORKSPACE_ID``. Multi-workspace operators call
            this once per registered workspace.

    Returns:
        ``(peers, diagnostic)``. ``diagnostic`` is ``None`` on HTTP 200 with
        a JSON list body, even an empty one. Otherwise ``peers`` is ``[]``
        and ``diagnostic`` is a short human-readable reason (network error,
        non-JSON or non-list body, 401/403, 404, 5xx, or other status) that
        callers can show instead of "may be isolated" — see #2397.

    The ``get_peers()`` shim keeps the bare-list contract for callers that
    do not need the diagnostic.
    """
    src = (source_workspace_id or "").strip() or WORKSPACE_ID
    url = f"{PLATFORM_URL}/registry/{src}/peers"
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            resp = await client.get(
                url,
                headers={"X-Workspace-ID": src, **auth_headers(src)},
            )
        except Exception as e:
            return [], f"Cannot reach platform at {PLATFORM_URL}: {e}"

    if resp.status_code == 200:
        try:
            data = resp.json()
        except Exception as e:
            return [], f"Platform returned 200 but body was not JSON: {e}"
        if not isinstance(data, list):
            return [], f"Platform returned 200 but body was not a list: {type(data).__name__}"
        return data, None

    if resp.status_code in (401, 403):
        return [], (
            f"Authentication to platform failed (HTTP {resp.status_code}). "
            "The workspace bearer token may be invalid — restarting the workspace usually re-mints it."
        )
    if resp.status_code == 404:
        # Report the workspace that was actually queried. With an explicit
        # source_workspace_id this differs from the module-level
        # WORKSPACE_ID, and naming the wrong id would mislead the operator.
        return [], (
            f"Workspace ID {src} is not registered with the platform (HTTP 404). "
            "Re-registration via the platform's /registry/register endpoint is needed."
        )
    if 500 <= resp.status_code < 600:
        return [], f"Platform error: HTTP {resp.status_code}."
    return [], f"Unexpected platform response: HTTP {resp.status_code}."
|
||||
|
||||
|
||||
async def get_peers() -> list[dict]:
    """Return this workspace's peers as a plain list.

    Thin wrapper around ``get_peers_with_diagnostic()`` that drops the
    diagnostic, for callers (e.g. system-prompt bootstrap formatters) that
    only want the list shape they always had.
    """
    outcome = await get_peers_with_diagnostic()
    return outcome[0]
|
||||
|
||||
|
||||
async def get_workspace_info(source_workspace_id: str | None = None) -> dict:
    """Fetch a workspace's info record from the platform.

    Args:
        source_workspace_id: Registered workspace to introspect in
            multi-workspace mode. Blank or ``None`` falls back to the
            module-level ``WORKSPACE_ID``, so single-workspace operators see
            no behaviour change.

    Returns:
        The decoded JSON body on HTTP 200. Otherwise a dict with an
        ``"error"`` key, distinguishing three failure shapes (#2429):
          - 410 Gone: ``{"error": "removed", "id", "removed_at", "hint"}`` —
            the workspace was deleted and must be re-onboarded.
          - any other status: ``{"error": "not found"}``.
          - exception: ``{"error": str(exc)}`` (network / auth failure).
    """
    # Normalised the same way as the other helpers in this module, so a
    # whitespace-only id falls back to WORKSPACE_ID instead of being
    # interpolated into the URL.
    src = (source_workspace_id or "").strip() or WORKSPACE_ID
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            resp = await client.get(
                f"{PLATFORM_URL}/workspaces/{src}",
                headers=auth_headers(src),
            )
            if resp.status_code == 200:
                return resp.json()
            if resp.status_code == 410:
                # #2429: the platform returns 410 when status='removed'.
                # Surface "removed" plus an actionable hint so callers can
                # prompt re-onboarding instead of reporting "not found" for
                # a workspace id that was registered moments ago.
                try:
                    body = resp.json()
                except Exception:
                    body = {}
                if not isinstance(body, dict):
                    # A JSON list/string/number has no .get(); without this
                    # guard the AttributeError would replace the "removed"
                    # result with a generic error.
                    body = {}
                return {
                    "error": "removed",
                    "id": body.get("id", src),
                    "removed_at": body.get("removed_at"),
                    "hint": body.get(
                        "hint",
                        "Workspace was deleted on the platform. "
                        "Regenerate workspace + token from the canvas → Tokens tab.",
                    ),
                }
            return {"error": "not found"}
        except Exception as e:
            return {"error": str(e)}
|
||||
@@ -1,567 +0,0 @@
|
||||
"""Bridge between LangGraph agent and A2A protocol, with SSE streaming support.
|
||||
|
||||
SSE streaming architecture
|
||||
--------------------------
|
||||
The A2A SDK (``DefaultRequestHandler`` + ``EventQueue``) owns the SSE transport
|
||||
layer. This executor's job is to push the right event types into the queue as
|
||||
work progresses:
|
||||
|
||||
1. ``TaskStatusUpdateEvent(state=working)`` — immediately signals start
|
||||
2. ``TaskArtifactUpdateEvent(chunk, append=…)`` — one per LLM text token
|
||||
3. ``Message(final_text)`` — terminal event
|
||||
|
||||
Client compatibility
|
||||
--------------------
|
||||
*Non-streaming* (``message/send``):
|
||||
``ResultAggregator.consume_all()`` processes status/artifact events
|
||||
(updating the task in the store) and returns the final ``Message``
|
||||
immediately — backward-compatible with ``a2a_client.py`` which reads
|
||||
``data["result"]["parts"][0]["text"]``.
|
||||
|
||||
*Streaming* (``message/stream``):
|
||||
``consume_and_emit()`` yields every event above as SSE, letting the client
|
||||
render tokens in real time.
|
||||
|
||||
LangGraph integration
|
||||
---------------------
|
||||
Uses ``agent.astream_events(version="v2")`` to receive ``on_chat_model_stream``
|
||||
events with ``AIMessageChunk`` payloads. Text is extracted from both plain
|
||||
strings (OpenAI / Groq) and Anthropic-style content-block lists. Non-text
|
||||
content (tool_use, etc.) is silently skipped. A fresh ``artifact_id`` is
|
||||
generated for each new LLM ``run_id`` so tool-call cycles are grouped cleanly.
|
||||
"""
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
|
||||
from a2a.server.agent_execution import AgentExecutor, RequestContext
|
||||
from a2a.server.events import EventQueue
|
||||
from a2a.server.tasks import TaskUpdater
|
||||
from a2a.types import Part
|
||||
# KI-009: a2a-sdk v1 renames a2a.utils → a2a.helpers; TextPart removed (Part takes text= directly)
|
||||
from a2a.helpers import new_text_message
|
||||
from shared_runtime import (
|
||||
extract_history as _extract_history,
|
||||
extract_message_text,
|
||||
brief_task,
|
||||
set_current_task,
|
||||
)
|
||||
from executor_helpers import (
|
||||
collect_outbound_files,
|
||||
extract_attached_files,
|
||||
read_delegation_results,
|
||||
sanitize_agent_error,
|
||||
)
|
||||
from builtin_tools.telemetry import (
|
||||
A2A_TASK_ID,
|
||||
GEN_AI_OPERATION_NAME,
|
||||
GEN_AI_REQUEST_MODEL,
|
||||
GEN_AI_SYSTEM,
|
||||
WORKSPACE_ID_ATTR,
|
||||
_incoming_trace_context,
|
||||
gen_ai_system_from_model,
|
||||
get_tracer,
|
||||
record_llm_token_usage,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)

_WORKSPACE_ID = os.environ.get("WORKSPACE_ID", "unknown")

# LangGraph ReAct cycle budget per turn. Library default is 25; 500 covers
# PM fan-outs (plan → 6 delegations → 6 awaits → 6 results → synthesize ≈
# 30+ steps even before retries). Overridable via LANGGRAPH_RECURSION_LIMIT.
DEFAULT_RECURSION_LIMIT = 500


def _parse_recursion_limit() -> int:
    """Resolve the per-turn recursion limit from the environment.

    Reads ``LANGGRAPH_RECURSION_LIMIT`` on every call. An unset or empty
    variable yields ``DEFAULT_RECURSION_LIMIT`` silently; a value that is
    not an integer, or is zero or negative, yields the default and emits a
    WARNING log naming the rejected value.

    Returns:
        A strictly positive recursion limit.
    """
    configured = os.environ.get("LANGGRAPH_RECURSION_LIMIT", "")
    if configured:
        try:
            limit = int(configured)
        except ValueError:
            logger.warning(
                "LANGGRAPH_RECURSION_LIMIT=%r is not an integer; using default %d",
                configured, DEFAULT_RECURSION_LIMIT,
            )
        else:
            if limit > 0:
                return limit
            logger.warning(
                "LANGGRAPH_RECURSION_LIMIT=%d is not positive; using default %d",
                limit, DEFAULT_RECURSION_LIMIT,
            )
    # Unset, empty, unparseable and non-positive values all land here.
    return DEFAULT_RECURSION_LIMIT
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Compliance (OWASP Top 10 for Agentic Apps) — optional, lazy-loaded
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
try:
|
||||
from builtin_tools.compliance import (
|
||||
AgencyTracker,
|
||||
ExcessiveAgencyError,
|
||||
PromptInjectionError,
|
||||
redact_pii as _redact_pii,
|
||||
sanitize_input as _sanitize_input,
|
||||
)
|
||||
_COMPLIANCE_AVAILABLE = True
|
||||
except ImportError: # pragma: no cover
|
||||
_COMPLIANCE_AVAILABLE = False
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=1)
def _get_compliance_cfg():
    """Load the compliance section of the runtime config, once per process.

    Returns:
        ``load_config().compliance``, or ``None`` when the ``config`` module
        cannot be imported or loading fails for any reason. The outcome —
        including a ``None`` produced by a failure — is memoised by
        ``lru_cache``, so later calls never retry.
    """
    try:
        # Imported lazily so that importing this module never requires config.
        from config import load_config

        loaded = load_config()
        compliance = loaded.compliance
    except Exception:
        compliance = None
    return compliance
|
||||
|
||||
|
||||
def _extract_chunk_text(content) -> list[str]:
    """Extract text strings from an LLM streaming chunk's content field.

    Handles both provider content styles:
    - OpenAI / Groq: ``content`` is a plain ``str`` (empty for tool-call chunks).
    - Anthropic: ``content`` is a list of typed blocks, e.g.
      ``[{"type": "text", "text": "Hello"}, {"type": "tool_use", ...}]``

    Only ``"text"`` blocks are returned; ``tool_use``, ``tool_result``, and
    other non-text blocks are filtered out so raw tool JSON never appears in
    the SSE stream. A ``"text"`` block whose ``text`` value is not a string
    is skipped as well, so the result can always be joined with ``"".join``.

    Args:
        content: ``chunk.content`` value from an ``on_chat_model_stream`` event.

    Returns:
        List of non-empty text strings (empty for any unsupported content type).
    """
    if isinstance(content, str):
        return [content] if content else []
    if not isinstance(content, list):
        return []

    texts: list[str] = []
    for block in content:
        if isinstance(block, dict):
            if block.get("type") != "text":
                continue  # tool_use / tool_result / anything non-text
            candidate = block.get("text", "")
        else:
            candidate = block  # bare string entries are passed through
        # Enforce the list[str] contract: drop empties and non-string values.
        if isinstance(candidate, str) and candidate:
            texts.append(candidate)
    return texts
|
||||
|
||||
|
||||
class LangGraphA2AExecutor(AgentExecutor):
    """Bridges LangGraph agent to A2A event model with SSE streaming support.

    Always uses ``agent.astream_events()`` so that:
    - Streaming clients (``message/stream``) receive token-level SSE events.
    - Non-streaming clients (``message/send``) receive the final ``Message``
      collected from the same stream — no duplicate LLM call, full compat.
    """

    def __init__(self, agent, heartbeat=None, model: str = "unknown"):
        """Store the collaborators used on every request.

        Args:
            agent: Compiled LangGraph graph (create_react_agent output).
            heartbeat: Object handed to ``set_current_task``; may be ``None``.
            model: Model identifier used for telemetry attributes,
                e.g. "anthropic:claude-sonnet-4-6".
        """
        self.agent = agent  # Compiled LangGraph graph (create_react_agent output)
        self._heartbeat = heartbeat
        self._model = model  # e.g. "anthropic:claude-sonnet-4-6"

    async def execute(self, context: RequestContext, event_queue: EventQueue) -> None:
        """Execute a task from an A2A request with SSE streaming.

        Routes through the Temporal durable workflow when a global
        ``TemporalWorkflowWrapper`` is initialised and connected to Temporal;
        otherwise falls back to ``_core_execute()`` (direct path).

        Event emission sequence:
          1. TaskStatusUpdateEvent(working)  — immediate start signal
          2. TaskArtifactUpdateEvent chunks  — token-by-token via astream_events
          3. Message(final_text)             — terminal; non-streaming clients
                                               return on this; streaming clients
                                               also receive it as the last SSE event.
        """
        # ── Optional Temporal durable execution wrapper ──────────────────────
        # When a TemporalWorkflowWrapper is active this routes execution through
        # a MoleculeAIAgentWorkflow (task_receive → llm_call → task_complete).
        # Any failure on this path falls back to _core_execute() so the client
        # always receives a response; the failure is logged rather than
        # dropped so a broken wrapper is diagnosable.
        try:
            from builtin_tools.temporal_workflow import get_wrapper as _get_temporal_wrapper

            _tw = _get_temporal_wrapper()
            if _tw is not None and _tw.is_available():
                return await _tw.run(self, context, event_queue)
        except ImportError:
            # Temporal integration is optional; its absence is the normal
            # case for the direct path, so keep this at debug level.
            logger.debug("Temporal wrapper unavailable; using direct execution", exc_info=True)
        except Exception:
            # Never let the wrapper path crash the executor.
            logger.warning(
                "Temporal wrapper path failed; falling back to direct execution",
                exc_info=True,
            )

        await self._core_execute(context, event_queue)

    async def _core_execute(self, context: RequestContext, event_queue: EventQueue) -> str:
        """Core execution pipeline — called directly or from a Temporal activity.

        This is the original ``execute()`` body, extracted so that the Temporal
        ``llm_call`` activity can invoke it without re-entering the wrapper
        check and causing infinite recursion.

        Returns the final response text (empty string on empty input or error).

        Event emission sequence:
          1. TaskStatusUpdateEvent(working)  — immediate start signal
          2. TaskArtifactUpdateEvent chunks  — token-by-token via astream_events
          3. Message(final_text)             — terminal event
        """
        user_input = extract_message_text(context)
        # Inject delegation results from prior turns. Heartbeat writes
        # completed delegation rows to DELEGATION_RESULTS_FILE and sends
        # a self-message to wake the agent; this consumes the file and
        # surfaces the results as context so the agent can act on them
        # without needing an explicit check_task_status call.
        # Results are prepended so they are visible even when the
        # self-message text is overwritten by a subsequent user message.
        pending_results = read_delegation_results()
        if pending_results:
            logger.info("A2A execute: injecting %d delegation result(s)", pending_results.count("\n") + 1)
            user_input = f"[Delegation results available]\n{pending_results}\n\n{user_input}"
        # Pull attached files from A2A message parts (kind: "file") and
        # append a manifest to the prompt so the agent knows they exist.
        # LangGraph tools (filesystem, bash, skills) can then open the
        # files by path — without this the agent silently ignores the
        # attachments and replies "I'm not sure what you're referring to".
        _attached_files = extract_attached_files(getattr(context, "message", None))
        if _attached_files:
            _manifest = "\n\nAttached files:\n" + "\n".join(
                f"- {f['name']} ({f['mime_type'] or 'unknown type'}) at {f['path']}"
                for f in _attached_files
            )
            user_input = (user_input + _manifest) if user_input else _manifest.lstrip()
        if not user_input:
            parts = getattr(getattr(context, "message", None), "parts", None)
            logger.warning("A2A execute: no text content in message parts: %s", parts)
            await event_queue.enqueue_event(
                new_text_message("Error: message contained no text content.")
            )
            return ""

        # ── OA-01: Prompt injection check (OWASP Agentic Top 10) ────────────
        _compliance_cfg = _get_compliance_cfg() if _COMPLIANCE_AVAILABLE else None
        # Evaluated once; gates the input check, the agency tracker and the
        # output redaction below.
        _compliance_active = bool(
            _COMPLIANCE_AVAILABLE and _compliance_cfg and _compliance_cfg.mode == "owasp_agentic"
        )
        if _compliance_active:
            try:
                user_input = _sanitize_input(
                    user_input,
                    prompt_injection_mode=_compliance_cfg.prompt_injection,
                    context_id=context.context_id or "",
                )
            except PromptInjectionError as exc:
                await event_queue.enqueue_event(
                    new_text_message(f"Request blocked: {exc}")
                )
                return ""

        logger.info("A2A execute: user_input=%s", user_input[:200])

        # ── OTEL: task_receive span ──────────────────────────────────────────
        parent_ctx = _incoming_trace_context.get()
        tracer = get_tracer()

        _result: str = ""  # captured inside the span for return after it closes

        with tracer.start_as_current_span("task_receive", context=parent_ctx) as task_span:
            task_span.set_attribute(WORKSPACE_ID_ATTR, _WORKSPACE_ID)
            # NOTE(review): the task-id attribute carries the *context* id here
            # (and on the task_complete span below) — confirm this is intended.
            task_span.set_attribute(A2A_TASK_ID, context.context_id or "")
            task_span.set_attribute("a2a.input_preview", user_input[:256])

            # Resolve IDs — the RequestContextBuilder always sets them, but
            # we generate fallbacks for safety (e.g. in unit tests).
            task_id = context.task_id or str(uuid.uuid4())
            context_id = context.context_id or str(uuid.uuid4())

            # A2A v1 contract (a2a-sdk ≥ 1.0): enqueue a Task event before any
            # TaskStatusUpdateEvent. The framework only auto-creates the Task
            # on continuation messages (existing task_id resolves via
            # task_manager.get_task()). For fresh requests get_task() returns
            # None and the SDK rejects the first status update with
            # InvalidAgentResponseError("Agent should enqueue Task before
            # TaskStatusUpdateEvent event") — see a2a/server/agent_execution/
            # active_task.py for the validation site. PR #2170 migrated the
            # surface to v1 but missed this contract; the synth-E2E gate
            # surfaced it on every run after staging deploy.
            if getattr(context, "current_task", None) is None:
                from a2a.types import Task, TaskState, TaskStatus
                await event_queue.enqueue_event(
                    Task(
                        id=task_id,
                        context_id=context_id,
                        status=TaskStatus(state=TaskState.TASK_STATE_SUBMITTED),
                    )
                )

            updater = TaskUpdater(event_queue, task_id, context_id)

            try:
                # set_current_task INSIDE the try so active_tasks is always
                # decremented by the finally block even if CancelledError hits
                # during the heartbeat HTTP push. Moving it outside the try
                # created a window where cancellation left active_tasks stuck
                # at 1, permanently blocking queue drain. (#2026)
                await set_current_task(self._heartbeat, brief_task(user_input))
                messages = _extract_history(context)
                if messages:
                    logger.info("A2A execute: injecting %d history messages", len(messages))
                messages.append(("human", user_input))

                # Recursion limit: see DEFAULT_RECURSION_LIMIT and
                # _parse_recursion_limit() at module top. Re-read on every
                # call so the env var can be hot-changed between requests.
                recursion_limit = _parse_recursion_limit()
                run_config = {
                    "configurable": {"thread_id": context_id},
                    "run_name": f"a2a-{context_id[:8]}",
                    "recursion_limit": recursion_limit,
                }

                # ── OTEL: llm_call span ──────────────────────────────────────
                with tracer.start_as_current_span("llm_call") as llm_span:
                    llm_span.set_attribute(GEN_AI_OPERATION_NAME, "chat")
                    llm_span.set_attribute(GEN_AI_SYSTEM, gen_ai_system_from_model(self._model))
                    llm_span.set_attribute(GEN_AI_REQUEST_MODEL, self._model)
                    llm_span.set_attribute(WORKSPACE_ID_ATTR, _WORKSPACE_ID)

                    # ── Step 1: signal "working" to streaming clients ─────────
                    await updater.start_work()

                    # ── Step 2: stream tokens via LangGraph astream_events ────
                    # Each "on_chat_model_stream" event carries an AIMessageChunk.
                    # We emit one TaskArtifactUpdateEvent per text chunk so SSE
                    # clients can render tokens in real time.
                    # artifact_id resets on each new LLM run_id so agent→tool→agent
                    # cycles each get their own artifact slot.

                    artifact_id = str(uuid.uuid4())
                    has_streamed = False         # True after first chunk for current artifact
                    current_run_id = None        # Detects new LLM call in a ReAct cycle
                    accumulated: list[str] = []  # All text for the final Message
                    last_ai_message = None       # Saved for token-usage telemetry

                    # ── OA-03: Excessive agency tracker ──────────────────────
                    _agency = (
                        AgencyTracker(
                            max_tool_calls=_compliance_cfg.max_tool_calls_per_task,
                            max_duration_seconds=float(_compliance_cfg.max_task_duration_seconds),
                        )
                        if _compliance_active
                        else None
                    )

                    # ── Tool trace: collect every tool invocation for
                    #    platform-level observability ────────────────────
                    # Keyed by run_id so parallel tool calls (LangGraph
                    # supports them) pair start→end correctly. Capped at
                    # MAX_TOOL_TRACE entries to prevent runaway loops from
                    # ballooning the JSONB payload.
                    MAX_TOOL_TRACE = 200
                    tool_trace: list[dict] = []
                    tool_trace_by_run: dict[str, dict] = {}

                    async for event in self.agent.astream_events(
                        {"messages": messages},
                        config=run_config,
                        version="v2",
                    ):
                        kind = event.get("event", "")

                        if kind == "on_chat_model_stream":
                            run_id = event.get("run_id", "")
                            if run_id and run_id != current_run_id:
                                # New LLM run started — fresh artifact slot
                                current_run_id = run_id
                                artifact_id = str(uuid.uuid4())
                                has_streamed = False

                            chunk = event.get("data", {}).get("chunk")
                            if chunk is not None:
                                texts = _extract_chunk_text(chunk.content)
                                for text in texts:
                                    await updater.add_artifact(
                                        parts=[Part(text=text)],  # v1: TextPart removed, Part takes text= directly
                                        artifact_id=artifact_id,
                                        append=has_streamed,  # False=first, True=append
                                        last_chunk=False,
                                    )
                                    has_streamed = True
                                    accumulated.append(text)

                        elif kind == "on_tool_start":
                            tool_name = event.get("name", "?")
                            tool_input = event.get("data", {}).get("input", "")
                            tool_run_id = event.get("run_id", "")
                            logger.debug("SSE: tool start — %s", tool_name)
                            if len(tool_trace) < MAX_TOOL_TRACE:
                                entry = {
                                    "tool": tool_name,
                                    "input": str(tool_input)[:500] if tool_input else "",
                                }
                                tool_trace.append(entry)
                                if tool_run_id:
                                    tool_trace_by_run[tool_run_id] = entry
                            if _agency is not None:
                                _agency.on_tool_call(
                                    tool_name=tool_name,
                                    context_id=context_id,
                                )

                        elif kind == "on_tool_end":
                            tool_end_name = event.get("name", "?")
                            tool_output = event.get("data", {}).get("output", "")
                            tool_run_id = event.get("run_id", "")
                            logger.debug("SSE: tool end — %s", tool_end_name)
                            # Pair via run_id so parallel tool calls don't clobber each other.
                            entry = tool_trace_by_run.get(tool_run_id) if tool_run_id else None
                            if entry is not None:
                                entry["output_preview"] = str(tool_output)[:300] if tool_output else ""

                        elif kind == "on_chat_model_end":
                            # Capture the last completed AIMessage for token telemetry
                            output = event.get("data", {}).get("output")
                            if output is not None:
                                last_ai_message = output

                    # Record token usage from the last completed LLM call
                    if last_ai_message is not None:
                        record_llm_token_usage(llm_span, {"messages": [last_ai_message]})

                # Build final text from all accumulated streaming tokens
                final_text = "".join(accumulated).strip() or "(no response generated)"
                logger.info("A2A execute: response length=%d chars", len(final_text))

                # ── OA-02 / OA-06: Output PII redaction ──────────────────────
                if _compliance_active:
                    final_text, _pii_types = _redact_pii(final_text)
                    if _pii_types:
                        from builtin_tools.audit import log_event as _audit_log
                        _audit_log(
                            event_type="compliance",
                            action="pii.redact",
                            resource="task_output",
                            outcome="redacted",
                            pii_types=_pii_types,
                            context_id=context_id,
                        )

                # ── OTEL: task_complete span ─────────────────────────────────
                with tracer.start_as_current_span("task_complete") as done_span:
                    done_span.set_attribute(WORKSPACE_ID_ATTR, _WORKSPACE_ID)
                    done_span.set_attribute(A2A_TASK_ID, context_id)
                    done_span.set_attribute("task.has_response", bool(accumulated))
                    done_span.set_attribute("task.response_length", len(final_text))

                # ── Step 3: emit final Message ────────────────────────────────
                # Non-streaming: ResultAggregator.consume_all() returns this
                # immediately as the response (a2a_client.py reads .parts[0].text).
                # Streaming: yielded as the last SSE event in the stream.
                #
                # If the reply mentions /workspace/... paths, stage each one
                # and emit as FileParts alongside the text so the canvas can
                # render a download button. Same contract the hermes executor
                # uses — every runtime going through this code path (langgraph,
                # deepagents, future ReAct variants) inherits it.
                _outbound = collect_outbound_files(final_text)
                if _outbound:
                    # NOTE: do NOT re-import `Part` here. It is already imported
                    # at module scope. A function-scope `from a2a.types
                    # import ... Part ...` would mark `Part` as a local name
                    # throughout this function under Python's scoping rules,
                    # making the earlier `Part(text=text)` call (inside the
                    # astream_events loop) raise UnboundLocalError because
                    # the local binding is not yet in scope at that point.
                    #
                    # a2a-sdk 1.x flattened the Part shape: 0.x used
                    # `Part(root=TextPart(text=...))` / `Part(root=FilePart(file=
                    # FileWithUri(uri=..., name=..., mimeType=...)))` (Pydantic
                    # discriminated-union style). 1.x's Part is a single proto
                    # message with flat fields: text, url, filename, media_type,
                    # raw, data, metadata. TextPart/FilePart/FileWithUri were
                    # removed. Same for Message: messageId/taskId/contextId
                    # camelCase became message_id/task_id/context_id.
                    from a2a.types import Message, Role
                    _parts: list[Part] = [Part(text=final_text)] if final_text else []
                    for f in _outbound:
                        _parts.append(Part(
                            url="workspace:" + f["path"],
                            filename=f["name"],
                            media_type=f["mime_type"],
                        ))
                    msg = Message(
                        message_id=uuid.uuid4().hex,
                        # 1.x Role is a protobuf enum: ROLE_UNSPECIFIED,
                        # ROLE_USER, ROLE_AGENT. Old `Role.agent` (Pydantic
                        # lowercase enum) doesn't exist anymore.
                        role=Role.ROLE_AGENT,
                        parts=_parts,
                        task_id=task_id,
                        context_id=context_id,
                    )
                else:
                    msg = new_text_message(final_text, task_id=task_id, context_id=context_id)
                # Attach tool_trace via metadata when supported. Guarded with
                # hasattr because some test mocks return a plain string here.
                if tool_trace and hasattr(msg, "metadata"):
                    try:
                        msg.metadata = {"tool_trace": tool_trace}
                    except (AttributeError, TypeError):
                        # `new_text_message()` returns a plain string in
                        # MagicMock paths in tests, where assignment to
                        # .metadata raises despite hasattr being true (the
                        # mock has the attribute as a property). Suppression
                        # is intentional — production Message objects always
                        # accept the assignment. See #1787 + commit dcbcf19
                        # for the original test-mock motivation.
                        logger.debug("metadata attach skipped (non-Message return from new_text_message)")
                # A2A v1 (a2a-sdk ≥ 1.0): once Task is enqueued (above, PR #2558),
                # the executor is in task mode and raw Message enqueues are
                # rejected with InvalidAgentResponseError("Received Message
                # object in task mode. Use TaskStatusUpdateEvent or
                # TaskArtifactUpdateEvent instead."). updater.complete()
                # wraps the Message in a terminal TaskStatusUpdateEvent
                # (state=COMPLETED, final=True) which both streaming and
                # non-streaming clients accept.
                await updater.complete(message=msg)
                _result = final_text

            except Exception as e:
                logger.error("A2A execute error: %s", e, exc_info=True)
                try:
                    task_span.record_exception(e)
                    from opentelemetry.trace import StatusCode
                    task_span.set_status(StatusCode.ERROR, str(e))
                except Exception:
                    # Telemetry is best-effort: it must never mask the
                    # original error, so only note the failure at debug level.
                    logger.debug("could not record exception on task span", exc_info=True)
                # A2A v1: in task mode, terminal errors must publish a
                # FAILED TaskStatusUpdateEvent (carrying the error Message)
                # rather than a raw Message enqueue. updater.failed() does
                # exactly this — both streaming and non-streaming clients
                # receive the error and stop polling.
                await updater.failed(
                    message=new_text_message(
                        sanitize_agent_error(exc=e), task_id=task_id, context_id=context_id
                    )
                )
            finally:
                await set_current_task(self._heartbeat, "")

        return _result

    async def cancel(self, context: RequestContext, event_queue: EventQueue) -> None:
        """Cancel a running task — emits canceled state to comply with A2A protocol.

        The emitted event carries the request's ``task_id`` / ``context_id``
        whenever the context provides them as non-empty strings, so consumers
        can correlate the cancellation with the task it belongs to. When the
        context has no usable IDs the event is emitted without them.
        """
        from a2a.types import TaskStatus, TaskState, TaskStatusUpdateEvent

        # Only forward real string IDs; None or non-string placeholders are
        # left out so the event is built exactly as it would be without them.
        ids = {}
        for field_name in ("task_id", "context_id"):
            value = getattr(context, field_name, None)
            if isinstance(value, str) and value:
                ids[field_name] = value

        await event_queue.enqueue_event(
            TaskStatusUpdateEvent(
                status=TaskStatus(state=TaskState.TASK_STATE_CANCELED),  # v1: TaskState uses SCREAMING_SNAKE_CASE
                final=True,
                **ids,
            )
        )
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,263 +0,0 @@
|
||||
"""Single source of truth for A2A ``/workspaces/<id>/a2a`` response shapes.
|
||||
|
||||
The workspace-server proxy at
|
||||
``workspace-server/internal/handlers/a2a_proxy.go`` (the canonical
|
||||
emitter) returns one of the following shapes for a single A2A call:
|
||||
|
||||
* **JSON-RPC success** —
|
||||
``{"jsonrpc": "2.0", "result": {...}, "id": "..."}``
|
||||
The agent's reply, passed through unchanged.
|
||||
|
||||
* **JSON-RPC error** —
|
||||
``{"jsonrpc": "2.0", "error": {"message": "...", "code": ...}, "id": "..."}``
|
||||
The agent reported a structured error.
|
||||
|
||||
* **Poll-queued** (synthesized at proxy, RFC #2339 PR 2 — see
|
||||
``a2a_proxy.go:402-406``) —
|
||||
``{"status": "queued", "delivery_mode": "poll", "method": "..."}``
|
||||
The target is a poll-mode workspace (no public URL); the message
|
||||
was written to the platform's inbox queue. The target agent will
|
||||
fetch it via ``GET /activity?since_id=`` polling. NOT a failure —
|
||||
delivery succeeded, there's just no synchronous reply to relay.
|
||||
|
||||
* **Platform error** — ``{"error": "...", "restarting": true?, "retry_after": int?}``
|
||||
HTTP-level failure synthesized by the proxy when the agent is
|
||||
unreachable, the container is restarting, or some other infrastructure
|
||||
failure happened. ``restarting=true`` flags the platform-initiated
|
||||
container-restart path.
|
||||
|
||||
* **Malformed** — anything else. Surfaced explicitly so a future server
|
||||
change is loud rather than silent.
|
||||
|
||||
The ``parse(data)`` function classifies a pre-decoded JSON body into a
|
||||
typed variant. Callers ``match`` on the variant and never re-implement
|
||||
shape detection — that's the SSOT discipline.
|
||||
|
||||
# SSOT contract
|
||||
|
||||
This file is the Python half. The Go server emits these shapes today
|
||||
via inline ``gin.H{...}`` literals. A future PR can introduce a Go
|
||||
mirror (e.g. ``workspace-server/internal/models/a2a_response.go``)
|
||||
with a typed marshaller — until then, **any change to the wire shape
|
||||
must be reflected here** and gated by ``test_a2a_response.py``'s
|
||||
fixture corpus. The corpus exists specifically so a one-sided edit
|
||||
breaks CI.
|
||||
|
||||
# Why a typed model (vs. dict-key sniffing at every site)
|
||||
|
||||
The pre-2967 client at ``a2a_client.py:567-587`` sniffed for ``result``
|
||||
or ``error`` keys inline and treated everything else as malformed —
|
||||
which silently broke poll-mode peers (the queued envelope has neither
|
||||
key). Inline sniffing per call site multiplies the surface area where
|
||||
a new shape gets misclassified. A single typed parser with an
|
||||
explicit ``Malformed`` escape hatch makes shape additions a
|
||||
one-line change here + a fixture entry in the test corpus, instead of
|
||||
a hunt through every parsing site in the runtime.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
import logging
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
class Result:
    """Successful JSON-RPC reply: the agent answered synchronously.

    Immutable value object. ``text`` is the convenience extraction of
    ``parts[0].text`` from the A2A multipart shape; ``parts`` keeps the
    complete list for callers that render several parts or non-text parts;
    ``raw_result`` keeps the untouched ``result`` field for callers that
    need it verbatim (e.g. the activity-row response_body audit).
    """

    # Reply text, the only required field.
    text: str
    # Full multipart payload; every instance gets its own empty list by default.
    parts: list[dict[str, Any]] = dataclasses.field(default_factory=list)
    # Unparsed ``result`` object, or None when it was not retained.
    raw_result: Optional[dict[str, Any]] = None
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
class Error:
    """Failure reply: a JSON-RPC error or a platform-level error.

    Immutable value object. ``code`` holds the JSON-RPC integer code when
    the response carried one and is ``None`` otherwise. ``restarting`` and
    ``retry_after`` describe a platform restart in progress: when both are
    set the container is being recycled, and the caller may present a
    softer error to the user.
    """

    # Human-readable error text, the only required field.
    message: str
    # JSON-RPC error code, when present.
    code: Optional[int] = None
    # True when the platform reported a container restart.
    restarting: bool = False
    # Platform-supplied retry hint, when present.
    retry_after: Optional[int] = None
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
class Queued:
    """Message accepted by the platform; no synchronous reply to relay.

    Produced for the poll-mode short-circuit: the target workspace is
    registered as ``delivery_mode=poll`` (no public URL — typical for
    external standalone ``molecule-mcp`` runtimes), the message was written
    to the platform's inbox queue, and the target agent will fetch it via
    ``GET /activity?since_id=`` polling. The parser in this module also
    builds this variant with ``delivery_mode="push"`` for the busy
    push-mode queue envelope.

    This is NOT a failure. A caller expecting the agent's response text
    will not find one here and should either:

    * tolerate the missing reply (fire-and-forget semantics), or
    * fall back to the durable ``/workspaces/:id/delegate`` +
      ``/delegations`` polling path (see ``a2a_tools_delegation``'s
      ``_delegate_sync_via_polling``), which writes the same A2A request
      through the platform's executeDelegation goroutine and lets the
      caller poll for the result row.

    ``method`` echoes the request method (``message/send``, ``notify``,
    etc.) so callers can correlate.
    """

    # Request method echoed back by the platform.
    method: str
    # How the queued message will reach the peer; "poll" unless stated.
    delivery_mode: str = "poll"
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
class Malformed:
    """Response body that matches none of the known shapes.

    Immutable value object holding the decoded payload exactly as received,
    for diagnostic logging. Callers typically present it to the user as an
    error (see ``send_a2a_message``); it is kept as its own type so that
    logging and metrics can tell it apart from genuine JSON-RPC ``Error``
    responses.
    """

    # Whatever the server returned: dict / list / str / number / etc.
    raw: Any
|
||||
|
||||
|
||||
Variant = Union[Result, Error, Queued, Malformed]
|
||||
|
||||
|
||||
# Field-name constants — the wire vocabulary. Single source of truth;
|
||||
# the parser references these by name so a change here is a
|
||||
# one-line edit instead of a hunt through string literals.
|
||||
_KEY_RESULT = "result"
|
||||
_KEY_ERROR = "error"
|
||||
_KEY_STATUS = "status"
|
||||
_KEY_DELIVERY_MODE = "delivery_mode"
|
||||
_KEY_METHOD = "method"
|
||||
_KEY_RESTARTING = "restarting"
|
||||
_KEY_RETRY_AFTER = "retry_after"
|
||||
|
||||
_STATUS_QUEUED = "queued"
|
||||
_DELIVERY_MODE_POLL = "poll"
|
||||
|
||||
|
||||
def _int_or_none(value: Any) -> "Optional[int]":
    """Return ``value`` when it is a genuine integer, otherwise ``None``.

    ``bool`` is a subclass of ``int`` in Python, so a JSON ``true`` / ``false``
    would satisfy a plain ``isinstance(value, int)`` check and leak through as
    an error code or retry delay. Booleans are rejected explicitly.
    """
    if isinstance(value, int) and not isinstance(value, bool):
        return value
    return None


def parse(data: Any) -> Variant:
    """Classify a pre-decoded ``/a2a`` JSON response into a typed variant.

    Never raises. Every branch is total: any input that doesn't match a
    known shape routes to ``Malformed`` so the caller can decide how to
    surface it.

    The order of checks matters:

    1. Non-dict input → ``Malformed`` (the server contract is dict-shaped).
    2. Push-mode queue envelope (``"queued"`` is literally ``True``) →
       ``Queued`` with ``delivery_mode="push"``. Checked before the
       poll-mode envelope so the two cases stay mutually exclusive even if
       a buggy server sends both.
    3. Poll-mode queue envelope (``status == "queued"`` AND
       ``delivery_mode == "poll"``) → ``Queued``. Checked before
       ``result`` / ``error`` so a body that carries both the queued
       markers and a ``result`` is not silently treated as ``Result``.
    4. ``result`` present → ``Result`` (the JSON-RPC success path).
    5. ``error`` present → ``Error`` (JSON-RPC error or platform error).
    6. Anything else → ``Malformed``.

    Args:
        data: The already-decoded response body (whatever ``json`` produced).

    Returns:
        One of ``Result``, ``Error``, ``Queued`` or ``Malformed``.
    """
    if not isinstance(data, dict):
        logger.warning(
            "a2a_response.parse: non-dict body — got %s",
            type(data).__name__,
        )
        return Malformed(raw=data)

    # Push-mode queue envelope — returned when a push-mode workspace (one
    # with a public URL) is at capacity. The platform queues the request and
    # returns {"queued": true, "message": "...", "queue_id": "..."}. Unlike
    # the poll-mode envelope this shape has no delivery_mode key; it is
    # recognized by ``queued is True`` alone.
    if data.get("queued") is True:
        method_raw = data.get(_KEY_METHOD)
        method = str(method_raw) if method_raw is not None else "message/send"
        logger.info(
            "a2a_response.parse: queued for busy push-mode peer (method=%s, queue_id=%s)",
            method,
            data.get("queue_id", "?"),
        )
        return Queued(method=method, delivery_mode="push")

    # Poll-queued envelope. Both keys must be present — the workspace server
    # sets them together; if only one is present the body is ambiguous and
    # falls through to the later checks (ultimately Malformed).
    if (
        data.get(_KEY_STATUS) == _STATUS_QUEUED
        and data.get(_KEY_DELIVERY_MODE) == _DELIVERY_MODE_POLL
    ):
        method_raw = data.get(_KEY_METHOD)
        method = str(method_raw) if method_raw is not None else "unknown"
        logger.info(
            "a2a_response.parse: queued for poll-mode peer (method=%s)",
            method,
        )
        return Queued(method=method)

    # JSON-RPC success.
    if _KEY_RESULT in data:
        result = data[_KEY_RESULT]
        if isinstance(result, dict):
            parts_raw = result.get("parts")
            parts = parts_raw if isinstance(parts_raw, list) else []
            text = ""
            if parts:
                # Only the first part contributes to ``text``; the full list
                # is still handed back via ``parts``.
                first = parts[0]
                if isinstance(first, dict):
                    text_raw = first.get("text")
                    text = str(text_raw) if text_raw is not None else ""
            return Result(text=text, parts=parts, raw_result=result)
        # ``result`` present but not a dict — unusual but not an error;
        # surface as a Result with the value rendered to text.
        return Result(text=str(result), parts=[], raw_result=None)

    # JSON-RPC error or platform error.
    if _KEY_ERROR in data:
        err_raw = data[_KEY_ERROR]
        message = ""
        code = None
        if isinstance(err_raw, dict):
            msg_raw = err_raw.get("message")
            if msg_raw is not None:
                message = str(msg_raw).strip()
            code = _int_or_none(err_raw.get("code"))
        elif isinstance(err_raw, str):
            message = err_raw.strip()
        else:
            message = str(err_raw)

        # ``restarting`` / ``retry_after`` live on the envelope, not inside
        # the ``error`` object.
        restarting = bool(data.get(_KEY_RESTARTING, False))
        retry_after = _int_or_none(data.get(_KEY_RETRY_AFTER))

        return Error(
            message=message,
            code=code,
            restarting=restarting,
            retry_after=retry_after,
        )

    logger.warning(
        "a2a_response.parse: unrecognized shape — keys=%s",
        # key=str keeps the sort total even if the dict has mixed-type keys,
        # so this diagnostic cannot break the "never raises" contract.
        sorted(data.keys(), key=str),
    )
    return Malformed(raw=data)
|
||||
@@ -1,181 +0,0 @@
|
||||
"""A2A MCP tool implementations — the body of each tool handler.
|
||||
|
||||
Imports shared client functions and constants from a2a_client.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import mimetypes
|
||||
import os
|
||||
import uuid
|
||||
|
||||
import httpx
|
||||
|
||||
from a2a_client import (
|
||||
PLATFORM_URL,
|
||||
WORKSPACE_ID,
|
||||
_A2A_ERROR_PREFIX,
|
||||
_peer_names,
|
||||
_peer_to_source,
|
||||
discover_peer,
|
||||
get_peers,
|
||||
get_peers_with_diagnostic,
|
||||
get_workspace_info,
|
||||
send_a2a_message,
|
||||
)
|
||||
from builtin_tools.security import _redact_secrets
|
||||
from platform_auth import list_registered_workspaces
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RBAC + auth helpers — extracted to a2a_tools_rbac (RFC #2873 iter 4a).
|
||||
# Re-exported here under the legacy underscore names so existing tests'
|
||||
# patch("a2a_tools._check_memory_write_permission", …) and call sites
|
||||
# inside this module that resolve bare names against the module-level
|
||||
# namespace continue to work unchanged.
|
||||
# ---------------------------------------------------------------------------
|
||||
from a2a_tools_rbac import ( # noqa: E402 (import after the from-a2a_client block)
|
||||
_auth_headers_for_heartbeat,
|
||||
_check_memory_read_permission,
|
||||
_check_memory_write_permission,
|
||||
_get_workspace_tier,
|
||||
_is_root_workspace,
|
||||
_ROLE_PERMISSIONS,
|
||||
)
|
||||
|
||||
|
||||
# Size limits for the free-text fields of the heartbeat / activity payload.
# Following hermes-agent's design discipline, the cap is enforced ONCE in
# the helper instead of at each call site, so a future caller that starts
# passing error_detail cannot accidentally DoS activity_logs by pasting a
# 4MB stack trace + base64 image.
#
# Rationale for each limit:
#   - error_detail → 4096 (hermes' value): long enough for a multi-frame
#     stack trace, short enough that 100 errors in 5min stay < 500KB total.
#   - summary → 256: a one-liner shown in the canvas card and the activity
#     row; 256 covers UTF-8 emoji plus a sentence.
#   - response_text → deliberately NOT capped: it is the agent's actual
#     reply content, and capping would silently truncate user-visible
#     output.
_MAX_ERROR_DETAIL_CHARS = 4096
_MAX_SUMMARY_CHARS = 256
|
||||
|
||||
|
||||
async def report_activity(
    activity_type: str, target_id: str = "", summary: str = "", status: str = "ok",
    task_text: str = "", response_text: str = "", error_detail: str = "",
):
    """Report activity to the platform for live progress tracking.

    Posts one activity row to ``/workspaces/{WORKSPACE_ID}/activity`` and,
    when ``summary`` is non-empty, additionally pushes it as ``current_task``
    through ``/registry/heartbeat`` for the canvas card.

    Args:
        activity_type: Value sent as the row's ``activity_type``.
        target_id: Value sent as the row's ``target_id``.
        summary: One-line description; truncated to ``_MAX_SUMMARY_CHARS``.
        status: Value sent as the row's ``status``.
        task_text: When non-empty, sent as ``request_body.task``.
        response_text: When non-empty, sent as ``response_body.result``
            (not truncated).
        error_detail: When non-empty, sent as the top-level ``error_detail``
            field; truncated to ``_MAX_ERROR_DETAIL_CHARS``.

    Returns:
        None. The call is best-effort: any exception is logged at debug
        level and swallowed so activity reporting never blocks delegation.
    """
    # Function-scope import keeps this change self-contained; no
    # module-level logger is assumed to exist here.
    import logging

    # Defensive caps in the helper itself so every caller benefits — see
    # the comments on _MAX_ERROR_DETAIL_CHARS / _MAX_SUMMARY_CHARS.
    if error_detail and len(error_detail) > _MAX_ERROR_DETAIL_CHARS:
        error_detail = error_detail[:_MAX_ERROR_DETAIL_CHARS]
    if summary and len(summary) > _MAX_SUMMARY_CHARS:
        summary = summary[:_MAX_SUMMARY_CHARS]
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            payload: dict = {
                "activity_type": activity_type,
                "source_id": WORKSPACE_ID,
                "target_id": target_id,
                "method": "message/send",
                "summary": summary,
                "status": status,
            }
            if task_text:
                payload["request_body"] = {"task": task_text}
            if response_text:
                payload["response_body"] = {"result": response_text}
            if error_detail:
                # error_detail is a top-level activity row column on the
                # platform (handlers/activity.go). Surfacing the cleaned
                # exception string here lets the Activity tab render a red
                # error chip + the cause without forcing the user to scroll
                # into the raw response_body JSON.
                payload["error_detail"] = error_detail
            await client.post(
                f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/activity",
                json=payload,
                headers=_auth_headers_for_heartbeat(),
            )
            # Also push current_task via heartbeat for canvas card display.
            if summary:
                await client.post(
                    f"{PLATFORM_URL}/registry/heartbeat",
                    json={
                        "workspace_id": WORKSPACE_ID,
                        "current_task": summary,
                        "active_tasks": 1,
                        "error_rate": 0,
                        "sample_error": "",
                        "uptime_seconds": 0,
                    },
                    headers=_auth_headers_for_heartbeat(),
                )
    except Exception:  # pylint: disable=broad-except
        # Best-effort — don't block delegation on activity reporting. Leave
        # a debug-level trace so a persistently failing reporter is still
        # diagnosable instead of vanishing silently.
        logging.getLogger(__name__).debug(
            "report_activity: failed to report %s activity",
            activity_type,
            exc_info=True,
        )
|
||||
|
||||
|
||||
# Delegation tool handlers — extracted to a2a_tools_delegation
|
||||
# (RFC #2873 iter 4b). Re-imported here so call sites + tests that
|
||||
# reference ``a2a_tools.tool_delegate_task`` /
|
||||
# ``a2a_tools._delegate_sync_via_polling`` keep resolving identically.
|
||||
from a2a_tools_delegation import ( # noqa: E402 (import after the from-a2a_client block)
|
||||
_SYNC_POLL_BUDGET_S,
|
||||
_SYNC_POLL_INTERVAL_S,
|
||||
_delegate_sync_via_polling,
|
||||
tool_check_task_status,
|
||||
tool_delegate_task,
|
||||
tool_delegate_task_async,
|
||||
)
|
||||
|
||||
|
||||
# Messaging tool handlers — extracted to a2a_tools_messaging
|
||||
# (RFC #2873 iter 4d). Re-imported here so call sites + tests that
|
||||
# reference ``a2a_tools.tool_send_message_to_user`` /
|
||||
# ``tool_list_peers`` / ``tool_get_workspace_info`` /
|
||||
# ``tool_chat_history`` / ``_upload_chat_files`` keep resolving
|
||||
# identically.
|
||||
from a2a_tools_messaging import ( # noqa: E402 (import after the top-of-module imports)
|
||||
_upload_chat_files,
|
||||
tool_broadcast_message,
|
||||
tool_chat_history,
|
||||
tool_get_workspace_info,
|
||||
tool_list_peers,
|
||||
tool_send_message_to_user,
|
||||
)
|
||||
|
||||
|
||||
# Memory tool handlers — extracted to a2a_tools_memory (RFC #2873 iter 4c).
|
||||
# Re-imported here so call sites + tests that reference
|
||||
# ``a2a_tools.tool_commit_memory`` / ``tool_recall_memory`` keep
|
||||
# resolving identically.
|
||||
from a2a_tools_memory import ( # noqa: E402 (import after the top-of-module imports)
|
||||
tool_commit_memory,
|
||||
tool_recall_memory,
|
||||
)
|
||||
|
||||
|
||||
# Inbox tool handlers — extracted to a2a_tools_inbox (RFC #2873 iter 4e).
|
||||
# Re-imported here so call sites + tests that reference
|
||||
# ``a2a_tools.tool_inbox_peek`` / ``tool_inbox_pop`` / ``tool_wait_for_message``
|
||||
# / ``_enrich_inbound_for_agent`` / ``_INBOX_NOT_ENABLED_MSG`` keep
|
||||
# resolving identically.
|
||||
from a2a_tools_inbox import ( # noqa: E402 (import after the top-of-module imports)
|
||||
_INBOX_NOT_ENABLED_MSG,
|
||||
_enrich_inbound_for_agent,
|
||||
tool_inbox_peek,
|
||||
tool_inbox_pop,
|
||||
tool_wait_for_message,
|
||||
)
|
||||
|
||||
|
||||
# Identity tool handlers — extracted to a2a_tools_identity. Ports the
|
||||
# two T4-tier MCP tools (``tool_get_runtime_identity`` +
|
||||
# ``tool_update_agent_card``) from molecule-ai-workspace-runtime PR#17.
|
||||
# That repo is mirror-only (reference_runtime_repo_is_mirror_only);
|
||||
# this is the canonical edit point, and the wheel mirror is
|
||||
# regenerated by publish-runtime.yml on merge.
|
||||
from a2a_tools_identity import ( # noqa: E402 (import after the top-of-module imports)
|
||||
tool_get_runtime_identity,
|
||||
tool_update_agent_card,
|
||||
)
|
||||
@@ -1,459 +0,0 @@
|
||||
"""Delegation tool handlers — single-concern slice of the a2a_tools surface.
|
||||
|
||||
Extracted from ``a2a_tools.py`` (RFC #2873 iter 4b). Owns the three
|
||||
delegation MCP tools + the RFC #2829 PR-5 sync-via-polling helper they
|
||||
share.
|
||||
|
||||
Public surface:
|
||||
|
||||
* ``tool_delegate_task`` — synchronous delegation, waits for response.
|
||||
* ``tool_delegate_task_async`` — fire-and-forget delegation; returns
|
||||
``{delegation_id, ...}``.
|
||||
* ``tool_check_task_status`` — poll the platform's ``/delegations`` log.
|
||||
|
||||
Internal:
|
||||
|
||||
* ``_delegate_sync_via_polling`` — durable async + poll for terminal
|
||||
status (RFC #2829 PR-5 cutover path; toggled by
|
||||
``DELEGATION_SYNC_VIA_INBOX=1``).
|
||||
* ``_SYNC_POLL_INTERVAL_S`` / ``_SYNC_POLL_BUDGET_S`` constants.
|
||||
|
||||
Circular-import note: this module calls ``report_activity`` from
|
||||
``a2a_tools`` to emit activity rows around the delegate dispatch.
|
||||
``a2a_tools`` imports the public symbols here at module-load time,
|
||||
so we use a LAZY import for ``report_activity`` inside the function
|
||||
that needs it. Without the lazy hop Python raises an ImportError
|
||||
on first ``a2a_tools`` import.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from a2a_client import (
|
||||
PLATFORM_URL,
|
||||
WORKSPACE_ID,
|
||||
_A2A_ERROR_PREFIX,
|
||||
_A2A_QUEUED_PREFIX,
|
||||
_peer_names,
|
||||
_peer_to_source,
|
||||
discover_peer,
|
||||
send_a2a_message,
|
||||
)
|
||||
from a2a_tools_rbac import auth_headers_for_heartbeat as _auth_headers_for_heartbeat
|
||||
from _sanitize_a2a import (
|
||||
_A2A_BOUNDARY_END,
|
||||
_A2A_BOUNDARY_END_ESCAPED,
|
||||
_A2A_BOUNDARY_START,
|
||||
_A2A_BOUNDARY_START_ESCAPED,
|
||||
sanitize_a2a_result,
|
||||
) # noqa: E402
|
||||
|
||||
|
||||
# RFC #2829 PR-5 cutover constants. The poll cadence + timeout are
# intentionally generous: 3s gives the platform's executeDelegation
# goroutine room to dispatch + the callee to respond + the result to
# write to activity_logs without thrashing the platform with rapid
# polls; the budget matches the legacy DELEGATION_TIMEOUT (300s) so
# operators don't see behavior change beyond "no more 600s timeouts".
_SYNC_POLL_INTERVAL_S = 3.0


def _poll_budget_from_env(default: float = 300.0) -> float:
    """Read the polling budget, in seconds, from ``DELEGATION_TIMEOUT``.

    Returns ``default`` when the variable is unset or cannot be parsed as a
    float, so a typo in the environment degrades to the default budget
    instead of raising ``ValueError`` while this module is being imported.
    """
    raw = os.environ.get("DELEGATION_TIMEOUT")
    if raw is None:
        return default
    try:
        return float(raw)
    except ValueError:
        logging.getLogger(__name__).warning(
            "DELEGATION_TIMEOUT=%r is not a number; using %ss",
            raw,
            default,
        )
        return default


_SYNC_POLL_BUDGET_S = _poll_budget_from_env()
|
||||
|
||||
|
||||
async def _delegate_sync_via_polling(
    workspace_id: str,
    task: str,
    src: str,
) -> str:
    """RFC #2829 PR-5: durable async delegation + poll for terminal status.

    Sidesteps the platform proxy's blocking `message/send` HTTP path that
    hits a hard 600s ceiling. Instead:

      1. POST /workspaces/<src>/delegate (async, returns a delegation_id) —
         the platform's executeDelegation goroutine handles A2A dispatch in
         the background, so nothing depends on the platform holding a
         connection open.
      2. Poll GET /workspaces/<src>/delegations every
         ``_SYNC_POLL_INTERVAL_S`` seconds for a row with a matching
         delegation_id that has reached a terminal status
         (completed/failed).
      3. Return the sanitized response_preview on completed; on failed,
         return the sanitized error_detail (or summary) behind
         ``_A2A_ERROR_PREFIX`` — the same wrapping the legacy path uses, so
         caller error-detection logic is unchanged.

    Args:
        workspace_id: Target workspace, sent as ``target_id``.
        task: Task text to delegate.
        src: Source workspace whose ``/delegate`` and ``/delegations``
            endpoints are used, and for which auth headers are built.

    Returns:
        The peer's (sanitized) response text, or a string starting with
        ``_A2A_ERROR_PREFIX`` describing a dispatch failure, a failed
        delegation, or a polling timeout. Never raises for HTTP/JSON
        problems; those are folded into the returned string.
    """
    import asyncio
    import time

    idem_key = hashlib.sha256(f"{src}:{workspace_id}:{task}".encode()).hexdigest()[:32]

    # 1. Dispatch via /delegate (the async, durable path).
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(
                f"{PLATFORM_URL}/workspaces/{src}/delegate",
                json={
                    "target_id": workspace_id,
                    "task": task,
                    "idempotency_key": idem_key,
                },
                headers=_auth_headers_for_heartbeat(src),
            )
    except Exception as e:  # pylint: disable=broad-except
        return f"{_A2A_ERROR_PREFIX}delegate dispatch failed: {e}"

    if resp.status_code not in (200, 202):
        return f"{_A2A_ERROR_PREFIX}delegate dispatch failed: HTTP {resp.status_code} {resp.text[:200]}"

    try:
        dispatch = resp.json()
    except Exception as e:  # pylint: disable=broad-except
        return f"{_A2A_ERROR_PREFIX}delegate dispatch returned non-JSON: {e}"

    # A valid-JSON body that is not an object (list, string, null) carries
    # no delegation_id; treat it like a missing id instead of letting
    # ``.get`` raise AttributeError out of this helper.
    delegation_id = dispatch.get("delegation_id", "") if isinstance(dispatch, dict) else ""
    if not delegation_id:
        return f"{_A2A_ERROR_PREFIX}delegate dispatch missing delegation_id: {dispatch}"

    # 2. Poll for terminal status with a deadline. Each poll is a cheap
    #    /delegations GET — bounded by the platform's existing rate limit.
    deadline = time.monotonic() + _SYNC_POLL_BUDGET_S
    last_status = "unknown"
    while time.monotonic() < deadline:
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                poll = await client.get(
                    f"{PLATFORM_URL}/workspaces/{src}/delegations",
                    headers=_auth_headers_for_heartbeat(src),
                )
        except Exception as e:  # pylint: disable=broad-except
            # Transient — keep polling. The platform IS holding the
            # delegation row; we just lost a network request.
            last_status = f"poll-error: {e}"
            await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
            continue

        if poll.status_code != 200:
            last_status = f"poll HTTP {poll.status_code}"
            await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
            continue

        try:
            rows = poll.json()
        except Exception as e:  # pylint: disable=broad-except
            last_status = f"poll non-JSON: {e}"
            await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
            continue

        # /delegations returns a flat list of delegation events. Anything
        # else is treated as a transient bad poll; record it so a timeout
        # message reflects what was actually seen.
        if not isinstance(rows, list):
            last_status = "poll non-list body"
            await asyncio.sleep(_SYNC_POLL_INTERVAL_S)
            continue

        # Filter to our delegation_id and take the first terminal row in
        # list order. The list may hold multiple rows per delegation_id
        # (one for the original dispatch, one per status update).
        terminal = None
        terminal_status = ""
        for r in rows:
            if not isinstance(r, dict):
                continue
            if r.get("delegation_id") != delegation_id:
                continue
            # str() guards against a non-string status value, which would
            # otherwise make ``.lower()`` raise.
            status = str(r.get("status") or "").lower()
            last_status = status
            if status in ("completed", "failed"):
                terminal = r
                terminal_status = status
                break

        if terminal:
            if terminal_status == "completed":
                # OFFSEC-003: sanitize response_preview before returning so
                # boundary markers injected by a malicious peer cannot
                # escape the trust boundary.
                return sanitize_a2a_result(terminal.get("response_preview") or "")
            # OFFSEC-003: sanitize error_detail / summary before wrapping
            # with the _A2A_ERROR_PREFIX sentinel so injected markers cannot
            # appear inside the trusted error block returned to the agent.
            err_raw = (
                terminal.get("error_detail")
                or terminal.get("summary")
                or "delegation failed"
            )
            err = sanitize_a2a_result(err_raw)
            return f"{_A2A_ERROR_PREFIX}{err}"

        await asyncio.sleep(_SYNC_POLL_INTERVAL_S)

    # Budget exhausted — the platform's row is still in flight (or queued).
    # Surface as an error so the caller can decide to retry or fall back;
    # the platform DOES still have the durable row, so the work isn't
    # lost — a later check_task_status can surface the result.
    return (
        f"{_A2A_ERROR_PREFIX}polling timeout after {_SYNC_POLL_BUDGET_S}s "
        f"(delegation_id={delegation_id}, last_status={last_status}); "
        f"the platform is still working on it — call check_task_status('{delegation_id}') to retrieve later"
    )
|
||||
|
||||
|
||||
async def tool_delegate_task(
    workspace_id: str,
    task: str,
    source_workspace_id: str | None = None,
) -> str:
    """Delegate a task to a peer workspace over A2A and wait for its reply.

    ``source_workspace_id`` names the registered workspace the delegation
    originates from; it drives auth and the source header so the platform's
    a2a_proxy logs the right sender. When omitted, the source is looked up
    in the peer→source cache and finally falls back to the module-level
    ``WORKSPACE_ID``.

    Returns the peer's reply wrapped in escaped boundary markers, or a
    plain-text ``Error: ...`` / ``DELEGATION FAILED ...`` message.
    """
    if not (workspace_id and task):
        return "Error: workspace_id and task are required"

    # Auto-route: an explicit source wins, then whichever registered
    # workspace last saw this peer (populated by tool_list_peers). ``None``
    # means "let downstream helpers use the legacy WORKSPACE_ID".
    src = source_workspace_id or _peer_to_source.get(workspace_id) or None

    # Self-delegation guard. Delegating to your own workspace deadlocks:
    # the sending turn holds _run_lock while the receive handler waits for
    # the same lock, so the request only ends by timing out. Reject it up
    # front with an actionable message.
    if workspace_id == (src or WORKSPACE_ID):
        return (
            "Error: cannot delegate_task to your own workspace — self-delegation "
            "deadlocks _run_lock (your sending turn holds it, the receive handler "
            "waits for it, the request times out). There is no peer who is also you: "
            "just do the work yourself, or call commit_memory / send_message_to_user directly."
        )

    # discover_peer is the access-control gate + name/status lookup. The
    # peer's reported ``url`` is NOT used for routing — send_a2a_message
    # builds the URL through the platform's A2A proxy.
    peer_info = await discover_peer(workspace_id, source_workspace_id=src)
    if not peer_info:
        return f"Error: workspace {workspace_id} not found or not accessible (check access control)"
    if (peer_info.get("status") or "").lower() == "offline":
        return f"Error: workspace {workspace_id} is offline"

    # Imported lazily: a2a_tools imports this module at top level, so a
    # top-level import of report_activity would be circular at first import.
    from a2a_tools import report_activity

    # Resolve and cache a display name, then announce the delegation start
    # (task text included for traceability; the summary stays brief for the
    # canvas).
    peer_name = peer_info.get("name") or _peer_names.get(workspace_id) or workspace_id[:8]
    _peer_names[workspace_id] = peer_name
    await report_activity("a2a_send", workspace_id, f"Delegating to {peer_name}", task_text=task)

    # RFC #2829 PR-5: with DELEGATION_SYNC_VIA_INBOX=1 the durable
    # /delegate + /delegations polling path replaces the proxy-blocked
    # message/send path (which is subject to the 600s timeout). Default off.
    use_polling = os.environ.get("DELEGATION_SYNC_VIA_INBOX") == "1"
    if not use_polling:
        # send_a2a_message routes through the platform proxy, so the same
        # code serves in-container and external (standalone) callers.
        result = await send_a2a_message(workspace_id, task, source_workspace_id=src)
        # #2967: a poll-mode target makes the proxy short-circuit with a
        # queued envelope, surfaced as the _A2A_QUEUED_PREFIX sentinel. The
        # synchronous path cannot deliver a reply in that case, so fall back
        # to the polling path, which does work for poll-mode peers.
        use_polling = result.startswith(_A2A_QUEUED_PREFIX)
        if use_polling:
            logger.info(
                "tool_delegate_task: target=%s is poll-mode; "
                "falling back from message/send to /delegate-poll path",
                workspace_id,
            )
    if use_polling:
        result = await _delegate_sync_via_polling(
            workspace_id, task, src or WORKSPACE_ID,
        )

    # Classify the outcome. On failure the sentinel prefix is stripped so
    # error_detail is the human-readable cause, capped at 4096 chars so a
    # malicious or buggy peer cannot stream an arbitrarily large message
    # into the caller's activity log.
    failed = result.startswith(_A2A_ERROR_PREFIX)
    if failed:
        error_detail = result[len(_A2A_ERROR_PREFIX):].strip()[:4096]
        outcome_summary = f"{peer_name} failed: {error_detail[:120]}"
        outcome_status = "error"
    else:
        error_detail = ""
        outcome_summary = f"{peer_name} responded ({len(result)} chars)"
        outcome_status = "ok"

    await report_activity(
        "a2a_receive", workspace_id,
        outcome_summary,
        task_text=task, response_text=result,
        status=outcome_status,
        error_detail=error_detail,
    )

    if failed:
        return (
            f"DELEGATION FAILED to {peer_name}: {result}\n"
            f"You should either: (1) try a different peer, (2) handle this task yourself, "
            f"or (3) inform the user that {peer_name} is unavailable and provide your best answer."
        )

    # OFFSEC-003: peer text is untrusted. Cut it at the first raw closing
    # marker BEFORE sanitizing (escaping would otherwise hide that closer),
    # then escape what remains and wrap it in the escaped boundary markers
    # so the agent can tell peer-supplied content from its own output.
    cut = result.find(_A2A_BOUNDARY_END)
    if cut != -1:
        result = result[:cut]
    escaped = sanitize_a2a_result(result)
    return (
        f"{_A2A_BOUNDARY_START_ESCAPED}\n"
        f"{escaped}\n"
        f"{_A2A_BOUNDARY_END_ESCAPED}"
    )
|
||||
|
||||
|
||||
async def tool_delegate_task_async(
    workspace_id: str,
    task: str,
    source_workspace_id: str | None = None,
) -> str:
    """Delegate a task via the platform's async delegation API (fire-and-forget).

    Uses POST /workspaces/:id/delegate, which runs the A2A request in the
    background. Results are tracked in the platform DB and broadcast via
    WebSocket. Use check_task_status to poll for results.

    Args:
        workspace_id: Target workspace to delegate to.
        task: Task text.
        source_workspace_id: The sending workspace (which of this agent's
            registered workspaces is logged as the originator). When
            omitted, auto-routes via the peer→source cache, then falls back
            to the module-level ``WORKSPACE_ID``.

    Returns:
        A JSON string with ``delegation_id``, ``workspace_id``, ``status``
        and ``note`` on success, or a plain ``Error: ...`` string. Network
        and decoding failures are folded into the returned string.
    """
    if not workspace_id or not task:
        return "Error: workspace_id and task are required"

    src = source_workspace_id or _peer_to_source.get(workspace_id) or WORKSPACE_ID

    # Self-delegation guard: even on the async path, queuing a task to your
    # own workspace just makes you re-process your own dispatch — never
    # useful, and on the sync path it deadlocks (see tool_delegate_task).
    if workspace_id == src:
        return (
            "Error: cannot delegate_task_async to your own workspace — there is no "
            "peer who is also you. Do the work yourself, or call commit_memory / "
            "send_message_to_user directly."
        )

    # Idempotency key: SHA-256 of (source, target, task) so that a restarted
    # agent firing the same delegation gets the same key and the platform
    # returns the existing delegation_id instead of creating a duplicate
    # (fixes #1456). Source is part of the key so the SAME task delegated
    # from two different registered workspaces yields two distinct
    # delegations — one per tenant audit trail.
    idem_key = hashlib.sha256(f"{src}:{workspace_id}:{task}".encode()).hexdigest()[:32]

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(
                f"{PLATFORM_URL}/workspaces/{src}/delegate",
                json={"target_id": workspace_id, "task": task, "idempotency_key": idem_key},
                headers=_auth_headers_for_heartbeat(src),
            )
        # Accept 200 as well as 202, matching _delegate_sync_via_polling's
        # handling of the same endpoint. NOTE(review): 200 is presumably
        # what an idempotent replay returns — confirm against the platform
        # handler.
        if resp.status_code not in (200, 202):
            return f"Error: delegation failed with status {resp.status_code}: {resp.text[:200]}"
        data = resp.json()
        return json.dumps({
            "delegation_id": data.get("delegation_id", ""),
            "workspace_id": workspace_id,
            "status": "delegated",
            "note": "Task delegated. The platform runs it in the background. Use check_task_status to poll for results.",
        })
    except Exception as e:  # pylint: disable=broad-except
        return f"Error: delegation failed — {e}"
|
||||
|
||||
|
||||
async def tool_check_task_status(
    workspace_id: str,
    task_id: str,
    source_workspace_id: str | None = None,
) -> str:
    """Check delegations for this workspace via the platform API.

    Args:
        workspace_id: Ignored (kept for backward compat). The query runs
            against ``source_workspace_id``'s delegations (the workspace
            that FIRED the delegations), not the target's.
        task_id: Optional delegation_id to filter on. If empty, a summary
            of up to 10 delegations is returned.
        source_workspace_id: Which registered workspace's delegation log to
            query. Defaults to the module-level WORKSPACE_ID.

    Returns:
        A JSON string: the matching delegation row, a ``not_found`` marker,
        or ``{"delegations": [...], "count": N}``. Failures are returned as
        plain ``Error ...`` strings rather than raised.
    """
    src = source_workspace_id or WORKSPACE_ID
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(
                f"{PLATFORM_URL}/workspaces/{src}/delegations",
                headers=_auth_headers_for_heartbeat(src),
            )
        if resp.status_code != 200:
            return f"Error: failed to check delegations ({resp.status_code})"

        payload = resp.json()
        # The endpoint is expected to return a flat list of rows; report any
        # other shape explicitly instead of tripping over it further down.
        if not isinstance(payload, list):
            return (
                "Error checking delegations: unexpected response shape "
                f"({type(payload).__name__}, expected list)"
            )
        # Skip entries that are not objects, mirroring the row filtering in
        # _delegate_sync_via_polling.
        delegations = [d for d in payload if isinstance(d, dict)]

        if task_id:
            # Filter by delegation_id; the first matching row is returned.
            match = next(
                (d for d in delegations if d.get("delegation_id") == task_id),
                None,
            )
            if match is None:
                return json.dumps({"status": "not_found", "delegation_id": task_id})
            # OFFSEC-003: sanitize peer-supplied fields. ``or ""`` keeps a
            # JSON null from reaching the sanitizer.
            match["summary"] = sanitize_a2a_result(match.get("summary") or "")
            match["response_preview"] = sanitize_a2a_result(match.get("response_preview") or "")
            return json.dumps(match)

        # No filter: summarize the first 10 rows.
        summary = []
        for d in delegations[:10]:
            preview = d.get("response_preview", "")
            if preview:
                preview = sanitize_a2a_result(preview)
            summary.append({
                "delegation_id": d.get("delegation_id", ""),
                "target_id": d.get("target_id", ""),
                "status": d.get("status", ""),
                "summary": sanitize_a2a_result(d.get("summary") or ""),
                "response_preview": preview,
            })
        return json.dumps({"delegations": summary, "count": len(delegations)})
    except Exception as e:  # pylint: disable=broad-except
        return f"Error checking delegations: {e}"
|
||||
@@ -1,187 +0,0 @@
|
||||
"""Identity tool handlers — single-concern slice of the a2a_tools surface.
|
||||
|
||||
Owns the two MCP tools that close the T4-tier workspace owner-permission
|
||||
gaps reported via the canvas:
|
||||
|
||||
* ``tool_get_runtime_identity`` — env-only; returns model, model_provider,
|
||||
molecule_model, anthropic_base_url, tier, workspace_id, runtime
|
||||
(ADAPTER_MODULE). No HTTP call. Always permitted by RBAC — even
|
||||
read-only agents may know what model they are.
|
||||
|
||||
* ``tool_update_agent_card`` — POSTs the card to ``/registry/update-card``
|
||||
with the workspace's own bearer (same auth path as ``tool_commit_memory``
|
||||
via ``a2a_tools_rbac.auth_headers_for_heartbeat``). The platform
|
||||
replaces the stored card and broadcasts an ``agent_card_updated``
|
||||
event so the canvas reflects the new card live. Gated on
|
||||
``memory.write`` capability via the existing RBAC permission map so
|
||||
read-only roles can't silently rewrite the platform card.
|
||||
|
||||
Both originated as a port of molecule-ai-workspace-runtime PR#17
|
||||
(``feat(mcp): add update_agent_card + get_runtime_identity tools``).
|
||||
The mirror-only PR#17 was closed without merge per
|
||||
``reference_runtime_repo_is_mirror_only``; the canonical edit point is
|
||||
this monorepo at ``workspace/`` and the wheel mirror is regenerated
|
||||
automatically by the publish-runtime workflow.
|
||||
|
||||
Imports the auth-header primitive from ``a2a_tools_rbac`` (iter 4a) —
|
||||
NOT from ``a2a_tools`` — to avoid a circular import with the
|
||||
kitchen-sink re-export module.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from a2a_client import PLATFORM_URL
|
||||
from a2a_tools_rbac import (
|
||||
auth_headers_for_heartbeat as _auth_headers_for_heartbeat,
|
||||
check_memory_write_permission as _check_memory_write_permission,
|
||||
)
|
||||
|
||||
|
||||
def _runtime_identity_payload() -> dict[str, Any]:
    """Assemble the runtime identity mapping from environment variables.

    Performs no I/O beyond reading ``os.environ``. Every field defaults to
    the empty string when its variable is unset. Kept separate from
    ``tool_get_runtime_identity`` so callers (tests in particular) can
    inspect the dict without decoding JSON.
    """
    # (output field, environment variable) — order defines the key order.
    field_to_env = (
        ("model", "MODEL"),
        ("model_provider", "MODEL_PROVIDER"),
        ("molecule_model", "MOLECULE_MODEL"),
        ("anthropic_base_url", "ANTHROPIC_BASE_URL"),
        ("tier", "TIER"),
        ("workspace_id", "WORKSPACE_ID"),
        # ADAPTER_MODULE doubles as the runtime / template identifier
        # (per the original note it is set by each template's Dockerfile).
        ("runtime", "ADAPTER_MODULE"),
    )
    return {field: os.environ.get(var, "") for field, var in field_to_env}
|
||||
|
||||
|
||||
async def tool_get_runtime_identity() -> str:
    """Report this runtime's identity as a JSON string.

    Serialises the dict produced by ``_runtime_identity_payload()`` (model,
    provider, tier, workspace id, runtime — all read from environment
    variables) with two-space indentation. No HTTP request is made and no
    permission check is performed here.

    Returns:
        The identity mapping encoded as pretty-printed JSON.
    """
    identity = _runtime_identity_payload()
    return json.dumps(identity, indent=2)
|
||||
|
||||
|
||||
async def tool_update_agent_card(card: Any) -> str:
    """Replace this workspace's agent card on the platform.

    String-returning entry point: all of the work (RBAC gate, argument
    validation, the POST to ``/registry/update-card``) happens in
    ``_update_agent_card_impl``; this wrapper only JSON-encodes its result.

    Args:
        card: The new card. Anything other than a ``dict`` is rejected by
            the implementation with an error payload.

    Returns:
        Pretty-printed JSON. ``success`` is ``true`` with a ``status`` field
        when the platform answered 200; otherwise ``success`` is ``false``
        with an ``error`` message, plus ``status_code`` when the failure
        came from a non-200 platform response.
    """
    result = await _update_agent_card_impl(card)
    return json.dumps(result, indent=2)
|
||||
|
||||
|
||||
async def _update_agent_card_impl(card: Any) -> dict[str, Any]:
    """Dict-returning core of ``tool_update_agent_card``.

    Checks, in order: the ``memory.write`` RBAC gate, that ``card`` is a
    dict, and that ``WORKSPACE_ID`` is set in the environment. It then POSTs
    ``{"workspace_id", "agent_card"}`` to ``/registry/update-card``.

    Args:
        card: The new agent card; must be a ``dict``.

    Returns:
        ``{"success": True, "status": ...}`` when the platform answers 200
        (``status`` is taken from the response body when it is a JSON object
        carrying one, else ``"updated"``);
        ``{"success": False, "status_code": ..., "error": ...}`` for any
        other HTTP status;
        ``{"success": False, "error": ...}`` for a failed local check or an
        exception raised while talking to the platform.
    """
    # RBAC: same gate as tool_commit_memory. Checked first so a read-only
    # role gets a clear message instead of a platform-side rejection.
    if not _check_memory_write_permission():
        return {
            "success": False,
            "error": (
                "RBAC — this workspace does not have the 'memory.write' "
                "permission required to update the agent_card."
            ),
        }
    if not isinstance(card, dict):
        return {
            "success": False,
            "error": "card must be a JSON object (dict)",
        }
    ws_id = os.environ.get("WORKSPACE_ID", "")
    if not ws_id:
        return {
            "success": False,
            "error": "WORKSPACE_ID env not set; cannot identify caller",
        }
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(
                f"{PLATFORM_URL}/registry/update-card",
                json={"workspace_id": ws_id, "agent_card": card},
                headers=_auth_headers_for_heartbeat(),
            )
            if resp.status_code == 200:
                # The update has already been accepted at this point. The
                # body is decoded best-effort only: an empty, non-JSON or
                # non-object body must not turn a success into an error.
                status = "updated"
                try:
                    body = resp.json()
                except Exception:
                    body = None
                if isinstance(body, dict):
                    status = body.get("status", "updated")
                return {"success": True, "status": status}

            # Non-200 — surface what the platform returned, preferring the
            # structured "error" field and falling back to the raw text.
            error_msg = ""
            try:
                parsed = resp.json()
            except Exception:
                parsed = None
            if isinstance(parsed, dict):
                error_msg = parsed.get("error", "")
            return {
                "success": False,
                "status_code": resp.status_code,
                "error": error_msg or resp.text,
            }
    except Exception as e:
        return {"success": False, "error": f"network error: {e}"}
|
||||
@@ -1,140 +0,0 @@
|
||||
"""Inbox tool handlers — single-concern slice of the a2a_tools surface.
|
||||
|
||||
Standalone-runtime path for inbound-message delivery (push-mode runtimes
|
||||
get messages via the channel-tag synthesis in a2a_mcp_server). The
|
||||
``InboxState`` singleton is set by ``mcp_cli`` before the MCP server
|
||||
starts; in-container runtimes never call ``inbox.activate(...)`` so
|
||||
``inbox.get_state()`` returns None and these tools surface an
|
||||
informational error instead of raising.
|
||||
|
||||
When-to-use guidance for agents (mirrored in
|
||||
``platform_tools/registry.py``):
|
||||
- ``wait_for_message``: block until a new inbound message arrives, then
|
||||
decide what to do with it; forms the loop ``wait → respond → wait``.
|
||||
- ``inbox_peek``: inspect the queue non-destructively.
|
||||
- ``inbox_pop``: remove a handled message by activity_id.
|
||||
|
||||
Extracted from ``a2a_tools.py`` in RFC #2873 iter 4e so the kitchen-sink
|
||||
module shrinks to a back-compat shim. The extraction also makes the
|
||||
``_enrich_inbound_for_agent`` helper unit-testable in isolation —
|
||||
previously it was buried in ``a2a_tools`` and only exercised through
|
||||
the inbox wrappers, leaving its peer-id-empty / cache-miss / registry-
|
||||
unavailable branches under-covered.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
|
||||
# Returned verbatim by the inbox tool wrappers when ``inbox.get_state()``
# yields None, i.e. the inbox subsystem was never activated. Gives the agent
# an explanatory message rather than an exception.
_INBOX_NOT_ENABLED_MSG = (
    "Error: inbox polling is not enabled in this runtime. "
    "The standalone molecule-mcp wrapper activates it; "
    "in-container runtimes receive messages via push delivery "
    "and do not need these tools."
)
|
||||
|
||||
|
||||
def _enrich_inbound_for_agent(d: dict) -> dict:
    """Attach sender identity fields to a poll-path inbox message.

    Mutates and returns ``d``. When the message has a non-empty ``peer_id``
    and ``a2a_client`` can be imported, the function:

    * copies ``name`` / ``role`` from the record returned by
      ``enrich_peer_metadata_nonblocking`` into ``peer_name`` /
      ``peer_role`` (each only when truthy), and
    * always sets ``agent_card_url`` from ``_agent_card_url_for(peer_id)``,
      even when the metadata lookup returned ``None``.

    The message is returned untouched when there is no ``peer_id`` or when
    importing ``a2a_client`` fails.
    """
    sender = d.get("peer_id")
    if not sender:
        # No peer to describe (e.g. a canvas user) — nothing to add.
        return d

    try:
        # Imported lazily to stay clear of a module-load cycle.
        from a2a_client import (
            _agent_card_url_for,
            enrich_peer_metadata_nonblocking,
        )
    except Exception:  # noqa: BLE001
        # Partial install / test harness: hand back the bare envelope.
        return d

    cached = enrich_peer_metadata_nonblocking(sender)
    if cached is not None:
        for field in ("name", "role"):
            value = cached.get(field)
            if value:
                d[f"peer_{field}"] = value

    # Derivable from the peer id alone, so it is set regardless of whether
    # the metadata lookup produced a record.
    d["agent_card_url"] = _agent_card_url_for(sender)
    return d
|
||||
|
||||
|
||||
async def tool_inbox_peek(limit: int = 10) -> str:
    """List pending inbound messages without consuming them.

    Args:
        limit: Maximum number of messages to return. A non-``int`` value is
            replaced by 10.

    Returns:
        A JSON array of enriched message dicts, or the "inbox not enabled"
        message when the inbox state has not been initialised.
    """
    import inbox  # deferred: avoids a circular dependency at module load

    queue = inbox.get_state()
    if queue is None:
        return _INBOX_NOT_ENABLED_MSG

    if not isinstance(limit, int):
        limit = 10
    enriched = []
    for pending in queue.peek(limit=limit):
        enriched.append(_enrich_inbound_for_agent(pending.to_dict()))
    return json.dumps(enriched)
|
||||
|
||||
|
||||
async def tool_inbox_pop(activity_id: str) -> str:
    """Drop one message from the inbox queue, identified by ``activity_id``.

    Args:
        activity_id: Identifier of the message to remove; must be a
            non-empty string.

    Returns:
        ``{"removed": <bool>, "activity_id": ...}`` as JSON, where
        ``removed`` is false when ``state.pop`` returned ``None``. Returns
        the "inbox not enabled" message when the inbox state is absent, or
        an error string when ``activity_id`` is missing or not a string.
    """
    import inbox

    queue = inbox.get_state()
    if queue is None:
        return _INBOX_NOT_ENABLED_MSG
    if not (isinstance(activity_id, str) and activity_id):
        return "Error: activity_id is required."

    was_removed = queue.pop(activity_id) is not None
    return json.dumps({"removed": was_removed, "activity_id": activity_id})
|
||||
|
||||
|
||||
async def tool_wait_for_message(timeout_secs: float = 60.0) -> str:
    """Wait for the next inbound message, up to ``timeout_secs`` seconds.

    The message is returned without being removed from the queue; callers
    follow up with ``inbox_pop`` once they have handled it.

    Args:
        timeout_secs: Requested wait. Values that cannot be converted with
            ``float()`` fall back to 60; the result is clamped to
            ``[0, 300]`` seconds.

    Returns:
        The enriched message as JSON, ``{"timeout": true, "timeout_secs":
        <effective>}`` when nothing arrived in time, or the "inbox not
        enabled" message when the inbox state is absent.
    """
    import inbox

    queue = inbox.get_state()
    if queue is None:
        return _INBOX_NOT_ENABLED_MSG

    try:
        wait_secs = float(timeout_secs)
    except (TypeError, ValueError):
        wait_secs = 60.0
    # Hard ceiling of five minutes; callers wanting longer simply call again.
    wait_secs = max(0.0, min(wait_secs, 300.0))

    # ``state.wait`` blocks its calling thread, so it is handed to the
    # default executor to keep the event loop free for other requests.
    arrived = await asyncio.get_running_loop().run_in_executor(
        None, queue.wait, wait_secs
    )
    if arrived is not None:
        return json.dumps(_enrich_inbound_for_agent(arrived.to_dict()))
    return json.dumps({"timeout": True, "timeout_secs": wait_secs})
|
||||
@@ -1,141 +0,0 @@
|
||||
"""Memory tool handlers — single-concern slice of the a2a_tools surface.
|
||||
|
||||
Extracted from ``a2a_tools.py`` (RFC #2873 iter 4c). Owns the two
|
||||
agent-memory MCP tools:
|
||||
|
||||
* ``tool_commit_memory`` — write to the workspace's persistent memory.
|
||||
* ``tool_recall_memory`` — search the workspace's persistent memory.
|
||||
|
||||
Both go through the platform's ``/workspaces/:id/memories`` endpoint;
|
||||
the platform is the source of truth for namespace isolation + audit
|
||||
trail. Local responsibility here is RBAC enforcement BEFORE hitting
|
||||
the network so a denied operation surfaces a clear in-band error
|
||||
instead of an opaque platform 403.
|
||||
|
||||
Imports the RBAC primitives from ``a2a_tools_rbac`` (iter 4a).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import httpx
|
||||
|
||||
from a2a_client import PLATFORM_URL, WORKSPACE_ID
|
||||
from a2a_tools_rbac import (
|
||||
auth_headers_for_heartbeat as _auth_headers_for_heartbeat,
|
||||
check_memory_read_permission as _check_memory_read_permission,
|
||||
check_memory_write_permission as _check_memory_write_permission,
|
||||
is_root_workspace as _is_root_workspace,
|
||||
)
|
||||
from builtin_tools.security import _redact_secrets
|
||||
|
||||
|
||||
async def tool_commit_memory(
    content: str,
    scope: str = "LOCAL",
    source_workspace_id: str | None = None,
) -> str:
    """Save a piece of information to the workspace's persistent memory.

    The content is passed through ``_redact_secrets`` and POSTed to
    ``/workspaces/<source>/memories`` together with the scope and the source
    workspace id.

    Args:
        content: Text to store. Required.
        scope: ``LOCAL``, ``TEAM`` or ``GLOBAL`` (case-insensitive). Any
            other value — including a non-string — is treated as ``LOCAL``.
            ``GLOBAL`` is refused unless ``_is_root_workspace()`` is true.
        source_workspace_id: Workspace the memory belongs to. Falls back to
            the module-level ``WORKSPACE_ID`` when falsy.

    Returns:
        ``{"success": true, "id": ..., "scope": ...}`` as JSON when the
        platform answers 200/201 (``id`` is ``null`` if the response body
        carries none), otherwise a plain ``"Error..."`` string.
    """
    if not content:
        return "Error: content is required"
    content = _redact_secrets(content)
    # A non-string scope (e.g. an explicit null from the client) falls back
    # to LOCAL, like any other unrecognised value, rather than raising.
    scope = scope.upper() if isinstance(scope, str) else "LOCAL"
    if scope not in ("LOCAL", "TEAM", "GLOBAL"):
        scope = "LOCAL"

    # RBAC: require memory.write permission (mirrors builtin_tools/memory.py)
    if not _check_memory_write_permission():
        return (
            "Error: RBAC — this workspace does not have the 'memory.write' "
            "permission for this operation."
        )

    # Scope enforcement: only root workspaces (tier 0) can write GLOBAL memory.
    # This prevents tenant workspaces from poisoning org-wide memory (GH#1610).
    if scope == "GLOBAL" and not _is_root_workspace():
        return (
            "Error: RBAC — only root workspaces (tier 0) can write to GLOBAL scope. "
            "Non-root workspaces may use LOCAL or TEAM scope."
        )

    src = source_workspace_id or WORKSPACE_ID
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(
                f"{PLATFORM_URL}/workspaces/{src}/memories",
                json={
                    "content": content,
                    "scope": scope,
                    # Embed source workspace so the platform can namespace-isolate
                    # and audit cross-workspace writes (GH#1610 fix).
                    "workspace_id": src,
                },
                headers=_auth_headers_for_heartbeat(src),
            )
            # Decode the body best-effort. The HTTP status, not the body,
            # decides success: an empty or non-JSON 2xx body must not be
            # reported as a failed write (the agent would retry and store a
            # duplicate), and a non-JSON error page must not hide the status.
            try:
                data = resp.json()
            except ValueError:
                data = None
            if resp.status_code in (200, 201):
                record_id = data.get("id") if isinstance(data, dict) else None
                return json.dumps({"success": True, "id": record_id, "scope": scope})
            if isinstance(data, dict):
                return f"Error: {data.get('error', resp.text)}"
            return f"Error: platform returned {resp.status_code}: {resp.text[:200]}"
    except Exception as e:
        return f"Error saving memory: {e}"
|
||||
|
||||
|
||||
async def tool_recall_memory(
    query: str = "",
    scope: str = "",
    source_workspace_id: str | None = None,
) -> str:
    """Search the workspace's persistent memory.

    Issues a GET to ``/workspaces/<source>/memories`` after the
    ``memory.read`` RBAC check passes. ``workspace_id`` is always sent as a
    query parameter; ``q`` and ``scope`` (upper-cased) are added only when
    non-empty.

    Args:
        query: Optional search text.
        scope: Optional scope filter.
        source_workspace_id: Workspace whose memories are searched. Falls
            back to the module-level ``WORKSPACE_ID`` when falsy.

    Returns:
        For a list response: ``"No memories found."`` when empty, otherwise
        one ``[<scope>] <content>`` line per entry. Any other JSON body is
        returned re-encoded as JSON. Failures yield an ``"Error..."`` string.
    """
    # RBAC: memory.read is mandatory (mirrors builtin_tools/memory.py).
    allowed = _check_memory_read_permission()
    if not allowed:
        return (
            "Error: RBAC — this workspace does not have the 'memory.read' "
            "permission for this operation."
        )

    origin = source_workspace_id or WORKSPACE_ID
    search: dict[str, str] = {"workspace_id": origin}
    if query:
        search["q"] = query
    if scope:
        search["scope"] = scope.upper()

    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(
                f"{PLATFORM_URL}/workspaces/{origin}/memories",
                params=search,
                headers=_auth_headers_for_heartbeat(origin),
            )
            found = resp.json()
            if not isinstance(found, list):
                return json.dumps(found)
            if not found:
                return "No memories found."
            return "\n".join(
                [f"[{item.get('scope', '?')}] {item.get('content', '')}" for item in found]
            )
    except Exception as exc:
        return f"Error recalling memory: {exc}"
|
||||
@@ -1,382 +0,0 @@
|
||||
"""Messaging tool handlers — single-concern slice of the a2a_tools surface.
|
||||
|
||||
Extracted from ``a2a_tools.py`` (RFC #2873 iter 4d). Owns the four
|
||||
human-and-peer messaging MCP tools + the chat-upload helper they share:
|
||||
|
||||
* ``tool_send_message_to_user`` — push a canvas-chat message via the
|
||||
platform's ``/notify`` endpoint.
|
||||
* ``tool_list_peers`` — discover peers across one or many registered
|
||||
workspaces, with side-effect of populating ``_peer_to_source`` for
|
||||
delegate-task auto-routing.
|
||||
* ``tool_get_workspace_info`` — JSON-encode the workspace's own info.
|
||||
* ``tool_chat_history`` — fetch prior conversation rows with a peer.
|
||||
* ``_upload_chat_files`` — internal helper for the message-attachments
|
||||
code path; routes local file paths through the platform's
|
||||
``/chat/uploads`` so the canvas can render them as download chips.
|
||||
|
||||
Imports the auth-header primitive from ``a2a_tools_rbac`` (iter 4a).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import mimetypes
|
||||
import os
|
||||
|
||||
import httpx
|
||||
|
||||
from a2a_client import (
|
||||
PLATFORM_URL,
|
||||
WORKSPACE_ID,
|
||||
_peer_names,
|
||||
_peer_to_source,
|
||||
get_peers_with_diagnostic,
|
||||
get_workspace_info,
|
||||
)
|
||||
from a2a_tools_rbac import auth_headers_for_heartbeat as _auth_headers_for_heartbeat
|
||||
from platform_auth import list_registered_workspaces
|
||||
|
||||
|
||||
async def _upload_chat_files(
    client: httpx.AsyncClient,
    paths: list[str],
    workspace_id: str | None = None,
) -> tuple[list[dict], str | None]:
    """Push local files to ``/workspaces/<id>/chat/uploads`` in one request.

    Each path is validated and read fully into memory, then all files are
    sent as multipart ``files`` parts. The content type of each part is
    guessed from the file name, defaulting to ``application/octet-stream``.

    Args:
        client: Open HTTP client used for the POST.
        paths: Local file paths to upload. Empty means nothing to do.
        workspace_id: Target workspace; blank or ``None`` falls back to the
            module-level ``WORKSPACE_ID``.

    Returns:
        ``(attachments, None)`` on success, where ``attachments`` is the
        ``files`` list from the platform response; ``([], "<error>")`` on
        any failure (bad path, unreadable file, transport error, non-200
        status, unparseable body, or an entry count that does not match
        ``paths``). Callers are expected not to send the message on error.
    """
    if not paths:
        return [], None

    parts: list[tuple[str, tuple[str, bytes, str]]] = []
    for path in paths:
        if not (isinstance(path, str) and path):
            return [], f"Error: invalid attachment path {path!r}"
        if not os.path.isfile(path):
            return [], f"Error: attachment not found: {path}"
        try:
            with open(path, "rb") as stream:
                blob = stream.read()
        except OSError as exc:
            return [], f"Error reading {path}: {exc}"
        # Send a real content type taken from the file name. Per the
        # original note, the upload handler trusts the part's Content-Type
        # header, so a blanket octet-stream would lose the real type.
        content_type = mimetypes.guess_type(path)[0] or "application/octet-stream"
        parts.append(("files", (os.path.basename(path), blob, content_type)))

    destination = (workspace_id or "").strip() or WORKSPACE_ID
    try:
        resp = await client.post(
            f"{PLATFORM_URL}/workspaces/{destination}/chat/uploads",
            files=parts,
            headers=_auth_headers_for_heartbeat(destination),
        )
    except Exception as exc:
        return [], f"Error uploading attachments: {exc}"

    if resp.status_code != 200:
        return [], f"Error: chat/uploads returned {resp.status_code}: {resp.text[:200]}"

    try:
        body = resp.json()
    except Exception as exc:
        return [], f"Error parsing upload response: {exc}"

    uploaded = body.get("files") or []
    # All-or-nothing: one metadata entry is required per requested path.
    if isinstance(uploaded, list) and len(uploaded) == len(paths):
        return uploaded, None
    return [], f"Error: upload returned {len(uploaded) if isinstance(uploaded, list) else 'invalid'} entries for {len(paths)} files"
|
||||
|
||||
|
||||
async def tool_broadcast_message(
    message: str,
    workspace_id: str | None = None,
) -> str:
    """Broadcast a message to the org's workspaces via the platform.

    POSTs ``{"message": ...}`` to ``/workspaces/<id>/broadcast``. The
    platform refuses with 403 when the sending workspace lacks the
    broadcast ability; any ``hint`` in that response is appended to the
    returned error.

    Args:
        message: Text to broadcast. Required.
        workspace_id: Sending workspace; blank or ``None`` falls back to
            the module-level ``WORKSPACE_ID``.

    Returns:
        ``"Broadcast sent to <n> workspace(s)"`` on a 200 response, or an
        ``"Error..."`` string otherwise.
    """
    if not message:
        return "Error: message is required"

    sender = (workspace_id or "").strip() or WORKSPACE_ID
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                f"{PLATFORM_URL}/workspaces/{sender}/broadcast",
                json={"message": message},
                headers=_auth_headers_for_heartbeat(sender),
            )
            code = resp.status_code
            if code == 200:
                reached = resp.json().get("delivered", "?")
                return f"Broadcast sent to {reached} workspace(s)"
            if code != 403:
                return f"Error: platform returned {resp.status_code}"

            # 403: the hint is optional and the body may not even be JSON.
            try:
                hint = resp.json().get("hint", "")
            except Exception:
                hint = ""
            suffix = (" " + hint) if hint else ""
            return f"Error: broadcast ability not enabled.{suffix}"
    except Exception as exc:
        return f"Error sending broadcast: {exc}"
|
||||
|
||||
|
||||
async def tool_send_message_to_user(
    message: str,
    attachments: list[str] | None = None,
    workspace_id: str | None = None,
) -> str:
    """Deliver a chat message (and optional files) to the user's canvas.

    Attachments are uploaded first through ``_upload_chat_files``; if that
    fails, its error is returned and no message is sent. The message is
    then POSTed to ``/workspaces/<id>/notify``.

    Args:
        message: Text shown in the chat. Required even when attachments are
            supplied (use a short caption).
        attachments: Optional local file paths to upload and reference from
            the message, e.g. ``["/workspace/report.pdf"]``.
        workspace_id: Workspace whose chat receives the message; blank or
            ``None`` falls back to the module-level ``WORKSPACE_ID``.

    Returns:
        A confirmation string on a 200 response, otherwise an ``"Error..."``
        string. A 403 whose body reports ``talk_to_user_disabled`` yields a
        longer explanation that points the agent at ``delegate_task``.
    """
    if not message:
        return "Error: message is required"

    recipient_ws = (workspace_id or "").strip() or WORKSPACE_ID
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            files, failure = await _upload_chat_files(
                client, attachments or [], workspace_id=recipient_ws,
            )
            if failure:
                return failure

            body_out: dict = {"message": message}
            if files:
                body_out["attachments"] = files
            resp = await client.post(
                f"{PLATFORM_URL}/workspaces/{recipient_ws}/notify",
                json=body_out,
                headers=_auth_headers_for_heartbeat(recipient_ws),
            )

            if resp.status_code == 200:
                if not files:
                    return "Message sent to user"
                return f"Message sent to user with {len(files)} attachment(s)"

            if resp.status_code == 403:
                # Best-effort decode: anything unexpected falls through to
                # the generic status message below.
                try:
                    denial = resp.json()
                    if denial.get("error") == "talk_to_user_disabled":
                        hint = denial.get("hint", "")
                        hint_part = hint + " " if hint else ""
                        return (
                            "Error: this workspace is not allowed to send messages "
                            "directly to the user (talk_to_user is disabled). "
                            + hint_part
                            + "Use delegate_task to forward your update to a parent "
                            "or supervisor workspace that can reach the user."
                        )
                except Exception:
                    pass
            return f"Error: platform returned {resp.status_code}"
    except Exception as exc:
        return f"Error sending message: {exc}"
|
||||
|
||||
|
||||
async def tool_list_peers(source_workspace_id: str | None = None) -> str:
    """Render the peers this agent can talk to as a bulleted text list.

    Source selection:
      * ``source_workspace_id`` given → only that workspace is queried.
      * Not given and more than one workspace is registered → every
        registered workspace is queried and each line is tagged with
        ``via: <first 8 chars of the source id>``.
      * Otherwise → the module-level ``WORKSPACE_ID`` is queried.

    Side effect: every listed peer is recorded in ``_peer_names`` and
    ``_peer_to_source`` (peer id → name / peer id → source workspace).

    Returns the formatted list, or a "no peers" message that includes any
    per-source diagnostics that were reported.
    """
    aggregate = False
    if source_workspace_id:
        sources: list[str] = [source_workspace_id]
    else:
        registered = list_registered_workspaces()
        if len(registered) > 1:
            sources, aggregate = registered, True
        else:
            sources = [WORKSPACE_ID]

    found: list[tuple[str, dict]] = []      # (source, peer_record)
    problems: list[tuple[str, str]] = []    # (source, diagnostic)
    for source in sources:
        peers, diagnostic = await get_peers_with_diagnostic(source_workspace_id=source)
        if peers:
            found.extend((source, peer) for peer in peers)
        elif diagnostic is not None:
            problems.append((source, diagnostic))

    if not found:
        if not problems:
            return (
                "You have no peers in the platform registry. "
                "(No parent, no children, no siblings registered.)"
            )
        joined = "; ".join(f"[{source[:8]}] {text}" for source, text in problems)
        return f"No peers found. {joined}"

    rendered = []
    for source, peer in found:
        status = peer.get("status", "unknown")
        role = peer.get("role", "")
        peer_id = peer["id"]
        # Remember the display name and the workspace the peer was seen
        # through so later calls can look both up by peer id.
        _peer_names[peer_id] = peer["name"]
        _peer_to_source[peer_id] = source
        via = f", via: {source[:8]}" if aggregate else ""
        rendered.append(
            f"- {peer['name']} (ID: {peer_id}, status: {status}, role: {role}{via})"
        )
    return "\n".join(rendered)
|
||||
|
||||
|
||||
async def tool_get_workspace_info(source_workspace_id: str | None = None) -> str:
    """Return this workspace's info as pretty-printed JSON.

    ``source_workspace_id`` is forwarded unchanged to
    ``get_workspace_info``; the result is serialised with a two-space
    indent.
    """
    return json.dumps(
        await get_workspace_info(source_workspace_id=source_workspace_id),
        indent=2,
    )
|
||||
|
||||
|
||||
async def tool_chat_history(
|
||||
peer_id: str,
|
||||
limit: int = 20,
|
||||
before_ts: str = "",
|
||||
source_workspace_id: str | None = None,
|
||||
) -> str:
|
||||
"""Fetch the prior conversation with one peer.
|
||||
|
||||
Hits ``/workspaces/<self>/activity?peer_id=<peer>&limit=<N>``
|
||||
against the workspace-server, which returns activity rows where
|
||||
the peer is either the sender (``source_id=peer`` — they sent us
|
||||
the message) or the recipient (``target_id=peer`` — we sent to
|
||||
them) of an A2A turn — both sides of the conversation in
|
||||
chronological order.
|
||||
|
||||
Args:
|
||||
peer_id: The other workspace's UUID. Same value the agent
|
||||
sees as ``peer_id`` on a peer_agent push or ``workspace_id``
|
||||
on a delegate_task call.
|
||||
limit: Maximum rows to return; capped server-side at 500. The
|
||||
default of 20 covers "most recent context for this peer"
|
||||
without flooding the agent's context window.
|
||||
before_ts: Optional RFC3339 timestamp; only rows strictly
|
||||
older are returned. Used to page backward through long
|
||||
histories — pass the oldest ``ts`` from the previous
|
||||
response. Empty (default) returns the most recent ``limit``
|
||||
rows.
|
||||
source_workspace_id: Which registered workspace's activity log
|
||||
to query. Auto-routes via ``_peer_to_source`` cache when
|
||||
unset (the workspace this peer was discovered through);
|
||||
falls back to module-level WORKSPACE_ID for single-workspace
|
||||
operators.
|
||||
|
||||
Returns a JSON-encoded list of activity rows (or an error string
|
||||
starting with ``Error:`` so the agent can branch). Each row carries
|
||||
``activity_type``, ``source_id``, ``target_id``, ``method``,
|
||||
``summary``, ``request_body``, ``response_body``, ``status``,
|
||||
``created_at`` — same shape ``inbox_peek`` and the canvas chat
|
||||
loader already see.
|
||||
"""
|
||||
if not peer_id or not isinstance(peer_id, str):
|
||||
return "Error: peer_id is required"
|
||||
if not isinstance(limit, int) or limit <= 0:
|
||||
limit = 20
|
||||
if limit > 500:
|
||||
limit = 500
|
||||
|
||||
src = source_workspace_id or _peer_to_source.get(peer_id) or WORKSPACE_ID
|
||||
|
||||
params: dict[str, str] = {
|
||||
"peer_id": peer_id,
|
||||
"limit": str(limit),
|
||||
}
|
||||
# Forward verbatim — the server route validates as RFC3339 at the
|
||||
# trust boundary and translates into a `created_at < $X` clause.
|
||||
if before_ts:
|
||||
params["before_ts"] = before_ts
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||
resp = await client.get(
|
||||
f"{PLATFORM_URL}/workspaces/{src}/activity",
|
||||
params=params,
|
||||
headers=_auth_headers_for_heartbeat(src),
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return f"Error: chat_history request failed: {exc}"
|
||||
|
||||
if resp.status_code == 400:
|
||||
# Trust-boundary rejection (malformed peer_id, etc.) — surface
|
||||
# the server's reason verbatim so the agent can correct itself.
|
||||
try:
|
||||
err = resp.json().get("error", "bad request")
|
||||
except Exception: # noqa: BLE001
|
||||
err = "bad request"
|
||||
return f"Error: {err}"
|
||||
if resp.status_code >= 400:
|
||||
return f"Error: chat_history returned HTTP {resp.status_code}"
|
||||
|
||||
try:
|
||||
rows = resp.json()
|
||||
except Exception: # noqa: BLE001
|
||||
return "Error: chat_history response was not JSON"
|
||||
if not isinstance(rows, list):
|
||||
return "Error: chat_history response was not a list"
|
||||
|
||||
# Server returns DESC (most recent first); reverse to chronological
|
||||
# so the agent reads the conversation top-down like a chat log.
|
||||
rows.reverse()
|
||||
return json.dumps(rows)
|
||||
@@ -1,138 +0,0 @@
|
||||
"""RBAC + auth-header helpers shared by all a2a_tools tool handlers.
|
||||
|
||||
Extracted from ``a2a_tools.py`` (RFC #2873 iter 4a). Centralises the
|
||||
"what can this workspace do" + "how do I prove it on a platform call"
|
||||
concerns into a single module so:
|
||||
|
||||
* Future tools added under ``a2a_tools/`` see one obvious helper to
|
||||
call instead of re-implementing the role/tier check.
|
||||
* The role-permission table is in ONE place — adding a new role
|
||||
or capability touches one file, not every tool that gates on it.
|
||||
* Tests targeting these helpers don't have to import the whole
|
||||
991-LOC ``a2a_tools`` surface.
|
||||
|
||||
Public surface:
|
||||
|
||||
* ``ROLE_PERMISSIONS`` — canonical role → action set table.
|
||||
* ``get_workspace_tier()`` — config-resolved tier (0 = root).
|
||||
* ``check_memory_write_permission()`` — boolean.
|
||||
* ``check_memory_read_permission()`` — boolean.
|
||||
* ``is_root_workspace()`` — boolean (tier == 0).
|
||||
* ``auth_headers_for_heartbeat(workspace_id=None)`` — auth-header dict
|
||||
with the multi-workspace registry lookup; tolerates ``platform_auth``
|
||||
missing on older installs (returns ``{}``).
|
||||
|
||||
Underscore-prefixed back-compat aliases (``_ROLE_PERMISSIONS``,
|
||||
``_check_memory_write_permission``, etc.) match the names previously
|
||||
exposed in ``a2a_tools`` so existing tests'
|
||||
``patch("a2a_tools._foo", ...)`` continue to work via the re-exports
|
||||
in ``a2a_tools.py``.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
|
||||
# Canonical role → permitted-action table. Kept as a plain module-level
# literal (not computed lazily) so it stays visible to static analysis
# and ``grep``. Mirrors ``builtin_tools/audit.py``.
ROLE_PERMISSIONS: dict[str, set[str]] = {
    "admin": {
        "delegate",
        "approve",
        "memory.read",
        "memory.write",
    },
    "operator": {
        "delegate",
        "approve",
        "memory.read",
        "memory.write",
    },
    "read-only": {
        "memory.read",
    },
    "no-delegation": {
        "approve",
        "memory.read",
        "memory.write",
    },
    "no-approval": {
        "delegate",
        "memory.read",
        "memory.write",
    },
    "memory-readonly": {
        "memory.read",
    },
}
|
||||
|
||||
|
||||
def get_workspace_tier() -> int:
    """Return the workspace tier (0 = root, 1+ = tenant).

    The tier is read from ``config.load_config().tier`` (defaulting to 1
    when the attribute is absent). If the config cannot be loaded for
    any reason, the ``WORKSPACE_TIER`` environment variable is used
    instead. A missing, empty, or non-numeric ``WORKSPACE_TIER`` yields
    tier 1 rather than raising, so a bad env value can never be mistaken
    for root and never crashes the caller.
    """
    try:
        from config import load_config

        return getattr(load_config(), "tier", 1)
    except Exception:
        # Deliberate best-effort: any config failure drops to the env var.
        raw = os.environ.get("WORKSPACE_TIER", "")
        try:
            return int(raw)
        except (TypeError, ValueError):
            return 1
|
||||
|
||||
|
||||
def _resolve_role_state() -> tuple[list[str], dict]:
|
||||
"""Return (roles, allowed_actions) from config.
|
||||
|
||||
Fail-closed: if config is unavailable, fall back to an "operator"
|
||||
default with no per-role overrides. Operator has memory.read +
|
||||
memory.write but not the elevated approve/delegate over GLOBAL
|
||||
scope, so a config outage doesn't grant unexpected privileges.
|
||||
"""
|
||||
try:
|
||||
from config import load_config
|
||||
|
||||
cfg = load_config()
|
||||
roles = list(getattr(cfg, "rbac", None).roles or ["operator"])
|
||||
allowed = dict(getattr(cfg, "rbac", None).allowed_actions or {})
|
||||
return roles, allowed
|
||||
except Exception:
|
||||
return ["operator"], {}
|
||||
|
||||
|
||||
def check_memory_write_permission() -> bool:
    """Report whether any configured role grants ``memory.write``.

    ``admin`` always grants it. For any other role, a per-role override
    from the config takes precedence over ``ROLE_PERMISSIONS``; roles
    found in neither table grant nothing.
    """
    roles, overrides = _resolve_role_state()
    for role in roles:
        if role == "admin":
            return True
        if role in overrides:
            granted = overrides[role]
        elif role in ROLE_PERMISSIONS:
            granted = ROLE_PERMISSIONS[role]
        else:
            continue
        if "memory.write" in granted:
            return True
    return False
|
||||
|
||||
|
||||
def check_memory_read_permission() -> bool:
    """Report whether any configured role grants ``memory.read``.

    ``admin`` always grants it. For any other role, a per-role override
    from the config takes precedence over ``ROLE_PERMISSIONS``; roles
    found in neither table grant nothing.
    """
    roles, overrides = _resolve_role_state()
    for role in roles:
        if role == "admin":
            return True
        if role in overrides:
            granted = overrides[role]
        elif role in ROLE_PERMISSIONS:
            granted = ROLE_PERMISSIONS[role]
        else:
            continue
        if "memory.read" in granted:
            return True
    return False
|
||||
|
||||
|
||||
def is_root_workspace() -> bool:
    """Tell whether the resolved workspace tier is 0 (the root tier)."""
    tier = get_workspace_tier()
    return tier == 0
|
||||
|
||||
|
||||
def auth_headers_for_heartbeat(workspace_id: str | None = None) -> dict[str, str]:
|
||||
"""Return Phase 30.1 auth headers; tolerate platform_auth being absent
|
||||
in older installs (e.g. during rolling upgrade).
|
||||
|
||||
``workspace_id`` selects the per-workspace token from the multi-
|
||||
workspace registry when set (PR-1: external agent registered in
|
||||
multiple workspaces). With no arg the legacy single-token path is
|
||||
unchanged.
|
||||
"""
|
||||
try:
|
||||
from platform_auth import auth_headers
|
||||
return auth_headers(workspace_id) if workspace_id else auth_headers()
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
# ============== Back-compat aliases for the previous a2a_tools names ==============
# The underscore-prefixed names below are the pre-extraction spellings.
# They are bound to the very same objects as the public names so that
# existing tests and call sites keep resolving; new code should use the
# public (un-prefixed) names.

_ROLE_PERMISSIONS = ROLE_PERMISSIONS

_get_workspace_tier = get_workspace_tier
_is_root_workspace = is_root_workspace

_check_memory_write_permission = check_memory_write_permission
_check_memory_read_permission = check_memory_read_permission

_auth_headers_for_heartbeat = auth_headers_for_heartbeat
|
||||
@@ -1,597 +0,0 @@
|
||||
"""Base adapter interface for agent infrastructure providers."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Mapping
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider routing — type alias + resolver used by individual adapters.
|
||||
# Each adapter defines its own ProviderRegistry with the providers it accepts.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Maps prefix → (ordered_auth_env_vars, default_base_url).
|
||||
ProviderRegistry = dict[str, tuple[tuple[str, ...], str]]
|
||||
|
||||
|
||||
def resolve_provider_routing(
|
||||
model_str: str,
|
||||
env: Mapping[str, str],
|
||||
*,
|
||||
registry: ProviderRegistry,
|
||||
runtime_config: dict[str, Any] | None = None,
|
||||
) -> tuple[str, str, str]:
|
||||
"""Resolve a ``provider:model`` string to ``(api_key, base_url, bare_model_id)``.
|
||||
|
||||
URL precedence (highest to lowest):
|
||||
1. ``<PREFIX>_BASE_URL`` env var
|
||||
2. ``runtime_config["provider_url"]``
|
||||
3. registry default for the prefix
|
||||
|
||||
Unknown prefixes fall back to OPENAI_API_KEY + api.openai.com.
|
||||
Raises RuntimeError when no API key env var is set for the prefix.
|
||||
"""
|
||||
if ":" in model_str:
|
||||
prefix, model_id = model_str.split(":", 1)
|
||||
else:
|
||||
prefix, model_id = "openai", model_str
|
||||
|
||||
env_vars, default_url = registry.get(
|
||||
prefix, (("OPENAI_API_KEY",), "https://api.openai.com/v1")
|
||||
)
|
||||
api_key = next((env[v] for v in env_vars if env.get(v)), "")
|
||||
if not api_key:
|
||||
raise RuntimeError(
|
||||
f"No API key found for provider {prefix!r} "
|
||||
f"(checked: {', '.join(env_vars)}). Set one in workspace secrets."
|
||||
)
|
||||
|
||||
env_url = env.get(f"{prefix.upper()}_BASE_URL", "")
|
||||
config_url = (runtime_config or {}).get("provider_url", "")
|
||||
base_url = env_url or config_url or default_url
|
||||
|
||||
return api_key, base_url, model_id
|
||||
|
||||
from a2a.server.agent_execution import AgentExecutor
|
||||
|
||||
from event_log import DisabledEventLog, EventLogBackend
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Shared no-op default for adapter.event_log. Safe to share across
|
||||
# adapters because every DisabledEventLog method is a pure no-op with
|
||||
# no per-instance state.
|
||||
_DISABLED_EVENT_LOG: EventLogBackend = DisabledEventLog()
|
||||
|
||||
|
||||
@dataclass
class SetupResult:
    """Bundle of values produced by the shared ``_common_setup()`` pipeline."""

    system_prompt: str
    # LoadedSkill instances
    loaded_skills: list
    # LangChain BaseTool instances
    langchain_tools: list
    is_coordinator: bool
    # child workspace dicts
    children: list
|
||||
|
||||
|
||||
@dataclass
|
||||
class AdapterConfig:
|
||||
"""Standardized config passed to every adapter."""
|
||||
model: str # e.g. "anthropic:claude-sonnet-4-6" or "openrouter:google/gemini-2.5-flash"
|
||||
system_prompt: str | None = None # Assembled system prompt text
|
||||
tools: list[str] = field(default_factory=list) # Tool names from config.yaml
|
||||
runtime_config: dict[str, Any] = field(default_factory=dict) # Raw runtime_config block
|
||||
config_path: str = "/configs" # Path to configs directory
|
||||
workspace_id: str = "" # Workspace identifier
|
||||
prompt_files: list[str] = field(default_factory=list) # Ordered prompt file names
|
||||
a2a_port: int = 8000 # Port for A2A server
|
||||
heartbeat: Any = None # HeartbeatLoop instance
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RuntimeCapabilities:
    """Flags by which an adapter declares which cross-cutting platform
    capabilities it implements itself.

    The platform ships fallback implementations (heartbeat, cron,
    durable session, ...). An adapter whose SDK already provides one of
    them sets the matching flag to True, and the platform is expected to
    skip its fallback for that capability. The flags only decide WHO
    implements a behaviour; observability (A2A protocol, activity_logs,
    the broadcaster) is documented as always running.

    Every flag defaults to False, so ``RuntimeCapabilities()`` means
    "the platform does everything".

    See project memory `project_runtime_native_pluggable.md` for the
    architecture principle these flags encode.
    """

    # Heartbeat: the adapter emits its own keep-alive to the platform's
    # broadcaster rather than relying on workspace/heartbeat.py's 30s
    # loop. Intended for SDKs with a long-lived session that naturally
    # produces progress events (e.g. claude-code streaming).
    provides_native_heartbeat: bool = False

    # Cron / schedule: scheduled triggers are handled inside the adapter
    # (Temporal workflows, Durable Functions, sidecar daemons), so the
    # platform scheduler skips polling workspace_schedules for this
    # workspace and avoids double-firing on restart.
    provides_native_scheduler: bool = False

    # Durable session: the adapter persists in-flight session state
    # across restarts (exposed via pre_stop_state/restore_state) and
    # handles QUEUED-state itself, so the platform's a2a_queue need not
    # enqueue mid-session requests.
    provides_native_session: bool = False

    # Status lifecycle: the adapter reports ready/degraded/failed on its
    # own (e.g. via heartbeat metadata) and the platform trusts that
    # report over inferring status from heartbeat error rate.
    provides_native_status_mgmt: bool = False

    # Retry: transient errors (rate limits, 5xx) are retried by the
    # adapter with its own backoff; the platform stops re-dispatching
    # requests the adapter marked as "retrying internally".
    provides_native_retry: bool = False

    # Activity log decoration: the adapter adds runtime-specific fields
    # (model, token_count, latency breakdown) to activity_log rows next
    # to the platform-defined columns.
    provides_activity_decoration: bool = False

    # Channel dispatch: the adapter talks to external channels (Slack,
    # Lark, etc.) directly instead of going through the platform
    # channels manager.
    provides_channel_dispatch: bool = False

    def to_dict(self) -> dict[str, bool]:
        """Flatten the flags into a plain ``{short_name: bool}`` dict.

        This is the serialisable shape used for the heartbeat payload
        and the /capabilities endpoint; keys drop the ``provides_``
        prefix wording and keep the declaration order.
        """
        key_to_field = (
            ("heartbeat", "provides_native_heartbeat"),
            ("scheduler", "provides_native_scheduler"),
            ("session", "provides_native_session"),
            ("status_mgmt", "provides_native_status_mgmt"),
            ("retry", "provides_native_retry"),
            ("activity_decoration", "provides_activity_decoration"),
            ("channel_dispatch", "provides_channel_dispatch"),
        )
        return {key: getattr(self, attr) for key, attr in key_to_field}
|
||||
|
||||
|
||||
class BaseAdapter(ABC):
|
||||
"""Interface every agent infrastructure adapter must implement.
|
||||
|
||||
To add a new agent infra:
|
||||
1. Create a standalone template repo (molecule-ai-workspace-template-<infra>)
|
||||
2. Implement adapter.py with a class extending BaseAdapter
|
||||
3. Add requirements.txt with your infra's dependencies + molecule-runtime
|
||||
4. Set ADAPTER_MODULE in the Dockerfile to your adapter module path
|
||||
|
||||
Cross-cutting capabilities your adapter can opt into:
|
||||
- capabilities() — declare native ownership of heartbeat, scheduler,
|
||||
session, status mgmt, etc. (see RuntimeCapabilities above)
|
||||
- idle_timeout_override() — extend the platform's per-dispatch
|
||||
silence window for SDKs with long synth turns
|
||||
- runtime_wedge.mark_wedged() / clear_wedge() — flip the workspace
|
||||
to `degraded` + auto-recover when your SDK hits a non-recoverable
|
||||
error class. Import directly from `runtime_wedge`; the heartbeat
|
||||
forwards the state to the platform automatically. See the
|
||||
runtime_wedge module docstring for the integration recipe.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def name() -> str: # pragma: no cover
|
||||
"""Return the runtime identifier (e.g. 'langgraph', 'crewai').
|
||||
This must match the 'runtime' field in config.yaml."""
|
||||
...
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def display_name() -> str: # pragma: no cover
|
||||
"""Human-readable name for UI display."""
|
||||
...
|
||||
|
||||
@staticmethod
|
||||
@abstractmethod
|
||||
def description() -> str: # pragma: no cover
|
||||
"""Short description of what this adapter provides."""
|
||||
...
|
||||
|
||||
@staticmethod
|
||||
def get_config_schema() -> dict:
|
||||
"""Return JSON Schema for runtime_config fields this adapter supports.
|
||||
Used by the Config tab UI to render the right form fields.
|
||||
Override in subclasses for adapter-specific settings."""
|
||||
return {}
|
||||
|
||||
def capabilities(self) -> "RuntimeCapabilities":
    """Report which cross-cutting capabilities this adapter implements
    natively and which it leaves to the platform fallback.

    The base implementation returns a default ``RuntimeCapabilities()``
    (every flag False), i.e. the platform owns everything. Adapters
    override this to claim ownership — for example by returning
    ``RuntimeCapabilities(provides_native_heartbeat=True,
    provides_native_session=True)``.

    See project memory `project_runtime_native_pluggable.md`.
    """
    platform_owns_everything = RuntimeCapabilities()
    return platform_owns_everything
|
||||
|
||||
def idle_timeout_override(self) -> int | None:
|
||||
"""Per-A2A-dispatch silence window override, in SECONDS.
|
||||
|
||||
Return None to use the platform default (env var
|
||||
A2A_IDLE_TIMEOUT_SECONDS, falling back to 5 minutes — see
|
||||
a2a_proxy.go:defaultIdleTimeoutDuration). Override when this
|
||||
runtime's SDK can legitimately go silent longer than the
|
||||
default before the dispatch should be considered wedged.
|
||||
|
||||
Why this is per-adapter, not just env: the env value is a
|
||||
cluster-wide knob set by ops. Different SDKs have different
|
||||
latency profiles — claude-code synthesis on Opus + tool use
|
||||
legitimately runs 8-10 min between broadcasts; hermes synth
|
||||
with custom providers can be even slower. Hardcoding 5min for
|
||||
everyone either cancels real work (claude-code synth) or
|
||||
leaves wedged runtimes (langgraph) hanging too long.
|
||||
|
||||
Platform reads this from the heartbeat payload and stashes
|
||||
it per-workspace; dispatchA2A consults it before applying the
|
||||
idle timer. None / unset / zero falls through to the global
|
||||
default — same behavior as before this hook landed."""
|
||||
return None
|
||||
|
||||
@property
def event_log(self) -> EventLogBackend:
    """The adapter's in-process event-log backend.

    Adapters may call ``self.event_log.append(kind=..., payload=...)``
    to record runtime-internal events (tool dispatch, skill load,
    executor errors, peer-handoff); see ``event_log.py`` for the
    protocol.

    When no backend has been assigned (or a falsy one was), the shared
    no-op ``_DISABLED_EVENT_LOG`` is returned, so adapters that never
    configure a backend still work.
    """
    backend = getattr(self, "_event_log", None)
    return backend if backend else _DISABLED_EVENT_LOG


@event_log.setter
def event_log(self, backend: EventLogBackend) -> None:
    # Stored on the instance; the getter above falls back to the shared
    # no-op whenever this was never set.
    self._event_log = backend
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Plugin install hooks
|
||||
# ------------------------------------------------------------------
|
||||
# New pipeline: each plugin ships per-runtime adaptors resolved via
|
||||
# `plugins_registry.resolve()`. Adapters expose hooks below that
|
||||
# adaptors call to wire plugin content into the runtime.
|
||||
#
|
||||
# Default implementations are filesystem-only (write to /configs,
|
||||
# append to CLAUDE.md). Runtimes with a dynamic tool registry
|
||||
# (e.g. DeepAgents sub-agents) override the hooks to also register
|
||||
# in-process state.
|
||||
|
||||
def memory_filename(self) -> str:
    """Name of the file under /configs used as long-lived memory.

    Defaults to CLAUDE.md, which the original documentation says both
    Claude Code and DeepAgents read natively. Override only for a
    runtime that expects another filename.
    """
    default_memory_file = "CLAUDE.md"
    return default_memory_file
|
||||
|
||||
def register_tool_hook(self, name: str, fn) -> None:
    """Hook for registering a plugin tool with the runtime; no-op here.

    Runtimes that discover tools by scanning the filesystem at startup
    need nothing beyond the adaptor's file-write step. Runtimes with a
    dynamic tool registry override this to register ``fn`` under
    ``name`` in-process.
    """
    return
|
||||
|
||||
async def transcript_lines(self, since: int = 0, limit: int = 100) -> dict:
    """Return live transcript entries for the most recent agent session.

    The base implementation is the "not supported" answer for runtimes
    without a per-session log: no lines, and the cursor echoed back
    unchanged. Subclasses that can read such a log override this.

    Args:
        since: line offset to skip — the caller's last cursor
            (0 = from the start).
        limit: maximum number of lines to return. Unused by the base
            implementation.

    Returns:
        ``{runtime, supported, lines, cursor, more, source}``: ``cursor``
        is the offset to pass on the next poll, ``more`` says whether
        lines remain past ``limit``, and ``source`` is the file the
        lines came from. Here: ``supported=False``, ``lines=[]``,
        ``cursor=since``, ``more=False``, ``source=None``.
    """
    return dict(
        runtime=self.name(),
        supported=False,
        lines=[],
        cursor=since,
        more=False,
        source=None,
    )
|
||||
|
||||
def pre_stop_state(self) -> dict:
    """Capture in-memory state for pause/resume serialization.

    Per the original documentation this is called by main.py's shutdown
    handler, and the returned dict is scrubbed before being written to
    /configs/.agent_snapshot.json.

    What the base implementation collects:

    1. ``session_id`` — read from ``self._executor._session_id`` when
       both exist and the value is truthy.
    2. ``transcript_lines`` — only when ``self.transcript_lines`` is a
       plain (non-async) callable that reports ``supported``. Up to
       ``lib.pre_stop.MAX_TRANSCRIPT_LINES`` lines are requested. When
       ``transcript_lines`` is a coroutine function (as the base class's
       own is) this step is skipped; such adapters must override
       ``pre_stop_state`` to include a transcript.

    Transcript capture is best-effort: any failure in it, including
    ``lib.pre_stop`` being unimportable, is swallowed so the session
    handle is still returned.

    Returns:
        A JSON-serializable dict, possibly empty.
    """
    state: dict = {}

    # Session handle — needed to resume the session after a restart.
    executor = getattr(self, "_executor", None)
    if executor is not None:
        session_id = getattr(executor, "_session_id", None)
        if session_id:
            state["session_id"] = session_id

    # Recent conversation log. Everything, including the import of the
    # line cap, lives inside the guard so a failure here can never lose
    # the session_id captured above.
    try:
        import inspect as _inspect

        transcript_fn = self.transcript_lines
        if not _inspect.iscoroutinefunction(transcript_fn):
            from lib.pre_stop import MAX_TRANSCRIPT_LINES

            transcript = transcript_fn(since=0, limit=MAX_TRANSCRIPT_LINES)
            if transcript.get("supported"):
                state["transcript_lines"] = transcript.get("lines", [])
    except Exception:
        # Deliberate best-effort: never let transcript capture block
        # serialization.
        pass

    return state
|
||||
|
||||
def restore_state(self, snapshot: dict) -> None:
    """Load state from a pause/resume snapshot back onto the adapter.

    Per the original documentation, main.py calls this on first boot
    when /configs/.agent_snapshot.json exists, before the A2A server
    starts accepting requests.

    The base implementation copies two optional snapshot entries onto
    the instance for ``create_executor()`` or the executor to pick up:

    * ``snapshot["session_id"]`` → ``self._snapshot_session_id``
    * ``snapshot["transcript_lines"]`` → ``self._snapshot_transcript``

    A missing key is stored as ``None``.

    Args:
        snapshot: The dict previously produced by ``pre_stop_state()``.
    """
    saved_session: str | None = snapshot.get("session_id")
    self._snapshot_session_id = saved_session
    saved_transcript: list | None = snapshot.get("transcript_lines")
    self._snapshot_transcript = saved_transcript
|
||||
|
||||
def register_subagent_hook(self, name: str, spec: dict) -> None:
    """Hook for registering a plugin sub-agent; does nothing by default.

    Runtimes with sub-agent support (DeepAgents) override it.
    """
    return
|
||||
|
||||
def append_to_memory_hook(self, config: AdapterConfig, filename: str, content: str) -> None:
    """Append ``content`` to ``<config.config_path>/<filename>`` unless
    its marker is already there.

    Idempotence: the stripped first line of ``content`` is the marker.
    If the marker is non-empty and occurs anywhere in the existing file,
    nothing is written. Callers should therefore start ``content`` with
    a unique header (e.g. ``# Plugin: molecule-dev-conventions``).

    The parent directory is created when missing. A newline is inserted
    before the block if the existing file does not end with one, and the
    block itself is always newline-terminated.

    The file is read and written as UTF-8 explicitly, so non-ASCII
    content round-trips the same way regardless of the process locale.

    Args:
        config: Adapter config; only ``config_path`` is read.
        filename: Target file name relative to ``config.config_path``.
        content: Text block to append.
    """
    import os

    target = os.path.join(config.config_path, filename)
    marker = content.splitlines()[0].strip() if content else ""

    existing = ""
    if os.path.exists(target):
        with open(target, encoding="utf-8") as f:
            existing = f.read()

    if marker and marker in existing:
        logger.info("append_to_memory: %s already contains %r — skipping", filename, marker)
        return

    os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
    with open(target, "a", encoding="utf-8") as f:
        # Keep the appended block on its own line.
        if existing and not existing.endswith("\n"):
            f.write("\n")
        f.write(content if content.endswith("\n") else content + "\n")
    logger.info("append_to_memory: appended %d chars to %s", len(content), filename)
|
||||
|
||||
async def install_plugins_via_registry(
    self,
    config: AdapterConfig,
    plugins,
) -> list:
    """Install every loaded plugin through the per-runtime adaptor pipeline.

    For each entry in ``plugins.plugins`` the adaptor for this runtime is
    looked up with :func:`plugins_registry.resolve` and its ``install(ctx)``
    coroutine is awaited. A failing install is logged and skipped; it does
    not stop the remaining plugins.

    Adapters whose runtime supports this pipeline call it from ``setup()``
    instead of the legacy ``inject_plugins()``.

    Args:
        config: Adapter config; ``config_path`` and ``workspace_id`` are read.
        plugins: Loaded-plugins object exposing a ``plugins`` iterable whose
            items have ``name`` and ``path``.

    Returns:
        The install results of the plugins that installed without raising,
        so callers can surface warnings (e.g. raw-drop fallback hits).
    """
    from pathlib import Path

    from plugins_registry import InstallContext, resolve

    # Registry runtime keys use underscores, e.g. "claude-code" -> "claude_code".
    runtime = self.name().replace("-", "_")

    def _append_to_memory(fn, c):
        # Bind the adapter config so adaptors only supply filename + content.
        return self.append_to_memory_hook(config, fn, c)

    installed = []
    for plugin in plugins.plugins:
        plugin_root = Path(plugin.path)
        adaptor, source = resolve(plugin.name, runtime, plugin_root)
        ctx = InstallContext(
            configs_dir=Path(config.config_path),
            workspace_id=config.workspace_id,
            runtime=runtime,
            plugin_root=plugin_root,
            memory_filename=self.memory_filename(),
            register_tool=self.register_tool_hook,
            register_subagent=self.register_subagent_hook,
            append_to_memory=_append_to_memory,
        )
        try:
            outcome = await adaptor.install(ctx)
            installed.append(outcome)
            logger.info(
                "Plugin %s installed via %s adaptor (warnings: %d)",
                plugin.name, source, len(outcome.warnings),
            )
        except Exception as exc:
            # Best-effort: one broken plugin must not block the others.
            logger.exception("Plugin %s install via %s failed: %s", plugin.name, source, exc)

    return installed
|
||||
|
||||
async def inject_plugins(self, config: AdapterConfig, plugins) -> None:
    """Legacy plugin hook, retained for backwards compatibility.

    The default simply delegates to ``install_plugins_via_registry`` and
    discards its result list. Adapters that have not migrated yet may still
    override this method with their own logic.

    Args:
        config: Adapter config, passed through unchanged.
        plugins: Loaded-plugins object, passed through unchanged.
    """
    _ = await self.install_plugins_via_registry(config, plugins)
    return None
|
||||
|
||||
async def _common_setup(self, config: AdapterConfig) -> SetupResult:
    """Run the shared setup pipeline used by all adapters.

    Steps, in order: load and inject plugins, load workspace and plugin
    skills (de-duplicated by skill id), assemble the tool list, detect
    coordinator mode, and build the system prompt.

    Args:
        config: Adapter config; ``config_path``, ``tools``, ``workspace_id``
            and ``prompt_files`` are read.

    Returns:
        A ``SetupResult`` carrying the system prompt, loaded skills, the tool
        list (as ``langchain_tools``), the coordinator flag and the children.
        Adapters convert the tools to their native format if needed.
    """
    from plugins import load_plugins
    from skill_loader.loader import load_skills
    from coordinator import get_children, build_children_description
    from prompt import build_system_prompt, get_peer_capabilities, get_platform_instructions
    from builtin_tools.approval import request_approval
    from builtin_tools.delegation import delegate_task, delegate_task_async, check_task_status
    from builtin_tools.memory import commit_memory, recall_memory
    from builtin_tools.sandbox import run_code

    platform_url = os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")

    # Plugins: per-workspace directory first, shared directory as fallback.
    plugins = load_plugins(
        workspace_plugins_dir=os.path.join(config.config_path, "plugins"),
        shared_plugins_dir=os.environ.get("PLUGINS_DIR", "/plugins"),
    )
    await self.inject_plugins(config, plugins)
    if plugins.plugin_names:
        logger.info(f"Plugins: {', '.join(plugins.plugin_names)}")

    # Skills: workspace skills first, then plugin skills that are not already
    # present. The runtime name is passed so SKILL.md frontmatter
    # `runtime: [...]` can opt a skill out of incompatible adapters.
    runtime_name = type(self).name()
    loaded_skills = load_skills(config.config_path, config.tools, current_runtime=runtime_name)
    known_ids = {s.metadata.id for s in loaded_skills}
    for skills_dir in plugins.skill_dirs:
        candidate_names = [
            entry for entry in os.listdir(skills_dir)
            if os.path.isdir(os.path.join(skills_dir, entry))
        ]
        for skill in load_skills(skills_dir, candidate_names, current_runtime=runtime_name):
            if skill.metadata.id in known_ids:
                continue
            loaded_skills.append(skill)
            known_ids.add(skill.metadata.id)
    logger.info(f"Loaded {len(loaded_skills)} skills: {[s.metadata.id for s in loaded_skills]}")

    # Core platform tools. Their names mirror the platform_tools registry, so
    # names referenced by get_a2a_instructions/get_hma_instructions exist as
    # @tool symbols here (pinned by tests/test_platform_tools.py).
    all_tools = [
        delegate_task, delegate_task_async, check_task_status,
        request_approval, commit_memory, recall_memory, run_code,
    ]
    for skill in loaded_skills:
        all_tools.extend(skill.tools)

    # Coordinator mode: a workspace with children also gets the routing tool.
    children = await get_children()
    is_coordinator = len(children) > 0
    if is_coordinator:
        from coordinator import route_task_to_team
        logger.info(f"Coordinator mode: {len(children)} children")
        all_tools.append(route_task_to_team)

    # System prompt. Parent->child knowledge sharing used to go through
    # `shared_context` (parent config.yaml file paths injected into the child
    # prompt at boot). That path was removed: agents now pull team-scoped
    # knowledge on demand via memory v2's team:<id> namespace (recall_memory).
    # See RFC #2789 for the future shared-file storage for large blobs.
    peers = await get_peer_capabilities(platform_url, config.workspace_id)
    platform_instructions = await get_platform_instructions(platform_url, config.workspace_id)
    coordinator_prompt = build_children_description(children) if is_coordinator else ""

    extra_prompts = list(plugins.prompt_fragments)
    if coordinator_prompt:
        extra_prompts.append(coordinator_prompt)

    system_prompt = build_system_prompt(
        config.config_path, config.workspace_id, loaded_skills, peers,
        prompt_files=config.prompt_files,
        plugin_rules=plugins.rules,
        plugin_prompts=extra_prompts,
        platform_instructions=platform_instructions,
    )

    return SetupResult(
        system_prompt=system_prompt,
        loaded_skills=loaded_skills,
        langchain_tools=all_tools,
        is_coordinator=is_coordinator,
        children=children,
    )
|
||||
|
||||
@abstractmethod
|
||||
async def setup(self, config: AdapterConfig) -> None:
|
||||
"""One-time setup: validate config, prepare internal state.
|
||||
Called after deps are installed but before create_executor().
|
||||
Raise RuntimeError if setup fails (missing deps, bad config, etc.)."""
|
||||
... # pragma: no cover
|
||||
|
||||
@abstractmethod
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
    """Build the ``AgentExecutor`` used for A2A integration.

    The A2A server's ``DefaultRequestHandler`` calls ``execute()`` on the
    returned executor.

    Subclasses should also keep the executor on ``self._executor`` so that
    ``pre_stop_state()`` can reach it for serialization.
    """
    ...  # pragma: no cover
|
||||
@@ -1,22 +0,0 @@
|
||||
"""Adapter registry shim.
|
||||
|
||||
Adapters extracted to standalone repos (molecule-ai-workspace-template-*).
|
||||
ADAPTER_MODULE env var is the primary discovery mechanism in production.
|
||||
This shim provides backward-compatible imports for local dev + tests.
|
||||
"""
|
||||
import importlib
|
||||
import os
|
||||
import logging
|
||||
from adapter_base import BaseAdapter, AdapterConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def get_adapter(runtime: str) -> type[BaseAdapter]:
    """Return the adapter class named by the ``ADAPTER_MODULE`` env var.

    The module named in ``ADAPTER_MODULE`` is imported and its module-level
    ``Adapter`` attribute is returned.

    Args:
        runtime: Runtime name; only used in the error message.

    Raises:
        KeyError: If ``ADAPTER_MODULE`` is unset or empty.
    """
    module_path = os.environ.get("ADAPTER_MODULE")
    if not module_path:
        raise KeyError(
            f"No ADAPTER_MODULE set for runtime '{runtime}'. "
            "Adapters now live in standalone template repos."
        )
    return importlib.import_module(module_path).Adapter
|
||||
@@ -1,2 +0,0 @@
|
||||
"""Re-export from adapter_base for backward compat."""
|
||||
from adapter_base import * # noqa: F401,F403
|
||||
@@ -1,130 +0,0 @@
|
||||
# Google ADK Adapter
|
||||
|
||||
Molecule AI workspace adapter for [Google Agent Development Kit (ADK)](https://github.com/google/adk-python) — Google's official multi-agent Python SDK (~19k ⭐, Apache-2.0).
|
||||
|
||||
## Overview
|
||||
|
||||
This adapter bridges the A2A protocol used by the Molecule AI platform to Google ADK's runner/session model. Agents are backed by Google Gemini models via AI Studio or Vertex AI. Each workspace gets an `LlmAgent` wrapped in a `Runner` with an `InMemorySessionService`; sessions are tied to A2A task context IDs for stable, isolated per-conversation state.
|
||||
|
||||
**Runtime key:** `google-adk`
|
||||
|
||||
## Installation
|
||||
|
||||
The adapter dependencies are installed automatically by `entrypoint.sh` from this directory's `requirements.txt`:
|
||||
|
||||
```bash
|
||||
pip install -r adapters/google-adk/requirements.txt
|
||||
```
|
||||
|
||||
You'll also need a Google API key (AI Studio) or Vertex AI credentials.
|
||||
|
||||
## Configuration
|
||||
|
||||
### `config.yaml`
|
||||
|
||||
```yaml
|
||||
runtime: google-adk
|
||||
model: google:gemini-2.0-flash # or gemini-1.5-pro, gemini-2.5-flash, etc.
|
||||
runtime_config:
|
||||
agent_name: my-agent # optional, default: molecule-adk-agent
|
||||
max_output_tokens: 8192 # optional, default: 8192
|
||||
temperature: 1.0 # optional, default: 1.0
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Required | Description |
|
||||
|----------|----------|-------------|
|
||||
| `GOOGLE_API_KEY` | Yes (unless Vertex AI) | Google AI Studio API key |
|
||||
| `GOOGLE_GENAI_USE_VERTEXAI` | No | Set to `"1"` to use Vertex AI instead of AI Studio |
|
||||
| `GOOGLE_CLOUD_PROJECT` | When using Vertex AI | GCP project ID |
|
||||
| `GOOGLE_CLOUD_LOCATION` | When using Vertex AI | GCP region, e.g. `"us-central1"` |
|
||||
|
||||
## Usage Example
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
from adapter_base import AdapterConfig
|
||||
from adapters.google_adk.adapter import GoogleADKAdapter
|
||||
|
||||
async def main():
|
||||
config = AdapterConfig(
|
||||
model="google:gemini-2.0-flash",
|
||||
system_prompt="You are a helpful assistant.",
|
||||
runtime_config={
|
||||
"agent_name": "demo-agent",
|
||||
"max_output_tokens": 1024,
|
||||
"temperature": 0.7,
|
||||
},
|
||||
workspace_id="ws-demo",
|
||||
)
|
||||
|
||||
adapter = GoogleADKAdapter()
|
||||
await adapter.setup(config) # validates keys, loads plugins/skills
|
||||
|
||||
executor = await adapter.create_executor(config) # returns GoogleADKA2AExecutor
|
||||
# executor.execute(context, event_queue) is called by the A2A server per turn
|
||||
print(f"Adapter: {adapter.display_name()} — model {config.model}")
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
### Running via A2A
|
||||
|
||||
Once the workspace is provisioned, send A2A messages as normal:
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:8000 \
|
||||
-H 'Content-Type: application/json' \
|
||||
-d '{
|
||||
"method": "message/send",
|
||||
"params": {
|
||||
"message": {
|
||||
"role": "user",
|
||||
"parts": [{"kind": "text", "text": "What is 2 + 2?"}]
|
||||
}
|
||||
}
|
||||
}'
|
||||
```
|
||||
|
||||
## Supported Models
|
||||
|
||||
Any model supported by Google ADK and available through your credential path:
|
||||
|
||||
| Model | Notes |
|
||||
|-------|-------|
|
||||
| `gemini-2.0-flash` | Recommended — fast, cost-effective |
|
||||
| `gemini-2.5-flash` | Latest preview, strong reasoning |
|
||||
| `gemini-1.5-pro` | Higher capability, higher latency |
|
||||
| `gemini-1.5-flash` | Fast, lower cost |
|
||||
|
||||
Use the `google:` prefix in `config.yaml` — the adapter strips it before passing the model name to ADK.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
A2A Request
|
||||
│
|
||||
▼
|
||||
GoogleADKA2AExecutor.execute()
|
||||
│
|
||||
├── extract_message_text() ← shared_runtime helper
|
||||
├── _ensure_session() ← create/reuse InMemorySessionService session
|
||||
├── _build_content() ← wrap text in google.genai.types.Content
|
||||
│
|
||||
▼
|
||||
runner.run_async(session_id, user_id, new_message)
|
||||
│
|
||||
▼
|
||||
ADK Event stream → filter is_final_response() → extract text
|
||||
│
|
||||
▼
|
||||
event_queue.enqueue_event(new_text_message(reply))
|
||||
│
|
||||
▼
|
||||
A2A Response
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
Apache-2.0 — same as [google/adk-python](https://github.com/google/adk-python).
|
||||
@@ -1,408 +0,0 @@
|
||||
"""Google ADK adapter for Molecule AI workspace runtime.
|
||||
|
||||
Wraps Google's Agent Development Kit (google-adk v1.x) as a Molecule AI
|
||||
WorkspaceAdapter, bridging the A2A protocol to Google ADK's runner/session
|
||||
model.
|
||||
|
||||
Google ADK concepts used
|
||||
------------------------
|
||||
- ``google.adk.agents.LlmAgent`` — An LLM-backed agent with instructions and
|
||||
optional tools. Declared with ``model``, ``name``, and ``instruction``.
|
||||
- ``google.adk.runners.Runner`` — Drives one or more agents inside a session;
|
||||
``run_async()`` streams ``Event`` objects, including the final response text.
|
||||
- ``google.adk.sessions.InMemorySessionService`` — Manages session state in
|
||||
memory. Each ``Runner`` owns a single ``InMemorySessionService`` instance.
|
||||
|
||||
Runtime-config keys (all optional)
|
||||
------------------------------------
|
||||
``max_output_tokens`` — int, default 8192. Forwarded to the ADK ``GenerateContentConfig``.
|
||||
``temperature`` — float, default 1.0.
|
||||
``agent_name`` — str, default ``"molecule-adk-agent"``.
|
||||
|
||||
Environment variables
|
||||
---------------------
|
||||
``GOOGLE_API_KEY`` — Google AI Studio key (required for ``gemini-*`` models).
|
||||
``GOOGLE_GENAI_USE_VERTEXAI`` — set to ``"1"`` to use Vertex AI instead of AI
|
||||
Studio. In that case supply
|
||||
``GOOGLE_CLOUD_PROJECT`` and
|
||||
``GOOGLE_CLOUD_LOCATION`` as well.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
from a2a.server.agent_execution import AgentExecutor, RequestContext
|
||||
from a2a.server.events import EventQueue
|
||||
from a2a.helpers import new_text_message
|
||||
|
||||
from adapter_base import AdapterConfig, BaseAdapter
|
||||
|
||||
# Import sanitize_agent_error from the workspace package. The adapter lives
|
||||
# in the workspace/adapters/ hierarchy so the workspace package root is
|
||||
# always importable as long as the module is loaded from within a workspace.
|
||||
# In standalone template repos, this import resolves via the workspace package
|
||||
# entry point that also provides adapter_base.
|
||||
try:
|
||||
from executor_helpers import sanitize_agent_error # type: ignore[attr-defined]
|
||||
except ImportError: # pragma: no cover
|
||||
sanitize_agent_error = None # fallback: below handler falls back to class-name only
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_DEFAULT_AGENT_NAME = "molecule-adk-agent"
|
||||
_DEFAULT_MAX_OUTPUT_TOKENS = 8192
|
||||
_DEFAULT_TEMPERATURE = 1.0
|
||||
_NO_TEXT_MSG = "Error: message contained no text content."
|
||||
_NO_RESPONSE_MSG = "(no response generated)"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKA2AExecutor
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class GoogleADKA2AExecutor(AgentExecutor):
    """A2A executor backed by a Google ADK ``Runner``.

    Each executor instance owns a single ``Runner`` (built together with an
    ``InMemorySessionService`` unless a runner is injected). Sessions are
    created on first use and reused across subsequent turns; the session id
    is the A2A ``context_id``, falling back to ``"default-session"``.

    Parameters
    ----------
    model:
        ADK model identifier, e.g. ``"gemini-2.0-flash"`` or
        ``"gemini-1.5-pro"``.
    system_prompt:
        Optional instruction passed to ``LlmAgent(instruction=...)``.
    agent_name:
        Internal ADK agent name. Defaults to ``_DEFAULT_AGENT_NAME``. Also
        used as the ADK ``app_name`` for the runner and its sessions.
    max_output_tokens:
        Token cap forwarded to ``GenerateContentConfig``.
    temperature:
        Sampling temperature forwarded to ``GenerateContentConfig``.
    heartbeat:
        Optional heartbeat object; stored on ``self._heartbeat`` and not used
        by this class.
    _runner:
        Inject a pre-built ``Runner`` — for testing only. When provided,
        the real ADK ``Runner`` is never constructed.
    """

    def __init__(
        self,
        model: str,
        system_prompt: str | None = None,
        agent_name: str = _DEFAULT_AGENT_NAME,
        max_output_tokens: int = _DEFAULT_MAX_OUTPUT_TOKENS,
        temperature: float = _DEFAULT_TEMPERATURE,
        heartbeat: Any = None,
        _runner: Any = None,
    ) -> None:
        self.model = model
        self.system_prompt = system_prompt
        self.agent_name = agent_name
        self.max_output_tokens = max_output_tokens
        self.temperature = temperature
        self._heartbeat = heartbeat
        # Session ids already known to exist, so later turns skip the lookup.
        self._sessions_created: set[str] = set()

        if _runner is not None:
            # Test injection — skip building the real ADK objects.
            self._runner = _runner
        else:
            self._runner = self._build_runner()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _adk_agent_name(raw_name: str) -> str:
        """Return ``raw_name`` coerced into a valid Python identifier.

        Non-word characters become underscores, and a leading underscore is
        added when the result still is not an identifier (e.g. it is empty or
        starts with a digit). Names that are already identifiers are returned
        unchanged.
        """
        import re

        candidate = re.sub(r"\W", "_", raw_name)
        return candidate if candidate.isidentifier() else f"_{candidate}"

    def _build_runner(self) -> Any:  # pragma: no cover — requires real ADK
        """Construct a Google ADK ``Runner`` with an ``LlmAgent``.

        Lazy-imports ``google.adk`` / ``google.genai`` so the rest of the
        workspace runtime doesn't pull them in on startup; they are only
        needed when this executor is actually instantiated.
        """
        from google.adk.agents import LlmAgent
        from google.adk.runners import Runner
        from google.adk.sessions import InMemorySessionService
        from google.genai.types import GenerateContentConfig

        agent = LlmAgent(
            # NOTE(review): ADK is understood to validate agent names as
            # Python identifiers, which the hyphenated default would fail —
            # confirm against the pinned google-adk version.
            name=self._adk_agent_name(self.agent_name),
            model=self.model,
            instruction=self.system_prompt or "",
            # Forward the sampling settings; previously they were stored on
            # the executor but never reached the model.
            generate_content_config=GenerateContentConfig(
                max_output_tokens=self.max_output_tokens,
                temperature=self.temperature,
            ),
        )

        # app_name stays the raw agent_name so it matches _ensure_session().
        return Runner(
            agent=agent,
            app_name=self.agent_name,
            session_service=InMemorySessionService(),
        )

    async def _ensure_session(self, session_id: str, user_id: str) -> None:
        """Create a session in the runner's session service if it doesn't exist yet."""
        if session_id in self._sessions_created:
            return
        session_service = self._runner.session_service
        existing = await session_service.get_session(
            app_name=self.agent_name,
            user_id=user_id,
            session_id=session_id,
        )
        if existing is None:
            await session_service.create_session(
                app_name=self.agent_name,
                user_id=user_id,
                session_id=session_id,
            )
        self._sessions_created.add(session_id)

    def _extract_text(self, context: RequestContext) -> str:
        """Pull plain text out of the A2A message parts."""
        from shared_runtime import extract_message_text

        return extract_message_text(context)

    def _build_content(self, user_text: str) -> Any:
        """Wrap user text in an ADK-compatible ``Content`` object."""
        from google.genai.types import Content, Part

        return Content(role="user", parts=[Part(text=user_text)])

    @staticmethod
    def _event_texts(event: Any) -> list[str]:
        """Return the non-empty text fragments carried by one ADK event.

        Content is read from ``event.response`` when that attribute is
        present, otherwise from the event itself. A missing ``content`` or a
        ``parts`` of ``None`` yields an empty list instead of raising.
        """
        carrier = getattr(event, "response", None)
        if carrier is None:
            # NOTE(review): ADK events are understood to expose ``content``
            # directly rather than via ``response`` — confirm against the
            # pinned google-adk version.
            carrier = event
        content = getattr(carrier, "content", None)
        parts = getattr(content, "parts", None) or []
        texts = (getattr(part, "text", None) for part in parts)
        return [text for text in texts if text]

    # ------------------------------------------------------------------
    # AgentExecutor interface
    # ------------------------------------------------------------------

    async def execute(self, context: RequestContext, event_queue: EventQueue) -> None:
        """Run a single ADK turn and enqueue the reply as an A2A message.

        Sequence:
        1. Extract user text from the A2A message parts; if there is none,
           enqueue ``_NO_TEXT_MSG`` and stop.
        2. Ensure an ADK session exists for this context_id.
        3. Call ``runner.run_async()`` and collect text from the events that
           report ``is_final_response()``.
        4. Concatenate that text; fall back to ``_NO_RESPONSE_MSG`` when the
           model produced no output.
        5. Enqueue the reply via ``event_queue``.

        Any exception raised in steps 2-5 is logged and reported to the
        caller as a text message rather than propagated.
        """
        user_text = self._extract_text(context)
        if not user_text:
            parts = getattr(getattr(context, "message", None), "parts", None)
            logger.warning("GoogleADKA2AExecutor: no text in message parts: %s", parts)
            await event_queue.enqueue_event(new_text_message(_NO_TEXT_MSG))
            return

        session_id = getattr(context, "context_id", None) or "default-session"
        user_id = "molecule-user"

        try:
            await self._ensure_session(session_id, user_id)

            content = self._build_content(user_text)
            response_parts: list[str] = []

            async for event in self._runner.run_async(
                session_id=session_id,
                user_id=user_id,
                new_message=content,
            ):
                # Only final-response events contribute to the reply.
                if not getattr(event, "is_final_response", lambda: False)():
                    continue
                response_parts.extend(self._event_texts(event))

            final_text = "".join(response_parts).strip() or _NO_RESPONSE_MSG
            await event_queue.enqueue_event(new_text_message(final_text))

        except Exception as exc:
            logger.error(
                "GoogleADKA2AExecutor: execution error [model=%s]: %s",
                self.model,
                type(exc).__name__,
                exc_info=True,
            )
            # Give callers actionable context without workspace log access.
            # sanitize_agent_error is expected to scrub secrets; when it is
            # unavailable (import failed), fall back to the class name only.
            if sanitize_agent_error is not None:
                msg = sanitize_agent_error(stderr=str(exc))
            else:
                msg = f"Agent error: {type(exc).__name__}"
            await event_queue.enqueue_event(new_text_message(msg))

    async def cancel(self, context: RequestContext, event_queue: EventQueue) -> None:
        """Cancel a running task by emitting a final canceled status event."""
        from a2a.types import TaskState, TaskStatus, TaskStatusUpdateEvent

        await event_queue.enqueue_event(
            TaskStatusUpdateEvent(
                status=TaskStatus(state=TaskState.TASK_STATE_CANCELED),
                final=True,
            )
        )
|
||||
|
||||
|
||||
class MissingContent:
    """Stand-in used when ``response.content`` is ``None``.

    It exposes an empty ``parts`` list so callers can read ``.parts``
    without hitting an ``AttributeError``.
    """

    # Class-level empty list, shared by all instances.
    parts: list = list()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKAdapter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class GoogleADKAdapter(BaseAdapter):
    """Molecule AI workspace adapter for Google ADK (google-adk v1.x).

    Implements the ``BaseAdapter`` lifecycle:
    - ``setup()`` — checks credentials and runs ``_common_setup()``.
    - ``create_executor()`` — returns a ``GoogleADKA2AExecutor`` configured
      from ``AdapterConfig`` and keeps it on ``self._executor``.
    """

    # Stored by setup(); consumed by create_executor()
    _setup_result: Any = None

    # ------------------------------------------------------------------
    # Identity
    # ------------------------------------------------------------------

    @staticmethod
    def name() -> str:
        """Runtime identifier — matches the ``runtime`` field in config.yaml."""
        return "google-adk"

    @staticmethod
    def display_name() -> str:
        """Human-readable name shown in the Molecule AI UI."""
        return "Google ADK"

    @staticmethod
    def description() -> str:
        """Short description of this adapter's capabilities."""
        return (
            "Google Agent Development Kit (ADK) adapter. "
            "Runs LLM agents via Google Gemini models using the official "
            "google-adk Python SDK (Apache-2.0)."
        )

    @staticmethod
    def get_config_schema() -> dict:
        """JSON Schema for runtime_config fields rendered in the Config tab."""
        return {
            "type": "object",
            "properties": {
                "agent_name": {
                    "type": "string",
                    "default": _DEFAULT_AGENT_NAME,
                    "description": "Internal ADK agent name",
                },
                "max_output_tokens": {
                    "type": "integer",
                    "default": _DEFAULT_MAX_OUTPUT_TOKENS,
                    "description": "Maximum output tokens for the Gemini model",
                },
                "temperature": {
                    "type": "number",
                    "default": _DEFAULT_TEMPERATURE,
                    "minimum": 0.0,
                    "maximum": 2.0,
                    "description": "Sampling temperature",
                },
            },
            "additionalProperties": False,
        }

    @staticmethod
    def _rc_value(rc: dict, key: str, default: Any) -> Any:
        """Return ``rc[key]``, or ``default`` when the key is absent or ``None``.

        An explicit ``null`` in config would otherwise reach ``int()`` /
        ``float()`` and raise. Falsy-but-valid values such as a temperature
        of ``0.0`` are kept.
        """
        value = rc.get(key)
        return default if value is None else value

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    async def setup(self, config: AdapterConfig) -> None:
        """Check credentials and run the shared platform setup pipeline.

        Vertex AI mode is active when ``GOOGLE_GENAI_USE_VERTEXAI`` is ``1``
        or ``true`` (case-insensitive). Only the presence of an API key or
        the Vertex flag is checked here; ``GOOGLE_CLOUD_PROJECT`` is not
        validated.

        Args:
            config: ``AdapterConfig`` populated by the workspace runtime.

        Raises:
            RuntimeError: If ``GOOGLE_API_KEY`` is not set and Vertex AI mode
                is not active.
        """
        vertex_flag = os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "").strip().lower()
        use_vertex = vertex_flag in ("1", "true")
        api_key = os.environ.get("GOOGLE_API_KEY", "").strip()

        if not use_vertex and not api_key:
            raise RuntimeError(
                "GoogleADKAdapter requires GOOGLE_API_KEY (for AI Studio) or "
                "GOOGLE_GENAI_USE_VERTEXAI=1 with GOOGLE_CLOUD_PROJECT set."
            )

        logger.info(
            "GoogleADKAdapter.setup: model=%s vertex=%s", config.model, use_vertex
        )

        self._setup_result = await self._common_setup(config)

    async def create_executor(self, config: AdapterConfig) -> GoogleADKA2AExecutor:
        """Build and return a ``GoogleADKA2AExecutor`` for A2A integration.

        Uses the system prompt assembled by ``_common_setup()`` in ``setup()``
        when available, otherwise ``config.system_prompt``. Runtime-config
        keys ``agent_name``, ``max_output_tokens``, and ``temperature`` are
        respected when present; missing or ``None`` values use the defaults.

        The executor is also stored on ``self._executor``, as the base-class
        contract asks, so ``pre_stop_state()`` can reach it.

        Args:
            config: ``AdapterConfig`` populated by the workspace runtime.

        Returns:
            A ready-to-use ``GoogleADKA2AExecutor`` instance.
        """
        rc = config.runtime_config or {}

        # Strip provider prefix from model, e.g. "google:gemini-2.0-flash" → "gemini-2.0-flash"
        model = config.model
        if ":" in model:
            model = model.split(":", 1)[1]

        system_prompt = (
            self._setup_result.system_prompt
            if self._setup_result is not None
            else config.system_prompt or ""
        )

        executor = GoogleADKA2AExecutor(
            model=model,
            system_prompt=system_prompt,
            agent_name=self._rc_value(rc, "agent_name", _DEFAULT_AGENT_NAME),
            max_output_tokens=int(
                self._rc_value(rc, "max_output_tokens", _DEFAULT_MAX_OUTPUT_TOKENS)
            ),
            temperature=float(self._rc_value(rc, "temperature", _DEFAULT_TEMPERATURE)),
            heartbeat=config.heartbeat,
        )
        self._executor = executor
        return executor
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level alias required by the adapter autodiscovery loader
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Adapter = GoogleADKAdapter
|
||||
@@ -1,7 +0,0 @@
|
||||
# Google ADK adapter dependencies
|
||||
# Pin to the latest stable release — update when a new version is verified.
|
||||
google-adk==1.30.0
|
||||
|
||||
# google-adk transitively requires google-genai; pin explicitly for
|
||||
# reproducibility (same pinning convention as other adapter requirements.txt).
|
||||
google-genai>=1.16.0
|
||||
@@ -1,993 +0,0 @@
|
||||
"""Unit tests for adapters/google-adk/adapter.py.
|
||||
|
||||
Coverage targets (100%)
|
||||
-----------------------
|
||||
- Module constants: _DEFAULT_AGENT_NAME, _DEFAULT_MAX_OUTPUT_TOKENS, etc.
|
||||
- MissingContent sentinel class
|
||||
- GoogleADKA2AExecutor.__init__ — field assignment + runner injection
|
||||
- GoogleADKA2AExecutor._extract_text
|
||||
- GoogleADKA2AExecutor._build_content
|
||||
- GoogleADKA2AExecutor._ensure_session — first call (create), subsequent call (skip)
|
||||
- GoogleADKA2AExecutor.execute — happy path, empty input, API error,
|
||||
no final_response events, partial text
|
||||
- GoogleADKA2AExecutor.cancel — TaskStatusUpdateEvent emitted
|
||||
- GoogleADKAdapter.name / display_name / description / get_config_schema
|
||||
- GoogleADKAdapter.setup — success, missing key, vertex override
|
||||
- GoogleADKAdapter.create_executor — model stripping, defaults, rc overrides
|
||||
- Adapter alias
|
||||
|
||||
All google-adk, google-genai, and shared_runtime calls are mocked.
|
||||
No live API calls are made.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from types import ModuleType
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stub heavy external modules BEFORE the adapter is imported.
|
||||
# conftest.py already stubs: a2a, builtin_tools, langchain_core.
|
||||
# We need to additionally stub: google.adk, google.genai, shared_runtime.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_a2a_stubs() -> None:
    """Register minimal a2a SDK stubs in sys.modules.

    Mirrors what workspace/tests/conftest.py does; needed because
    this test file lives outside the ``tests/`` directory and conftest.py
    is not automatically loaded for it.

    ``new_agent_text_message`` is exposed as a passthrough under both
    ``a2a.helpers`` (v1 location) and ``a2a.utils`` (pre-v1 location), so the
    stub resolves whichever of the two the code under test imports from.
    """

    def _passthrough(text, **kwargs):
        # Hand back the plain string so tests can assert on it directly,
        # matching the hermes_executor test convention from conftest.py.
        return text

    if "a2a" in sys.modules:
        # Already mocked by conftest — just ensure new_agent_text_message is a
        # passthrough on whichever stub module(s) already expose it.
        for module_name in ("a2a.utils", "a2a.helpers"):
            existing = sys.modules.get(module_name)
            if existing and callable(
                getattr(existing, "new_agent_text_message", None)
            ):
                existing.new_agent_text_message = _passthrough
        return

    agent_execution_mod = ModuleType("a2a.server.agent_execution")

    class AgentExecutor:
        pass

    class RequestContext:
        pass

    agent_execution_mod.AgentExecutor = AgentExecutor
    agent_execution_mod.RequestContext = RequestContext

    events_mod = ModuleType("a2a.server.events")

    class EventQueue:
        pass

    events_mod.EventQueue = EventQueue

    tasks_mod = ModuleType("a2a.server.tasks")
    types_mod = ModuleType("a2a.types")

    class Part:
        # v1: Part takes text= directly; root= retained for compat during transition
        def __init__(self, text=None, root=None, **kwargs):
            self.text = text
            # Keep the legacy wrapper reachable so code that reads
            # ``part.root.text`` still finds it.
            self.root = root

    types_mod.Part = Part

    # a2a.helpers (v1: moved from a2a.utils)
    helpers_mod = ModuleType("a2a.helpers")
    helpers_mod.new_agent_text_message = _passthrough

    # a2a.utils (pre-v1 location) — same passthrough, so either import path works.
    utils_mod = ModuleType("a2a.utils")
    utils_mod.new_agent_text_message = _passthrough

    a2a_mod = ModuleType("a2a")
    a2a_server_mod = ModuleType("a2a.server")

    sys.modules["a2a"] = a2a_mod
    sys.modules["a2a.server"] = a2a_server_mod
    sys.modules["a2a.server.agent_execution"] = agent_execution_mod
    sys.modules["a2a.server.events"] = events_mod
    sys.modules["a2a.server.tasks"] = tasks_mod
    sys.modules["a2a.types"] = types_mod
    sys.modules["a2a.helpers"] = helpers_mod
    sys.modules["a2a.utils"] = utils_mod
|
||||
|
||||
|
||||
def _make_google_adk_stubs() -> None:
    """Register minimal google.adk and google.genai stubs in sys.modules.

    An already-imported ``google`` package is reused and its ``__path__`` is
    left alone; the stub submodules are always (re)registered.
    """
    # google (top-level namespace package)
    google_mod = sys.modules.get("google") or ModuleType("google")
    if not hasattr(google_mod, "__path__"):
        # Only a freshly created module needs to be marked as a package.
        # Overwriting the path of a real, already-imported ``google`` package
        # would break later imports of its other subpackages.
        google_mod.__path__ = []
    sys.modules.setdefault("google", google_mod)

    # google.genai
    google_genai_mod = ModuleType("google.genai")
    google_genai_mod.__path__ = []

    google_genai_types_mod = ModuleType("google.genai.types")

    class _Content:
        def __init__(self, role="user", parts=None):
            self.role = role
            self.parts = parts or []

    class _Part:
        def __init__(self, text=""):
            self.text = text

    google_genai_types_mod.Content = _Content
    google_genai_types_mod.Part = _Part

    sys.modules["google.genai"] = google_genai_mod
    sys.modules["google.genai.types"] = google_genai_types_mod

    # google.adk
    google_adk_mod = ModuleType("google.adk")
    google_adk_mod.__path__ = []

    # google.adk.agents
    google_adk_agents_mod = ModuleType("google.adk.agents")

    class _LlmAgent:
        def __init__(self, name="", model="", instruction="", tools=None):
            self.name = name
            self.model = model
            self.instruction = instruction
            self.tools = tools or []

    google_adk_agents_mod.LlmAgent = _LlmAgent

    # google.adk.runners
    google_adk_runners_mod = ModuleType("google.adk.runners")

    class _Runner:
        def __init__(self, agent=None, app_name="", session_service=None):
            self.agent = agent
            self.app_name = app_name
            self.session_service = session_service

        async def run_async(self, session_id, user_id, new_message):
            # Stub — tests override this via mock runner
            return
            yield  # make it an async generator

    google_adk_runners_mod.Runner = _Runner

    # google.adk.sessions
    google_adk_sessions_mod = ModuleType("google.adk.sessions")

    class _InMemorySessionService:
        def __init__(self):
            self._sessions: dict = {}

        async def get_session(self, app_name, user_id, session_id):
            return self._sessions.get((app_name, user_id, session_id))

        async def create_session(self, app_name, user_id, session_id):
            self._sessions[(app_name, user_id, session_id)] = {"id": session_id}
            return self._sessions[(app_name, user_id, session_id)]

    google_adk_sessions_mod.InMemorySessionService = _InMemorySessionService

    sys.modules["google.adk"] = google_adk_mod
    sys.modules["google.adk.agents"] = google_adk_agents_mod
    sys.modules["google.adk.runners"] = google_adk_runners_mod
    sys.modules["google.adk.sessions"] = google_adk_sessions_mod

    # Bind each stub on its parent, as a real import would, so dotted
    # attribute access (e.g. ``google.adk.runners.Runner``) also works.
    google_genai_mod.types = google_genai_types_mod
    google_adk_mod.agents = google_adk_agents_mod
    google_adk_mod.runners = google_adk_runners_mod
    google_adk_mod.sessions = google_adk_sessions_mod
    google_mod.genai = google_genai_mod
    google_mod.adk = google_adk_mod
|
||||
|
||||
|
||||
def _make_shared_runtime_stub() -> None:
    """Install a ``shared_runtime`` stub exposing ``extract_message_text``.

    Does nothing when a ``shared_runtime`` module is already registered.
    """
    if "shared_runtime" in sys.modules:
        return

    def _extract_message_text(ctx) -> str:
        # Prefer ctx.message.parts; otherwise treat ctx itself as the parts.
        message = getattr(ctx, "message", None)
        candidates = getattr(message, "parts", None)
        if candidates is None:
            candidates = ctx

        def _text_of(item):
            # Direct ``text`` wins; fall back to the ``root.text`` wrapper.
            direct = getattr(item, "text", None)
            if direct:
                return direct
            nested = getattr(getattr(item, "root", None), "text", None)
            return nested or ""

        found = [text for text in map(_text_of, candidates or []) if text]
        return " ".join(found).strip()

    stub = ModuleType("shared_runtime")
    stub.extract_message_text = _extract_message_text
    sys.modules["shared_runtime"] = stub
|
||||
|
||||
|
||||
def _make_adapter_base_stub() -> None:
    """Install an ``adapter_base`` stub module in sys.modules.

    The stub provides ``AdapterConfig``, ``BaseAdapter`` and a ``SetupResult``
    placeholder. Does nothing when ``adapter_base`` is already registered.
    """
    if "adapter_base" in sys.modules:
        return

    from dataclasses import dataclass, field
    from abc import ABC, abstractmethod

    @dataclass
    class AdapterConfig:
        model: str = "google:gemini-2.0-flash"
        system_prompt: str | None = None
        tools: list = field(default_factory=list)
        runtime_config: dict = field(default_factory=dict)
        config_path: str = "/configs"
        workspace_id: str = ""
        prompt_files: list = field(default_factory=list)
        a2a_port: int = 8000
        heartbeat: object = None

    class BaseAdapter(ABC):
        # --- identity -----------------------------------------------------
        @staticmethod
        @abstractmethod
        def name() -> str:  # pragma: no cover
            ...

        @staticmethod
        @abstractmethod
        def display_name() -> str:  # pragma: no cover
            ...

        @staticmethod
        @abstractmethod
        def description() -> str:  # pragma: no cover
            ...

        @staticmethod
        def get_config_schema() -> dict:
            return {}

        def memory_filename(self) -> str:
            return "CLAUDE.md"

        # --- hooks: inert defaults ----------------------------------------
        def register_tool_hook(self, name, fn):
            return None

        async def transcript_lines(self, since=0, limit=100):
            return {"supported": False}

        def register_subagent_hook(self, name, spec):
            return None

        def append_to_memory_hook(self, config, filename, content):
            pass

        async def install_plugins_via_registry(self, config, plugins):
            return []

        async def inject_plugins(self, config, plugins):
            await self.install_plugins_via_registry(config, plugins)

        async def _common_setup(self, config):
            # Canned setup result; the prompt text is what setup tests assert on.
            from types import SimpleNamespace

            canned = SimpleNamespace(
                system_prompt="mocked system prompt",
                loaded_skills=[],
                langchain_tools=[],
                is_coordinator=False,
                children=[],
            )
            return canned

        # --- lifecycle ----------------------------------------------------
        @abstractmethod
        async def setup(self, config) -> None:  # pragma: no cover
            ...

        @abstractmethod
        async def create_executor(self, config):  # pragma: no cover
            ...

    stub = ModuleType("adapter_base")
    stub.AdapterConfig = AdapterConfig
    stub.BaseAdapter = BaseAdapter
    stub.SetupResult = None
    sys.modules["adapter_base"] = stub
|
||||
|
||||
|
||||
# Install all stubs before importing the module under test.
# Order matters: a2a must be stubbed before adapter.py is imported so that
# its import of `new_agent_text_message` resolves to the passthrough.
_make_a2a_stubs()
_make_google_adk_stubs()
_make_shared_runtime_stub()
_make_adapter_base_stub()

# Now safe to import the adapter
import sys as _sys
import os as _os

# Put this file's directory at the front of sys.path for the `adapter`
# import below (presumably adapter.py sits next to this file — confirm).
_adapter_dir = _os.path.dirname(_os.path.abspath(__file__))
if _adapter_dir not in _sys.path:
    _sys.path.insert(0, _adapter_dir)

from adapter import (  # noqa: E402
    Adapter,
    GoogleADKA2AExecutor,
    GoogleADKAdapter,
    MissingContent,
    _DEFAULT_AGENT_NAME,
    _DEFAULT_MAX_OUTPUT_TOKENS,
    _DEFAULT_TEMPERATURE,
    _NO_RESPONSE_MSG,
    _NO_TEXT_MSG,
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures and helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_context(text: str, context_id: str = "ctx-test") -> MagicMock:
    """Build a mock RequestContext whose only message part carries *text*."""
    text_part = MagicMock()
    text_part.text = text

    context = MagicMock()
    context.context_id = context_id
    context.message.parts = [text_part]
    return context
|
||||
|
||||
|
||||
def _make_empty_context() -> MagicMock:
    """Build a mock context whose single message part exposes no text.

    Both the part and its ``root`` use ``spec=[]`` so neither auto-creates
    a ``text`` attribute.
    """
    bare_root = MagicMock(spec=[])
    bare_part = MagicMock(spec=[])
    bare_part.root = bare_root

    context = MagicMock()
    context.context_id = "ctx-empty"
    context.message.parts = [bare_part]
    return context
|
||||
|
||||
|
||||
def _make_event(is_final: bool, text: str | None = None) -> MagicMock:
    """Build a mock ADK event.

    ``is_final_response()`` returns *is_final*. With *text* given, the event
    has a response whose content holds one part with that text; otherwise
    ``response`` is ``None``.
    """
    event = MagicMock()
    event.is_final_response = MagicMock(return_value=is_final)

    if text is None:
        event.response = None
        return event

    text_part = MagicMock()
    text_part.text = text
    event.response = MagicMock()
    event.response.content = MagicMock()
    event.response.content.parts = [text_part]
    return event
|
||||
|
||||
|
||||
async def _async_gen(*events):
    """Async generator that yields each positional argument in order."""
    for item in events:
        yield item
|
||||
|
||||
|
||||
def _make_runner(events=None) -> MagicMock:
    """Return a mock Runner whose run_async yields the given events.

    The session service reports no existing session (``get_session`` returns
    ``None``) and ``create_session`` returns ``{"id": "s1"}``.

    Every call to ``run_async`` gets a fresh async generator over *events*,
    so the runner can be driven more than once. Calls are still recorded on
    the mock for ``assert_called_*`` / ``call_args`` checks.
    """
    runner = MagicMock()
    runner.session_service = AsyncMock()
    runner.session_service.get_session = AsyncMock(return_value=None)
    runner.session_service.create_session = AsyncMock(return_value={"id": "s1"})

    # Snapshot the events so later mutation of the caller's list has no effect.
    evts = list(events or [])

    async def _replay(*_args, **_kwargs):
        for evt in evts:
            yield evt

    # side_effect (not return_value): a single pre-built generator would be
    # exhausted after the first run_async call.
    runner.run_async = MagicMock(side_effect=_replay)
    return runner
|
||||
|
||||
|
||||
def _make_executor(
    model: str = "gemini-2.0-flash",
    system_prompt: str | None = "You are helpful.",
    runner: MagicMock | None = None,
) -> GoogleADKA2AExecutor:
    """Build a GoogleADKA2AExecutor around a mock runner.

    Uses *runner* when one is supplied, otherwise a default from
    ``_make_runner()``.
    """
    injected = runner or _make_runner()
    return GoogleADKA2AExecutor(
        model=model,
        system_prompt=system_prompt,
        _runner=injected,
    )
|
||||
|
||||
|
||||
def _make_adapter_config(**kwargs) -> object:
    """Build an AdapterConfig from test defaults, overridden by *kwargs*."""
    from adapter_base import AdapterConfig

    settings = {
        "model": "google:gemini-2.0-flash",
        "system_prompt": "Test prompt.",
        "runtime_config": {},
        "workspace_id": "ws-test",
    }
    # Caller-supplied values win over the defaults above.
    settings = {**settings, **kwargs}
    return AdapterConfig(**settings)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_default_agent_name():
    """The default agent name constant has its expected value."""
    expected = "molecule-adk-agent"
    assert _DEFAULT_AGENT_NAME == expected
|
||||
|
||||
|
||||
def test_default_max_output_tokens():
    """The default output-token cap constant has its expected value."""
    expected = 8192
    assert _DEFAULT_MAX_OUTPUT_TOKENS == expected
|
||||
|
||||
|
||||
def test_default_temperature():
    """The default sampling temperature constant has its expected value."""
    expected = 1.0
    assert _DEFAULT_TEMPERATURE == expected
|
||||
|
||||
|
||||
def test_no_text_msg_constant():
    """The empty-input message mentions "no text" (case-insensitive)."""
    lowered = _NO_TEXT_MSG.lower()
    assert "no text" in lowered
|
||||
|
||||
|
||||
def test_no_response_msg_constant():
    """The fallback message mentions "no response" (case-insensitive)."""
    lowered = _NO_RESPONSE_MSG.lower()
    assert "no response" in lowered
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MissingContent sentinel
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_missing_content_has_empty_parts():
    """A MissingContent instance exposes an empty ``parts`` list."""
    sentinel = MissingContent()
    assert sentinel.parts == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKA2AExecutor — construction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_constructor_stores_fields():
    """Explicit constructor arguments are exposed unchanged on the executor."""
    injected = _make_runner()
    supplied = {
        "model": "gemini-1.5-pro",
        "system_prompt": "Hello",
        "agent_name": "my-agent",
        "max_output_tokens": 4096,
        "temperature": 0.5,
    }

    executor = GoogleADKA2AExecutor(_runner=injected, **supplied)

    for attr, value in supplied.items():
        assert getattr(executor, attr) == value
    assert executor._runner is injected
    assert executor._sessions_created == set()
|
||||
|
||||
|
||||
def test_constructor_defaults():
    """Omitted optional arguments take the module-level default values."""
    executor = GoogleADKA2AExecutor(model="gemini-2.0-flash", _runner=_make_runner())

    assert executor.system_prompt is None
    expected = {
        "agent_name": _DEFAULT_AGENT_NAME,
        "max_output_tokens": _DEFAULT_MAX_OUTPUT_TOKENS,
        "temperature": _DEFAULT_TEMPERATURE,
    }
    for attr, value in expected.items():
        assert getattr(executor, attr) == value
    assert executor._heartbeat is None
|
||||
|
||||
|
||||
def test_constructor_uses_injected_runner():
    """A runner passed as ``_runner`` is kept as-is on the executor."""
    injected = MagicMock()
    injected.session_service = MagicMock()

    executor = GoogleADKA2AExecutor(model="gemini-2.0-flash", _runner=injected)

    assert executor._runner is injected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKA2AExecutor — _extract_text
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_extract_text_returns_message_text():
    """_extract_text returns the text carried by the context's message part."""
    context = _make_context("Hello world")
    extracted = _make_executor()._extract_text(context)
    assert extracted == "Hello world"
|
||||
|
||||
|
||||
def test_extract_text_empty_context():
    """_extract_text returns an empty string when no part carries text."""
    context = _make_empty_context()
    extracted = _make_executor()._extract_text(context)
    assert extracted == ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKA2AExecutor — _build_content
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_build_content_creates_content_object():
    """_build_content wraps the text in a single-part content with role "user"."""
    built = _make_executor()._build_content("test message")

    assert built.role == "user"
    assert len(built.parts) == 1
    only_part = built.parts[0]
    assert only_part.text == "test message"
|
||||
|
||||
|
||||
def test_build_content_empty_string():
    """_build_content keeps an empty string as the first part's text."""
    built = _make_executor()._build_content("")
    first_part = built.parts[0]
    assert first_part.text == ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKA2AExecutor — _ensure_session
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ensure_session_creates_when_not_exists():
    """A session the service does not know is created once and then tracked."""
    runner = _make_runner()
    service = runner.session_service
    service.get_session = AsyncMock(return_value=None)
    executor = GoogleADKA2AExecutor(
        model="gemini-2.0-flash",
        agent_name="test-agent",
        _runner=runner,
    )

    await executor._ensure_session("session-1", "user-1")

    expected_call = {
        "app_name": "test-agent",
        "user_id": "user-1",
        "session_id": "session-1",
    }
    service.create_session.assert_called_once_with(**expected_call)
    assert "session-1" in executor._sessions_created
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ensure_session_skips_if_already_tracked():
    """An already-tracked session id triggers no session-service calls."""
    runner = _make_runner()
    executor = GoogleADKA2AExecutor(model="gemini-2.0-flash", _runner=runner)
    executor._sessions_created.add("session-x")

    await executor._ensure_session("session-x", "user-1")

    # Neither lookup nor creation may happen for a tracked session.
    service = runner.session_service
    service.get_session.assert_not_called()
    service.create_session.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_ensure_session_skips_create_when_existing():
    """A session the service already has is tracked without being re-created."""
    runner = _make_runner()
    service = runner.session_service
    service.get_session = AsyncMock(return_value={"id": "s1"})
    executor = GoogleADKA2AExecutor(
        model="gemini-2.0-flash",
        agent_name="test-agent",
        _runner=runner,
    )

    await executor._ensure_session("session-existing", "user-1")

    service.create_session.assert_not_called()
    assert "session-existing" in executor._sessions_created
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKA2AExecutor — execute: happy path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_returns_response_text():
    """The text of a final-response event is enqueued exactly once."""
    final_event = _make_event(is_final=True, text="The answer is 42.")
    executor = _make_executor(runner=_make_runner(events=[final_event]))
    queue = AsyncMock()

    await executor.execute(_make_context("What is 6×7?"), queue)

    queue.enqueue_event.assert_called_once_with("The answer is 42.")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_concatenates_multiple_final_parts():
    """Texts of all parts in a final response are joined into one message."""
    fragments = []
    for chunk in ("Hello ", "world"):
        piece = MagicMock()
        piece.text = chunk
        fragments.append(piece)

    final_event = MagicMock()
    final_event.is_final_response = MagicMock(return_value=True)
    final_event.response = MagicMock()
    final_event.response.content = MagicMock()
    final_event.response.content.parts = fragments

    executor = _make_executor(runner=_make_runner(events=[final_event]))
    queue = AsyncMock()

    await executor.execute(_make_context("Hi"), queue)

    queue.enqueue_event.assert_called_once_with("Hello world")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_skips_non_final_events():
    """Only the final event's text reaches the queue, not intermediate text."""
    stream = [
        _make_event(is_final=False, text="intermediate"),
        _make_event(is_final=True, text="final answer"),
    ]
    executor = _make_executor(runner=_make_runner(events=stream))
    queue = AsyncMock()

    await executor.execute(_make_context("question"), queue)

    positional_args = queue.enqueue_event.call_args[0]
    assert positional_args[0] == "final answer"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_fallback_when_no_final_response_events():
    """A stream with no final event produces the no-response fallback message."""
    only_intermediate = _make_event(is_final=False)
    executor = _make_executor(runner=_make_runner(events=[only_intermediate]))
    queue = AsyncMock()

    await executor.execute(_make_context("hello"), queue)

    queue.enqueue_event.assert_called_once_with(_NO_RESPONSE_MSG)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_fallback_when_response_is_none():
    """A final event whose ``response`` is None produces the fallback message."""
    final_event = MagicMock()
    final_event.response = None  # no response object
    final_event.is_final_response = MagicMock(return_value=True)

    executor = _make_executor(runner=_make_runner(events=[final_event]))
    queue = AsyncMock()

    await executor.execute(_make_context("ping"), queue)

    queue.enqueue_event.assert_called_once_with(_NO_RESPONSE_MSG)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_fallback_when_parts_have_no_text():
    """A final response whose only part has ``text=None`` yields the fallback."""
    textless_part = MagicMock()
    textless_part.text = None  # no text on the part

    final_event = MagicMock()
    final_event.is_final_response = MagicMock(return_value=True)
    final_event.response = MagicMock()
    final_event.response.content = MagicMock()
    final_event.response.content.parts = [textless_part]

    executor = _make_executor(runner=_make_runner(events=[final_event]))
    queue = AsyncMock()

    await executor.execute(_make_context("ping"), queue)

    queue.enqueue_event.assert_called_once_with(_NO_RESPONSE_MSG)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_fallback_when_response_content_is_none():
    """A final response with ``content=None`` yields the fallback message."""
    final_event = MagicMock()
    final_event.is_final_response = MagicMock(return_value=True)
    final_event.response = MagicMock()
    final_event.response.content = None  # content is None → MissingContent sentinel

    executor = _make_executor(runner=_make_runner(events=[final_event]))
    queue = AsyncMock()

    await executor.execute(_make_context("ping"), queue)

    queue.enqueue_event.assert_called_once_with(_NO_RESPONSE_MSG)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_uses_context_id_as_session_id():
    """run_async is called once with the context id as session id."""
    runner = _make_runner(events=[_make_event(is_final=True, text="ok")])
    executor = _make_executor(runner=runner)
    queue = AsyncMock()

    await executor.execute(_make_context("hello", context_id="ctx-abc-123"), queue)

    runner.run_async.assert_called_once()
    passed = runner.run_async.call_args[1]
    assert passed["session_id"] == "ctx-abc-123"
    assert passed["user_id"] == "molecule-user"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_falls_back_to_default_session_id_when_context_id_is_none():
    """With ``context_id=None`` the session id passed on is "default-session"."""
    runner = _make_runner(events=[_make_event(is_final=True, text="ok")])
    executor = _make_executor(runner=runner)

    context = _make_context("hello")
    context.context_id = None  # override
    queue = AsyncMock()

    await executor.execute(context, queue)

    passed = runner.run_async.call_args[1]
    assert passed["session_id"] == "default-session"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKA2AExecutor — execute: empty input
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_empty_input_returns_error():
    """Empty input enqueues the no-text message and never calls the runner."""
    runner = _make_runner()
    executor = _make_executor(runner=runner)
    queue = AsyncMock()

    await executor.execute(_make_empty_context(), queue)

    queue.enqueue_event.assert_called_once_with(_NO_TEXT_MSG)
    runner.run_async.assert_not_called()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKA2AExecutor — execute: error handling
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_api_error_returns_sanitized_message():
    """A runner failure is reported by exception class name only.

    The exception text contains a fake secret; the enqueued message must not
    include it.
    """

    class _FakeAPIError(Exception):
        pass

    async def _failing_stream(*args, **kwargs):
        raise _FakeAPIError("api_key=secret token_limit_exceeded")
        yield  # make it an async generator

    runner = _make_runner()
    runner.run_async = MagicMock(return_value=_failing_stream())
    executor = _make_executor(runner=runner)
    queue = AsyncMock()

    await executor.execute(_make_context("hello"), queue)

    reported = queue.enqueue_event.call_args[0][0]
    assert reported == "Agent error: _FakeAPIError"
    assert "secret" not in reported
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_api_error_is_logged(caplog):
    """A runner failure produces a log record mentioning "execution error"."""
    import logging

    async def _failing_stream(*args, **kwargs):
        raise ValueError("bad request")
        yield  # make it an async generator

    runner = _make_runner()
    runner.run_async = MagicMock(return_value=_failing_stream())
    executor = _make_executor(runner=runner)

    with caplog.at_level(logging.ERROR, logger="adapter"):
        await executor.execute(_make_context("hello"), AsyncMock())

    logged = [record.message.lower() for record in caplog.records]
    assert any("execution error" in message for message in logged)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKA2AExecutor — cancel
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_cancel_emits_canceled_event():
    """cancel() enqueues one final TaskStatusUpdateEvent in the canceled state.

    Minimal stand-ins for ``TaskState``, ``TaskStatus`` and
    ``TaskStatusUpdateEvent`` are patched onto ``a2a.types`` only for the
    duration of the ``cancel`` call. They are removed again afterwards, so the
    shared module is left unchanged for every other test.
    """
    executor = _make_executor()

    import a2a.types as a2a_types

    class _TaskState:
        canceled = "canceled"

    class _TaskStatus:
        def __init__(self, state):
            self.state = state

    class _TaskStatusUpdateEvent:
        def __init__(self, status, final):
            self.status = status
            self.final = final

    eq = AsyncMock()
    ctx = MagicMock()

    # create=True: the attributes may not exist on the stub module yet.
    with patch.multiple(
        a2a_types,
        create=True,
        TaskState=_TaskState,
        TaskStatus=_TaskStatus,
        TaskStatusUpdateEvent=_TaskStatusUpdateEvent,
    ):
        await executor.cancel(ctx, eq)

    eq.enqueue_event.assert_called_once()
    event = eq.enqueue_event.call_args[0][0]
    assert isinstance(event, _TaskStatusUpdateEvent)
    assert event.status.state == "canceled"
    assert event.final is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKAdapter — identity methods
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_adapter_name():
    """The adapter's name is "google-adk"."""
    reported = GoogleADKAdapter.name()
    assert reported == "google-adk"
|
||||
|
||||
|
||||
def test_adapter_display_name():
    """The display name contains "Google ADK"."""
    shown = GoogleADKAdapter.display_name()
    assert "Google ADK" in shown
|
||||
|
||||
|
||||
def test_adapter_description():
    """The description mentions "ADK" or "Google"."""
    text = GoogleADKAdapter.description()
    assert any(keyword in text for keyword in ("ADK", "Google"))
|
||||
|
||||
|
||||
def test_adapter_get_config_schema():
    """The config schema is an object schema with the three tunable properties."""
    schema = GoogleADKAdapter.get_config_schema()

    assert schema["type"] == "object"
    properties = schema["properties"]
    for key in ("agent_name", "max_output_tokens", "temperature"):
        assert key in properties
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKAdapter — setup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_setup_succeeds_with_api_key(monkeypatch):
    """setup() succeeds with only GOOGLE_API_KEY set and stores the setup result."""
    monkeypatch.setenv("GOOGLE_API_KEY", "fake-api-key")
    monkeypatch.delenv("GOOGLE_GENAI_USE_VERTEXAI", raising=False)
    adapter = GoogleADKAdapter()

    await adapter.setup(_make_adapter_config())

    stored = adapter._setup_result
    assert stored is not None
    assert adapter._setup_result.system_prompt == "mocked system prompt"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_setup_succeeds_with_vertex_ai(monkeypatch):
    """setup() succeeds without an API key when GOOGLE_GENAI_USE_VERTEXAI is "1"."""
    monkeypatch.delenv("GOOGLE_API_KEY", raising=False)
    monkeypatch.setenv("GOOGLE_GENAI_USE_VERTEXAI", "1")
    adapter = GoogleADKAdapter()

    await adapter.setup(_make_adapter_config())

    stored = adapter._setup_result
    assert stored is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_setup_succeeds_with_vertex_ai_true_string(monkeypatch):
    """setup() also accepts "True" as the GOOGLE_GENAI_USE_VERTEXAI value."""
    monkeypatch.delenv("GOOGLE_API_KEY", raising=False)
    monkeypatch.setenv("GOOGLE_GENAI_USE_VERTEXAI", "True")
    adapter = GoogleADKAdapter()

    await adapter.setup(_make_adapter_config())

    stored = adapter._setup_result
    assert stored is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_setup_raises_without_credentials(monkeypatch):
    """With neither credential variable set, setup() raises RuntimeError.

    The error message must mention GOOGLE_API_KEY.
    """
    for variable in ("GOOGLE_API_KEY", "GOOGLE_GENAI_USE_VERTEXAI"):
        monkeypatch.delenv(variable, raising=False)

    adapter = GoogleADKAdapter()
    config = _make_adapter_config()

    with pytest.raises(RuntimeError, match="GOOGLE_API_KEY"):
        await adapter.setup(config)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GoogleADKAdapter — create_executor
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_executor_strips_google_prefix(monkeypatch):
    """A "google:" prefix on the configured model is removed for the executor."""
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    config = _make_adapter_config(model="google:gemini-2.0-flash")
    adapter = GoogleADKAdapter()
    await adapter.setup(config)

    built = await adapter.create_executor(config)

    assert built.model == "gemini-2.0-flash"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_executor_no_prefix_passthrough(monkeypatch):
    """A model name without a prefix is passed to the executor unchanged."""
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    config = _make_adapter_config(model="gemini-1.5-pro")
    adapter = GoogleADKAdapter()
    await adapter.setup(config)

    built = await adapter.create_executor(config)

    assert built.model == "gemini-1.5-pro"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_executor_uses_setup_system_prompt(monkeypatch):
    """After setup(), the executor gets the system prompt from the setup result."""
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    config = _make_adapter_config()
    adapter = GoogleADKAdapter()
    await adapter.setup(config)

    built = await adapter.create_executor(config)

    assert built.system_prompt == "mocked system prompt"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_executor_runtime_config_overrides(monkeypatch):
    """Values in runtime_config reach the executor in place of the defaults."""
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    overrides = {
        "agent_name": "custom-agent",
        "max_output_tokens": 512,
        "temperature": 0.3,
    }
    config = _make_adapter_config(runtime_config=overrides)
    adapter = GoogleADKAdapter()
    await adapter.setup(config)

    built = await adapter.create_executor(config)

    assert built.agent_name == "custom-agent"
    assert built.max_output_tokens == 512
    assert built.temperature == 0.3
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_executor_defaults_without_runtime_config(monkeypatch):
    """An empty runtime_config leaves the executor on the module defaults."""
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    config = _make_adapter_config(runtime_config={})
    adapter = GoogleADKAdapter()
    await adapter.setup(config)

    built = await adapter.create_executor(config)

    expected = {
        "agent_name": _DEFAULT_AGENT_NAME,
        "max_output_tokens": _DEFAULT_MAX_OUTPUT_TOKENS,
        "temperature": _DEFAULT_TEMPERATURE,
    }
    for attr, value in expected.items():
        assert getattr(built, attr) == value
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_executor_without_setup_uses_config_system_prompt(monkeypatch):
    """Without a prior setup(), the executor uses config.system_prompt."""
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    config = _make_adapter_config(system_prompt="fallback prompt")
    adapter = GoogleADKAdapter()

    # setup() is deliberately not called, so _setup_result remains None.
    built = await adapter.create_executor(config)

    assert built.system_prompt == "fallback prompt"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_executor_without_setup_no_system_prompt(monkeypatch):
    """Without setup() and with system_prompt=None, the prompt is an empty string."""
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    config = _make_adapter_config(system_prompt=None)
    adapter = GoogleADKAdapter()

    # setup() is deliberately not called.
    built = await adapter.create_executor(config)

    assert built.system_prompt == ""
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_create_executor_heartbeat_passed(monkeypatch):
    """The heartbeat object from the config is handed to the executor as-is."""
    monkeypatch.setenv("GOOGLE_API_KEY", "key")
    adk_adapter = GoogleADKAdapter()
    beat = MagicMock()
    cfg = _make_adapter_config(heartbeat=beat)
    await adk_adapter.setup(cfg)

    built = await adk_adapter.create_executor(cfg)

    # Identity, not equality: the very same object must be forwarded.
    assert built._heartbeat is beat
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Adapter alias
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_adapter_alias_is_google_adk_adapter():
    """``Adapter`` must be the very same class object as ``GoogleADKAdapter``."""
    alias, concrete = Adapter, GoogleADKAdapter
    assert alias is concrete
|
||||
@@ -1,2 +0,0 @@
|
||||
"""Re-export from shared_runtime for backward compat."""
|
||||
from shared_runtime import * # noqa: F401,F403
|
||||
@@ -1,32 +0,0 @@
|
||||
"""Smolagents adapter for Molecule AI workspace runtime.
|
||||
|
||||
Provides env sanitization and safe executor/messaging primitives for use
|
||||
with HuggingFace's smolagents library.
|
||||
|
||||
Two env-sanitization strategies are available:
|
||||
|
||||
* **Allowlist** (recommended) — :mod:`adapters.smolagents.env_sanitize`:
|
||||
only explicitly-safe variables pass through. Stricter but requires keeping
|
||||
the allowlist up-to-date as new safe vars are needed.
|
||||
|
||||
* **Denylist** (simple) — :mod:`adapters.smolagents.safe_env`:
|
||||
well-known secret names plus ``*_API_KEY`` / ``*_TOKEN`` suffix patterns
|
||||
are stripped. Easier to start with; less exhaustive.
|
||||
|
||||
Quick start::
|
||||
|
||||
# Allowlist approach (stricter)
|
||||
from adapters.smolagents.env_sanitize import make_safe_env, SafeLocalPythonExecutor
|
||||
|
||||
# Denylist approach (simpler)
|
||||
from adapters.smolagents.safe_env import make_safe_env
|
||||
|
||||
# Safe messaging
|
||||
from adapters.smolagents.send_message_wrapper import safe_send_message
|
||||
"""
|
||||
|
||||
# Re-export the allowlist-based make_safe_env as the default (most secure).
|
||||
from adapters.smolagents.env_sanitize import SafeLocalPythonExecutor, make_safe_env
|
||||
from adapters.smolagents.send_message_wrapper import safe_send_message
|
||||
|
||||
__all__ = ["make_safe_env", "SafeLocalPythonExecutor", "safe_send_message"]
|
||||
@@ -1,226 +0,0 @@
|
||||
"""Allowlist-based environment sanitization for smolagents (#826 — C3 CRITICAL).
|
||||
|
||||
Security model
|
||||
--------------
|
||||
We use an **allowlist** (not a denylist) — only variables explicitly
|
||||
enumerated as safe are passed through to agent-executed code. Any key not
|
||||
on the list is silently dropped.
|
||||
|
||||
This is intentionally strict: adding a new safe variable is a deliberate
|
||||
engineering act that surfaces in code review, rather than hoping a regex
|
||||
denylist catches every new secret name.
|
||||
|
||||
Thread safety
|
||||
-------------
|
||||
``SafeLocalPythonExecutor.__call__`` mutates ``os.environ`` temporarily.
|
||||
``_ENV_PATCH_LOCK`` serialises concurrent calls so simultaneous executions
|
||||
do not see each other's env patches.
|
||||
|
||||
Extending the allowlist
|
||||
-----------------------
|
||||
Set ``SMOLAGENTS_ENV_EXTRA_ALLOWLIST`` to a comma-separated list of
|
||||
additional uppercase env var names that should be passed through. This is
|
||||
intended for workspace-specific non-secret variables (e.g. ``WORKSPACE_ID``
|
||||
that you know are safe):
|
||||
|
||||
SMOLAGENTS_ENV_EXTRA_ALLOWLIST="MY_COMPANY_ENV,REGION"
|
||||
|
||||
Never add secret names here — use workspace secrets injection instead.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import threading
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Allowlist configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Core safe env variables — non-secret system and runtime variables that
|
||||
# agent code may legitimately need (e.g. PATH for subprocess-free tools,
|
||||
# PYTHONPATH for module resolution, TZ for datetime ops).
|
||||
_SAFE_ENV_ALLOWLIST: frozenset = frozenset(
    {
        # Shell / system fundamentals
        "PATH", "HOME", "USER", "LOGNAME", "SHELL", "TERM", "TZ",
        "TMPDIR", "TEMP", "TMP",
        # Language / locale
        "LANG", "LANGUAGE", "LC_ALL", "LC_CTYPE", "LC_MESSAGES",
        "LC_NUMERIC", "LC_TIME",
        # Python runtime
        "PYTHONPATH", "PYTHONHOME", "PYTHONDONTWRITEBYTECODE",
        "PYTHONUNBUFFERED", "PYTHONIOENCODING",
        # Molecule workspace non-secret identity vars
        "WORKSPACE_ID", "WORKSPACE_NAME", "PLATFORM_URL",
    }
)

# Module names that are never added to an executor's authorized-import list;
# they are treated as sandbox-escape vectors.
_BANNED_IMPORTS: frozenset = frozenset(
    {"subprocess", "socket", "ctypes", "importlib", "importlib.util"}
)

# Imports every SafeLocalPythonExecutor authorizes by default.
# NOTE(review): "io" exposes io.open(); confirm that is compatible with the
# "no I/O escape surface" intent of this baseline.
_BASELINE_SAFE_IMPORTS: List[str] = [
    "math", "json", "re", "datetime", "collections", "itertools",
    "functools", "typing", "string", "textwrap", "decimal", "fractions",
    "statistics", "random", "hashlib", "base64", "urllib.parse", "copy",
    "dataclasses", "enum", "abc", "io",
]

# Serialises the temporary os.environ patching done by the executor.
_ENV_PATCH_LOCK = threading.Lock()


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def make_safe_env(
    extra_allowed: Optional[List[str]] = None,
) -> Dict[str, str]:
    """Build a filtered copy of the process environment.

    Only variables whose names are allowlisted survive. The allowlist is the
    union of ``_SAFE_ENV_ALLOWLIST``, the upper-cased names in
    ``extra_allowed``, and the upper-cased, comma-separated names found in the
    ``SMOLAGENTS_ENV_EXTRA_ALLOWLIST`` environment variable (blank entries
    are ignored). ``os.environ`` itself is only read, never modified.

    Parameters
    ----------
    extra_allowed:
        Optional additional variable names to let through.

    Returns
    -------
    dict
        ``{name: value}`` for every current environment variable whose name
        is in the effective allowlist.
    """
    permitted = set(_SAFE_ENV_ALLOWLIST)

    # Names handed in by the caller.
    permitted.update(name.upper() for name in (extra_allowed or ()))

    # Names configured through the environment.
    configured = os.environ.get("SMOLAGENTS_ENV_EXTRA_ALLOWLIST", "")
    for chunk in configured.split(","):
        candidate = chunk.strip().upper()
        if candidate:
            permitted.add(candidate)

    return {name: value for name, value in os.environ.items() if name in permitted}
|
||||
|
||||
|
||||
class SafeLocalPythonExecutor:
    """Allowlist-gated wrapper around smolagents ``LocalPythonExecutor``.

    While wrapped code runs, every variable that ``make_safe_env`` does not
    return is removed from ``os.environ``; the full environment is restored
    afterwards, even if execution raises. Because ``os.environ`` is
    process-global, calls are serialised through ``_ENV_PATCH_LOCK``.

    Parameters
    ----------
    additional_imports:
        Extra module names to allow beyond ``_BASELINE_SAFE_IMPORTS``.
        ``_BANNED_IMPORTS`` takes precedence: an entry is silently dropped
        when it names a banned module, a submodule or ``pkg.*`` wildcard of a
        banned module, or the bare ``"*"`` wildcard (which would otherwise
        re-authorize every banned module).
    extra_allowed_env:
        Extra variable names to pass through beyond the core allowlist.
    _inner:
        Inject a mock ``LocalPythonExecutor`` for tests. When ``None``,
        the real smolagents executor is constructed lazily.
    """

    def __init__(
        self,
        additional_imports: Optional[List[str]] = None,
        extra_allowed_env: Optional[List[str]] = None,
        *,
        _inner: Any = None,
    ) -> None:
        # Final import list = baseline + caller extras - anything banned.
        combined = list(_BASELINE_SAFE_IMPORTS)
        combined.extend(
            imp
            for imp in (additional_imports or ())
            if not self._is_banned_import(imp)
        )

        self._authorized_imports: List[str] = combined
        self._extra_allowed_env: Optional[List[str]] = extra_allowed_env
        self._inner = _inner  # may be None until first call

    @staticmethod
    def _is_banned_import(name: Any) -> bool:
        """Return True when ``name`` would authorize a banned module."""
        if not isinstance(name, str):
            return False
        if name in _BANNED_IMPORTS:
            return True
        # "subprocess.*", "ctypes.util", ... all resolve to a banned root;
        # a bare "*" authorizes everything, banned modules included.
        root = name.split(".", 1)[0]
        return root == "*" or root in _BANNED_IMPORTS

    def _get_inner(self) -> Any:
        """Lazy-construct the real executor on first use (avoids import errors in tests)."""
        if self._inner is None:
            from smolagents import LocalPythonExecutor  # type: ignore[import]

            self._inner = LocalPythonExecutor(
                additional_authorized_imports=self._authorized_imports
            )
        return self._inner

    def __call__(self, code: str, *args: Any, **kwargs: Any) -> Any:
        """Execute ``code`` with only allowlisted env vars visible.

        All keys not on the allowlist are removed from ``os.environ`` for
        the duration of execution and restored afterward, even on exception.
        Returns whatever the wrapped executor returns.
        """
        with _ENV_PATCH_LOCK:
            # Everything that reads the environment happens under the lock:
            # outside it, a concurrent call may have os.environ scrubbed,
            # which would hide SMOLAGENTS_ENV_EXTRA_ALLOWLIST from
            # make_safe_env().
            inner = self._get_inner()
            safe_env = make_safe_env(self._extra_allowed_env)
            original_env = dict(os.environ)
            try:
                # Scrub inside the try so a failure here still restores.
                for key in [k for k in os.environ if k not in safe_env]:
                    del os.environ[key]
                return inner(code, *args, **kwargs)
            finally:
                os.environ.clear()
                os.environ.update(original_env)
|
||||
@@ -1,61 +0,0 @@
|
||||
"""Denylist-based environment sanitization for smolagents (issue #826 — C3 CRITICAL).
|
||||
|
||||
This module provides a simple denylist approach: well-known secret variable
|
||||
names plus ``*_API_KEY`` and ``*_TOKEN`` suffix patterns are stripped before
|
||||
env is passed to agent-executed code.
|
||||
|
||||
For a stricter allowlist-based alternative that only passes explicitly-safe
|
||||
variables through, see :mod:`adapters.smolagents.env_sanitize`.
|
||||
|
||||
Usage::
|
||||
|
||||
from adapters.smolagents.safe_env import make_safe_env
|
||||
|
||||
executor = LocalPythonExecutor(...)
|
||||
# Pass only the sanitised env to the subprocess / exec context:
|
||||
safe = make_safe_env()
|
||||
"""
|
||||
|
||||
import copy
|
||||
import os
|
||||
|
||||
# Named API keys and tokens known to be used by smolagents / LLM clients.
|
||||
# These are removed regardless of the suffix-pattern below.
|
||||
SMOLAGENTS_ENV_DENYLIST: frozenset = frozenset(
    {
        "OPENAI_API_KEY",
        "ANTHROPIC_API_KEY",
        "GROQ_API_KEY",
        "CEREBRAS_API_KEY",
        "QIANFAN_API_KEY",
        "LANGFUSE_SECRET_KEY",
        "LANGFUSE_PUBLIC_KEY",
        "HF_TOKEN",
    }
)


def make_safe_env() -> dict:
    """Return a sanitised copy of ``os.environ`` with secrets removed.

    A key is removed when its upper-cased name:

    - is in :data:`SMOLAGENTS_ENV_DENYLIST`, OR
    - ends with ``_API_KEY``, OR
    - ends with ``_TOKEN``

    Matching is case-insensitive so that variants such as
    ``openai_api_key`` cannot slip past the denylist.

    ``os.environ`` is **never mutated** — a fresh ``dict`` is returned.

    Returns
    -------
    dict
        A copy of the current environment with secret keys removed.
    """

    def _is_secret(name: str) -> bool:
        upper = name.upper()
        return upper in SMOLAGENTS_ENV_DENYLIST or upper.endswith(
            ("_API_KEY", "_TOKEN")
        )

    return {
        key: value for key, value in os.environ.items() if not _is_secret(key)
    }
|
||||
@@ -1,71 +0,0 @@
|
||||
"""Safe send_message wrapper for smolagents (issue #827 — C1 HIGH).
|
||||
|
||||
Prevents social-engineering attacks where agent-generated content could
|
||||
impersonate platform messages, inject HTML, or flood the user chat.
|
||||
|
||||
Guarantees
|
||||
----------
|
||||
1. Every message is prefixed with ``[smolagents]`` so recipients can
|
||||
attribute it to the agent and cannot be mistaken for platform UI.
|
||||
2. Truncated to 2000 characters to prevent log/UI floods.
|
||||
3. HTML entities (``<``, ``>``, ``&``, ``"``, ``'``) are escaped so
|
||||
rendered UIs that interpret HTML cannot be injected into.
|
||||
|
||||
Usage::
|
||||
|
||||
from adapters.smolagents.send_message_wrapper import safe_send_message
|
||||
|
||||
safe_send_message("Hello world", send_fn=platform_client.send)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)

# Maximum character length for the *user-visible* portion of the message
# (label prefix does not count toward this cap).
_MAX_TEXT_LEN: int = 2000

# Label prepended to every outbound message.
_LABEL: str = "[smolagents]"


def safe_send_message(text: str, send_fn) -> None:
    """Sanitise *text* and deliver it via *send_fn*.

    The text is HTML-escaped, capped at ``_MAX_TEXT_LEN`` characters
    (measured after escaping) and prefixed with ``_LABEL``. Truncation never
    leaves a partial HTML entity at the end of the message.

    Parameters
    ----------
    text:
        The raw message text produced by the agent. Non-string values are
        converted with ``str()``.
    send_fn:
        Callable that delivers the message (e.g. ``platform_client.send``
        or a WebSocket broadcast function). Called with the final,
        sanitised string as its sole positional argument.

    Side effects
    ------------
    - Logs a warning when truncation occurs.
    - Logs a debug entry with the final payload length.
    """
    if not isinstance(text, str):
        text = str(text)

    # Escape HTML special characters to prevent injection into rendered UIs.
    sanitised = html.escape(text, quote=True)

    # Truncate to cap (before adding label so cap applies to content).
    if len(sanitised) > _MAX_TEXT_LEN:
        logger.warning(
            "safe_send_message: truncating message from %d to %d chars",
            len(sanitised),
            _MAX_TEXT_LEN,
        )
        sanitised = sanitised[:_MAX_TEXT_LEN]
        # After html.escape every "&" starts an entity that ends with ";".
        # If the cut landed inside the last entity (no ";" after its "&"),
        # drop the fragment rather than emit something like "&am".
        last_amp = sanitised.rfind("&")
        if last_amp != -1 and ";" not in sanitised[last_amp:]:
            sanitised = sanitised[:last_amp]

    payload = f"{_LABEL} {sanitised}"

    logger.debug("safe_send_message: delivering %d-char payload", len(payload))
    send_fn(payload)
|
||||
@@ -1,133 +0,0 @@
|
||||
"""Create the Deep Agent with model + skills + tools."""
|
||||
|
||||
import os
|
||||
import logging
|
||||
|
||||
from langgraph.prebuilt import create_react_agent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def create_agent(model_str: str, tools: list, system_prompt: str):
    """Create a LangGraph ReAct agent.

    Args:
        model_str: LangChain-compatible model string (e.g., 'anthropic:claude-sonnet-4-6').
            A string without a ``provider:`` prefix is treated as an
            Anthropic model name.
        tools: List of tool functions
        system_prompt: The system prompt for the agent

    Returns:
        The agent object produced by ``create_react_agent``.

    Raises:
        ValueError: If the provider prefix is not one of the supported ones.
        ImportError: If the LangChain integration package for the provider
            is not installed; the message names the pip package to install.
    """
    # Parse provider:model format
    if ":" in model_str:
        provider, model_name = model_str.split(":", 1)
    else:
        provider = "anthropic"
        model_name = model_str

    # Providers served through the OpenAI-compatible client.
    openai_compatible = ("openai", "openrouter", "groq", "cerebras", "qianfan")

    # Import the provider package
    try:
        if provider == "anthropic":
            from langchain_anthropic import ChatAnthropic as LLMClass
        elif provider in openai_compatible:
            from langchain_openai import ChatOpenAI as LLMClass
        elif provider == "google_genai":
            from langchain_google_genai import ChatGoogleGenerativeAI as LLMClass
        elif provider == "ollama":
            from langchain_ollama import ChatOllama as LLMClass
        else:
            raise ValueError(f"Unsupported model provider: {provider}")
    except ImportError as e:
        # Name the distribution that actually ships the module imported
        # above: every OpenAI-compatible provider needs langchain-openai, and
        # pip distribution names use dashes (langchain-google-genai).
        if provider in openai_compatible:
            pkg = "langchain-openai"
        else:
            pkg = f"langchain-{provider}".replace("_", "-")
        raise ImportError(f"Provider '{provider}' requires package '{pkg}'. Install: pip install {pkg}") from e

    # Instantiate the LLM
    if provider == "anthropic":
        llm_kwargs = {"model": model_name}
        anthropic_base_url = os.environ.get("ANTHROPIC_BASE_URL", "")
        if anthropic_base_url:
            llm_kwargs["anthropic_api_url"] = anthropic_base_url
        llm = LLMClass(**llm_kwargs)
    elif provider == "openrouter":
        # Falls back to OPENAI_API_KEY only when OPENROUTER_API_KEY is unset.
        api_key = os.environ.get("OPENROUTER_API_KEY", os.environ.get("OPENAI_API_KEY", ""))
        max_tokens = int(os.environ.get("MAX_TOKENS", "2048"))
        llm = LLMClass(
            model=model_name,
            openai_api_key=api_key,
            openai_api_base="https://openrouter.ai/api/v1",
            max_tokens=max_tokens,
        )
    elif provider == "groq":
        api_key = os.environ.get("GROQ_API_KEY", "")
        llm = LLMClass(
            model=model_name,
            openai_api_key=api_key,
            openai_api_base="https://api.groq.com/openai/v1",
        )
    elif provider == "cerebras":
        api_key = os.environ.get("CEREBRAS_API_KEY", "")
        llm = LLMClass(
            model=model_name,
            openai_api_key=api_key,
            openai_api_base="https://api.cerebras.ai/v1",
        )
    elif provider == "qianfan":
        # Falls back to AISTUDIO_API_KEY only when QIANFAN_API_KEY is unset.
        api_key = os.environ.get("QIANFAN_API_KEY", os.environ.get("AISTUDIO_API_KEY", ""))
        llm = LLMClass(
            model=model_name,
            openai_api_key=api_key,
            openai_api_base="https://qianfan.baidubce.com/v2",
        )
    elif provider == "openai":
        llm_kwargs = {"model": model_name}
        openai_base_url = os.environ.get("OPENAI_BASE_URL", "")
        if openai_base_url:
            llm_kwargs["openai_api_base"] = openai_base_url
        llm = LLMClass(**llm_kwargs)
    else:
        llm = LLMClass(model=model_name)

    # Auto-inject Langfuse tracing if env vars are present
    callbacks = _setup_langfuse()
    if callbacks:
        llm.callbacks = callbacks

    agent = create_react_agent(
        model=llm,
        tools=tools,
        prompt=system_prompt,
    )

    return agent
|
||||
|
||||
|
||||
def _setup_langfuse():
    """Build the Langfuse callback list from the ``LANGFUSE_*`` env vars.

    Tracing is attempted only when ``LANGFUSE_HOST``, ``LANGFUSE_PUBLIC_KEY``
    and ``LANGFUSE_SECRET_KEY`` are all set to non-empty values. On success
    ``LANGSMITH_TRACING`` is also defaulted to ``"true"``.

    Returns a one-element list holding the callback handler, or an empty
    list when tracing is not configured, the ``langfuse`` package is missing,
    or handler setup fails for any other reason.
    """
    handler_kwargs = {
        "host": os.environ.get("LANGFUSE_HOST"),
        "public_key": os.environ.get("LANGFUSE_PUBLIC_KEY"),
        "secret_key": os.environ.get("LANGFUSE_SECRET_KEY"),
    }
    if not all(handler_kwargs.values()):
        return []

    try:
        from langfuse.callback import CallbackHandler

        tracer = CallbackHandler(**handler_kwargs)
        logger.info("Langfuse tracing enabled: %s", handler_kwargs["host"])

        # Also set LANGSMITH_TRACING for LangGraph native integration
        os.environ.setdefault("LANGSMITH_TRACING", "true")
        return [tracer]
    except ImportError:
        logger.warning("Langfuse env vars set but langfuse package not installed")
    except Exception as exc:
        logger.warning("Langfuse setup failed: %s", exc)
    return []
|
||||
@@ -1,74 +0,0 @@
|
||||
"""AGENTS.md auto-generation for Molecule AI workspaces.
|
||||
|
||||
Implements the AAIF / Linux Foundation AGENTS.md standard so that peer agents
|
||||
and orchestration tools can discover this workspace's identity, role, A2A
|
||||
endpoint, and available tools without reading the full system prompt.
|
||||
|
||||
Usage::
|
||||
|
||||
from agents_md import generate_agents_md
|
||||
|
||||
generate_agents_md(config_dir="/configs", output_path="/workspace/AGENTS.md")
|
||||
|
||||
The function is called automatically at container startup (see main.py).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def generate_agents_md(config_dir: str, output_path: str) -> None:
    """Write an AGENTS.md file describing the workspace in ``config_dir``.

    The file is rebuilt from scratch on every call and always overwrites
    ``output_path``.

    Args:
        config_dir: Directory passed to ``load_config`` to read the
            workspace configuration.
        output_path: Path the generated AGENTS.md is written to (UTF-8).
            The parent directory is not created here.
    """
    from config import load_config

    cfg = load_config(config_dir)

    # A2A endpoint: AGENT_URL wins when set and non-empty; otherwise derive a
    # localhost URL from the configured a2a port.
    endpoint = os.environ.get("AGENT_URL") or f"http://localhost:{cfg.a2a.port}/a2a"

    # Role: fall back to the description when the role is empty or missing.
    role = cfg.role or cfg.description

    # MCP tools: configured tools followed by plugins, one bullet per entry.
    capabilities = [*cfg.tools, *cfg.plugins]
    tools_section = "\n".join(f"- {t}" for t in capabilities) if capabilities else "None"

    document_lines = [
        f"# {cfg.name}",
        "",
        f"**Role:** {role}",
        "",
        "## Description",
        f"{cfg.description}",
        "",
        "## A2A Endpoint",
        f"{endpoint}",
        "",
        "## MCP Tools",
        f"{tools_section}",
    ]
    content = "\n".join(document_lines) + "\n"

    Path(output_path).write_text(content, encoding="utf-8")
    logger.info("Generated AGENTS.md at %s for workspace %r", output_path, cfg.name)
|
||||
@@ -1,31 +0,0 @@
|
||||
# Publish-runtime pipeline verification — 2026-05-11
|
||||
|
||||
Marker file for the canonical end-to-end pipeline verification after
|
||||
`publish-runtime-bot` provisioning (internal#327) + stale-tag drift
|
||||
resolution (`runtime-v0.1.131` deleted from main).
|
||||
|
||||
## Purpose
|
||||
|
||||
Triggers `workspace/**` path filter on `publish-runtime-autobump.yml`,
|
||||
exercising the full pipeline:
|
||||
|
||||
1. `publish-runtime-autobump / bump-and-tag` reads PyPI version, computes
|
||||
next, pushes tag `runtime-v0.1.131` (or higher) using new bot scope.
|
||||
2. `publish-runtime.yml` fires on tag, builds + publishes to PyPI.
|
||||
3. Cascade autobump: 9 template repos get their `.runtime-version`
|
||||
pinned to the new version.
|
||||
|
||||
## Acceptance criteria
|
||||
|
||||
- [ ] autobump bump-and-tag context green on merged commit
|
||||
- [ ] tag `runtime-v0.1.131` (or computed next) exists on molecule-core
|
||||
- [ ] publish-runtime.yml run green
|
||||
- [ ] PyPI molecule-ai-workspace-runtime updated from 0.1.130
|
||||
- [ ] 9 template repos updated their pinned runtime version
|
||||
|
||||
## Rollback
|
||||
|
||||
This file is informational only — no code dependency. Safe to delete
|
||||
in any future PR once pipeline is proven stable.
|
||||
|
||||
— core-devops (per Hongming "long-term proper robust" directive 2026-05-11 19:48-19:50Z)
|
||||
@@ -1,84 +0,0 @@
|
||||
"""Build the Starlette routes for a workspace from its (card, adapter
|
||||
state) pair.
|
||||
|
||||
Pairs with PR #2756, which decoupled ``/.well-known/agent-card.json`` from
|
||||
``adapter.setup()`` failure. main.py was the only consumer and was
|
||||
``# pragma: no cover`` — so the wiring (card-route mounted unconditionally,
|
||||
JSON-RPC route swapped between DefaultRequestHandler and the
|
||||
not-configured handler based on ``adapter_ready``) had no pytest coverage.
|
||||
|
||||
A future refactor that re-couples the two would silently bypass PR #2756
|
||||
and ship the original "stuck booting forever" UX again. That gap is
|
||||
what closes here: extract the route-assembly into a pure function whose
|
||||
behaviour is unit-testable with Starlette's TestClient, and have main.py
|
||||
call it. Issue molecule-core#2761.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from starlette.routing import Route
|
||||
|
||||
from not_configured_handler import make_not_configured_handler
|
||||
|
||||
# Heavy a2a-sdk imports are lazy: deferred to inside build_routes so
|
||||
# tests that exercise only the not-configured branch (no executor) don't
|
||||
# need a2a.server.request_handlers / routes stubbed in their conftest.
|
||||
# Production boot pays the import cost once, on workspace startup.
|
||||
|
||||
|
||||
def build_routes(
    agent_card: Any,
    executor: Any | None,
    adapter_error: str | None,
) -> list:
    """Assemble the Starlette routes for this workspace.

    The agent-card routes built from ``agent_card`` are always included.
    The JSON-RPC route at ``/`` depends on ``executor``:

    * ``executor`` is None → a POST-only route served by the handler from
      ``make_not_configured_handler(adapter_error)``.
    * otherwise → the a2a JSON-RPC routes, backed by a
      ``DefaultRequestHandler`` that wraps the executor with an in-memory
      task store, with v0.3 wire compatibility enabled.

    Returns the routes as a new list.
    """
    from a2a.server.routes import create_agent_card_routes

    routes: list = list(create_agent_card_routes(agent_card))

    if executor is None:
        # Adapter is not usable: answer JSON-RPC calls with the
        # not-configured handler instead of a real request handler.
        routes.append(
            Route("/", make_not_configured_handler(adapter_error), methods=["POST"])
        )
        return routes

    # The heavier a2a imports are only needed on the executor path.
    from a2a.server.request_handlers import DefaultRequestHandler
    from a2a.server.routes import create_jsonrpc_routes
    from a2a.server.tasks import InMemoryTaskStore

    rpc_handler = DefaultRequestHandler(
        agent_executor=executor,
        task_store=InMemoryTaskStore(),
        agent_card=agent_card,
    )
    # enable_v0_3_compat=True keeps clients that send v0.3-shaped JSON-RPC
    # payloads working against this server.
    jsonrpc_routes = create_jsonrpc_routes(
        request_handler=rpc_handler,
        rpc_url="/",
        enable_v0_3_compat=True,
    )
    routes.extend(jsonrpc_routes)
    return routes
|
||||
@@ -1,37 +0,0 @@
|
||||
#!/usr/bin/env bash
# build-all.sh — Build the workspace base image.
#
# NOTE: Adapters live in standalone template repos:
#   https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-<runtime>
#
# Only the base image (workspace/Dockerfile) is built here. Each adapter repo
# ships its own Dockerfile that installs molecule-ai-workspace-runtime from
# PyPI together with the adapter-specific deps.
#
# Usage:
#   bash workspace/build-all.sh          # Build base image only
#
# Standalone adapter repos still reference the legacy base image for local dev
# (e.g. FROM workspace-template:base). To build one locally, clone the adapter
# repo and run `docker build -t workspace-template:<runtime> .` from its root.

set -euo pipefail

# Always build from the directory this script lives in.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m'

# Print an informational message to stderr with a green "[build]" tag.
log() {
  echo -e "${GREEN}[build]${NC} $1" >&2
}

# Print an error message to stderr with a red "[error]" tag.
err() {
  echo -e "${RED}[error]${NC} $1" >&2
}

# Build base image
log "Building workspace-template:base ..."
docker build -t workspace-template:base -f Dockerfile . || {
  err "Base image build failed"
  exit 1
}
log "Base image built"
log "Done. Adapters are in standalone template repos — see docs/workspace-runtime-package.md"
|
||||
@@ -1,139 +0,0 @@
|
||||
"""A2A communication tools — framework-agnostic delegation and peer discovery.
|
||||
|
||||
These are plain async functions that any adapter can wrap in its native tool format.
|
||||
The LangChain @tool versions are in tools/delegation.py.
|
||||
"""
|
||||
|
||||
import os
|
||||
import uuid
|
||||
|
||||
import httpx
|
||||
|
||||
# OFFSEC-003: peer-controlled text MUST be wrapped with sanitize_a2a_result
|
||||
# before being returned to the LLM. This module's delegate_task() is one of
|
||||
# the trust-boundary entry points where peer output crosses into our agent's
|
||||
# context — same surface as a2a_tools_delegation.py:325 (fixed via #492).
|
||||
# Issue #537.
|
||||
from _sanitize_a2a import sanitize_a2a_result
|
||||
|
||||
PLATFORM_URL = os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")
|
||||
WORKSPACE_ID = os.environ.get("WORKSPACE_ID", "")
|
||||
|
||||
|
||||
async def list_peers() -> list[dict]:
    """Fetch this workspace's peers from the platform registry.

    Best-effort: returns the decoded JSON body on an HTTP 200, and an empty
    list for any other status or for any exception raised while making the
    request or decoding the response.
    """
    peers_url = f"{PLATFORM_URL}/registry/{WORKSPACE_ID}/peers"
    async with httpx.AsyncClient(timeout=10.0) as http:
        try:
            reply = await http.get(peers_url)
            return reply.json() if reply.status_code == 200 else []
        except Exception:
            return []
|
||||
|
||||
|
||||
async def delegate_task(workspace_id: str, task: str) -> str:
    """Send ``task`` to a peer workspace over A2A and return its reply text.

    The call never raises for transport or protocol problems; every failure
    is reported as a string starting with ``"Error"``.

    Args:
        workspace_id: ID of the peer workspace that should receive the task.
        task: Plain-text task description, sent as a single text part.

    Returns:
        The peer's reply (passed through ``sanitize_a2a_result``) or an
        error description.
    """
    # Task #190 / #193 — self-delegation guard. A workspace delegating to its
    # own UUID round-trips through the platform proxy back into the sender;
    # the synchronous handler waits on the same lock the caller holds, the
    # request times out, and the platform writes an a2a_receive activity row
    # with source_id set to our own workspace UUID. The inbox poller then
    # surfaces that row as kind="peer_agent", so the agent sees the timeout
    # echoed back as a peer instructing it (#190).
    #
    # Sibling guards live in:
    #   - workspace-server/internal/handlers/delegation.go  (Go API gate)
    #   - workspace/a2a_tools_delegation.py                 (MCP path guard)
    # This module is the framework-agnostic surface used by adapters that do
    # not go through a2a_tools_delegation.py, so it carries its own guard.
    if WORKSPACE_ID and workspace_id == WORKSPACE_ID:
        return (
            "Error: self-delegation rejected (cannot delegate_task to your own "
            "workspace). There is no peer who is also you — the platform proxy "
            "would deadlock and the timeout would echo back as a peer_agent "
            "message from yourself (#190). Do the work directly, or use "
            "commit_memory / send_message_to_user instead."
        )

    async with httpx.AsyncClient(timeout=120.0) as client:
        # Phase 1: ask the registry where the target workspace lives.
        try:
            discovery = await client.get(
                f"{PLATFORM_URL}/registry/discover/{workspace_id}",
                headers={"X-Workspace-ID": WORKSPACE_ID},
            )
            if discovery.status_code != 200:
                return f"Error: cannot reach workspace {workspace_id} (status {discovery.status_code})"
            target_url = discovery.json().get("url", "")
            if not target_url:
                return f"Error: workspace {workspace_id} has no URL"
        except Exception as e:
            return f"Error discovering workspace: {e}"

        # Phase 2: send the A2A message. X-Workspace-ID identifies us as the
        # source — without it the platform's a2a_receive logger writes
        # source_id=NULL and the recipient's My Chat tab renders the
        # delegation as if a human user typed it. Same hazard fixed in
        # heartbeat.py / a2a_client.py / main.py initial+idle flows.
        try:
            envelope = {
                "jsonrpc": "2.0",
                "id": str(uuid.uuid4()),
                "method": "message/send",
                "params": {
                    "message": {
                        "role": "user",
                        "messageId": str(uuid.uuid4()),
                        "parts": [{"kind": "text", "text": task}],
                    },
                },
            }
            a2a_resp = await client.post(
                target_url,
                headers={"X-Workspace-ID": WORKSPACE_ID},
                json=envelope,
            )
            data = a2a_resp.json()

            if "result" in data:
                result = data["result"]
                if not isinstance(result, dict):
                    # Bare string results are returned; anything else has no text.
                    reply = str(result) if isinstance(result, str) else "(no text)"
                    return sanitize_a2a_result(reply)
                parts = result.get("parts", [])
                if parts and isinstance(parts[0], dict):
                    # OFFSEC-003: wrap peer-controlled text before returning
                    # it to LLM context. Issue #537.
                    return sanitize_a2a_result(parts[0].get("text", "(no text)"))
                # An explicitly empty parts list ({"parts": []}) returns
                # str(result), not "(no text)" — preserves pre-fix behaviour
                # (#279 regression fix).
                if result.get("parts") == []:
                    return sanitize_a2a_result(str(result))
                return sanitize_a2a_result("(no text)")

            if "error" in data:
                err = data["error"]
                # Errors arrive either as a plain string or as an object of
                # the form {"message": "...", "code": ...}.
                if isinstance(err, dict):
                    msg = err.get("message", "")
                else:
                    msg = err if isinstance(err, str) else str(err)
                # OFFSEC-003: peer-controlled error message; wrap before return.
                return sanitize_a2a_result(f"Error: {msg}")

            return sanitize_a2a_result(str(data))
        except Exception as e:
            return f"Error sending A2A message: {e}"
|
||||
|
||||
|
||||
async def get_peers_summary() -> str:
    """Render the available peers as a bullet list for system prompts.

    Returns:
        ``"No peers available."`` when ``list_peers()`` yields nothing,
        otherwise an ``"Available peers:"`` header followed by one line per
        peer with its name, ID, role and status.
    """
    peers = await list_peers()
    if not peers:
        return "No peers available."
    bullet_lines = [
        f"- {peer.get('name', 'Unknown')} (ID: {peer.get('id', '')}) — "
        f"{peer.get('role', '')} [{peer.get('status', '')}]"
        for peer in peers
    ]
    return "Available peers:\n" + "\n".join(bullet_lines)
|
||||
@@ -1,320 +0,0 @@
|
||||
"""Approval tool for human-in-the-loop workflows.
|
||||
|
||||
When an agent encounters a destructive, expensive, or unauthorized action,
|
||||
it calls request_approval() which creates a request and waits for a decision.
|
||||
|
||||
## Notification strategy
|
||||
|
||||
By default this module uses a **WebSocket subscription** (APPROVAL_USE_WEBSOCKET=true
|
||||
or when the ``websockets`` package is installed). The platform pushes an
|
||||
``APPROVAL_DECIDED`` event to the workspace WebSocket as soon as a human
|
||||
clicks Approve / Deny on the canvas — no polling required, instant delivery.
|
||||
|
||||
If WebSocket is unavailable (env var opt-out or import error) the module
|
||||
falls back to a **polling loop** so existing deployments without WebSocket
|
||||
support continue to work without any config change.
|
||||
|
||||
RBAC enforcement
|
||||
----------------
|
||||
The calling workspace must hold a role that grants the ``"approve"`` action.
|
||||
Roles are read from ``config.yaml`` under ``rbac.roles`` (default: operator).
|
||||
|
||||
Audit trail
|
||||
-----------
|
||||
Every approval lifecycle emits structured JSON Lines records:
|
||||
|
||||
1. ``approval / approve / requested`` — request submitted to platform
|
||||
2. ``approval / approve / granted`` — human approved (actor = decided_by)
|
||||
3. ``approval / approve / denied`` — human denied (actor = decided_by)
|
||||
4. ``approval / approve / timeout`` — no decision within APPROVAL_TIMEOUT
|
||||
|
||||
RBAC denials emit an ``rbac / rbac.deny / denied`` event instead.
|
||||
|
||||
Environment variables
|
||||
---------------------
|
||||
PLATFORM_URL Platform base URL (default: http://host.docker.internal:8080)
|
||||
WORKSPACE_ID This workspace's ID (default: "")
|
||||
APPROVAL_TIMEOUT Max wait in seconds (default: 300)
|
||||
APPROVAL_POLL_INTERVAL Polling interval in seconds (default: 5, polling path only)
|
||||
APPROVAL_USE_WEBSOCKET "true" to force WS, "false"
|
||||
to force polling (default: auto-detect)
|
||||
AUDIT_LOG_PATH Path for JSON Lines audit log (default: /var/log/molecule/audit.jsonl)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
|
||||
import httpx
|
||||
from langchain_core.tools import tool
|
||||
|
||||
from builtin_tools.audit import check_permission, get_workspace_roles, log_event
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
PLATFORM_URL = os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")
|
||||
WORKSPACE_ID = os.environ.get("WORKSPACE_ID", "")
|
||||
APPROVAL_POLL_INTERVAL = float(os.environ.get("APPROVAL_POLL_INTERVAL", "5"))
|
||||
APPROVAL_TIMEOUT = float(os.environ.get("APPROVAL_TIMEOUT", "300"))
|
||||
|
||||
# Auto-detect WebSocket support; can be overridden with env var
|
||||
_ws_env = os.environ.get("APPROVAL_USE_WEBSOCKET", "").lower()
|
||||
if _ws_env == "false":
|
||||
_USE_WEBSOCKET_DEFAULT = False
|
||||
elif _ws_env == "true":
|
||||
_USE_WEBSOCKET_DEFAULT = True
|
||||
else:
|
||||
try:
|
||||
import websockets as _ws_probe # noqa: F401
|
||||
_USE_WEBSOCKET_DEFAULT = True
|
||||
except ImportError:
|
||||
_USE_WEBSOCKET_DEFAULT = False
|
||||
|
||||
# Module-level reference so tests can monkeypatch it
|
||||
try:
|
||||
import websockets
|
||||
except ImportError:
|
||||
websockets = None # type: ignore[assignment]
|
||||
|
||||
# Expose for test introspection
|
||||
APPROVAL_USE_WEBSOCKET = _USE_WEBSOCKET_DEFAULT
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _create_approval_request(action: str, reason: str) -> dict:
    """POST to the platform to create an approval request.

    Args:
        action: Short description of the action awaiting approval.
        reason: Justification shown to the human approver.

    Returns:
        ``{"approval_id": <id>}`` on success or ``{"error": str}`` on failure.
        Failures cover a non-201 status, an undecodable body, a body without
        an ``approval_id``, and any transport error. This function does not
        raise for those cases.
    """
    async with httpx.AsyncClient(timeout=10.0) as client:
        try:
            resp = await client.post(
                f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/approvals",
                json={"action": action, "reason": reason},
            )
            if resp.status_code != 201:
                return {"error": f"Failed to create request: {resp.status_code}"}
            try:
                approval_id = resp.json().get("approval_id")
            except Exception:
                # Covers both undecodable bodies and JSON that is not an object.
                return {"error": f"Platform returned invalid JSON (status {resp.status_code})"}
            if approval_id in (None, ""):
                # Without an id nothing can ever match a later decision, so
                # report the failure now instead of waiting out the timeout.
                return {
                    "error": (
                        "Platform response did not include an approval_id "
                        f"(status {resp.status_code})"
                    )
                }
            logger.info("Approval requested: %s (id=%s)", action, approval_id)
            return {"approval_id": approval_id}
        except Exception as e:
            return {"error": f"Failed to request approval: {e}"}
|
||||
|
||||
|
||||
async def _wait_websocket(approval_id: str, timeout: float) -> dict:
    """Subscribe to the platform WebSocket and wait for the matching decision.

    Frames are read until an ``APPROVAL_DECIDED`` event for ``approval_id``
    arrives. Frames that are not valid JSON objects, or that belong to other
    events or approvals, are skipped.

    Args:
        approval_id: ID of the approval request to wait for.
        timeout: Not used inside this function; no deadline is enforced here,
            so callers that need one must wrap the call (for example with
            ``asyncio.wait_for``).

    Returns:
        ``{"approved": True, ...}`` when the status is ``"approved"``; for any
        other status ``{"approved": False, ..., "message": "Denied by human"}``.

    Raises:
        ConnectionError: The socket closed before a matching decision arrived.
        Exception: Whatever ``websockets.connect`` or the receive loop raises.
    """
    ws_url = (
        PLATFORM_URL.replace("http://", "ws://").replace("https://", "wss://")
        + "/ws"
    )
    headers = {"X-Workspace-ID": WORKSPACE_ID}

    logger.debug("Approval %s: waiting via WebSocket %s", approval_id, ws_url)

    async with websockets.connect(ws_url, additional_headers=headers) as ws:
        async for raw_message in ws:
            try:
                event = json.loads(raw_message)
            except ValueError:
                # JSONDecodeError and UnicodeDecodeError are both ValueErrors;
                # an unreadable frame must not abort the wait.
                continue
            if not isinstance(event, dict):
                # Valid JSON that is not an object cannot be an event.
                continue

            if event.get("event") != "APPROVAL_DECIDED":
                continue
            if event.get("approval_id") != approval_id:
                continue

            status = event.get("status")
            decided_by = event.get("decided_by", "")
            logger.info("Approval %s decided via WebSocket: %s by %s",
                        approval_id, status, decided_by)

            if status == "approved":
                return {
                    "approved": True,
                    "approval_id": approval_id,
                    "decided_by": decided_by,
                }
            return {
                "approved": False,
                "approval_id": approval_id,
                "decided_by": decided_by,
                "message": "Denied by human",
            }

    # The stream ended without a decision. Raise instead of implicitly
    # returning None so the caller can treat it like any other WebSocket
    # failure.
    raise ConnectionError(
        f"WebSocket closed before approval {approval_id} was decided"
    )
|
||||
|
||||
|
||||
async def _wait_polling(approval_id: str, timeout: float) -> dict:
    """Poll the platform REST endpoint until the approval is decided.

    Sleeps ``APPROVAL_POLL_INTERVAL`` seconds before each request and counts
    only that sleep time against ``timeout``.

    Args:
        approval_id: ID of the approval request to look for.
        timeout: Total sleep budget in seconds.

    Returns:
        The decision dict once the entry's status is ``"approved"`` or
        ``"denied"``.

    Raises:
        asyncio.TimeoutError: The budget ran out without a decision.
    """
    approvals_url = f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/approvals"
    waited = 0.0
    async with httpx.AsyncClient(timeout=10.0) as client:
        while waited < timeout:
            await asyncio.sleep(APPROVAL_POLL_INTERVAL)
            waited += APPROVAL_POLL_INTERVAL
            try:
                resp = await client.get(approvals_url)
                if resp.status_code != 200:
                    continue
                for entry in resp.json():
                    if entry.get("id") != approval_id:
                        continue
                    status = entry.get("status")
                    if status == "approved":
                        logger.info("Approval granted (poll): %s", approval_id)
                        return {
                            "approved": True,
                            "approval_id": approval_id,
                            "decided_by": entry.get("decided_by"),
                        }
                    if status == "denied":
                        logger.info("Approval denied (poll): %s", approval_id)
                        return {
                            "approved": False,
                            "approval_id": approval_id,
                            "decided_by": entry.get("decided_by"),
                            "message": "Denied by human",
                        }
            except Exception:
                # Transient error — keep retrying until the budget is spent.
                pass

    raise asyncio.TimeoutError()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public tool
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@tool
async def request_approval(
    action: str,
    reason: str,
) -> dict:
    """Request human approval before proceeding with a sensitive action.

    Use this when you're about to do something destructive, expensive,
    or outside your normal authority. The request is sent to the canvas
    where a human can approve or deny it.

    Args:
        action: Short description of what you want to do
        reason: Why this action is necessary
    """
    # NOTE(review): the docstring above is deliberately unchanged — with
    # ``@tool`` it is presumably surfaced to the model as the tool
    # description, which would make it runtime text. Confirm before editing.
    #
    # Result contract: always a dict with an "approved" bool. Failures
    # (RBAC denial, submit failure, timeout) add an "error" string.

    # One trace_id links every audit event for this approval lifecycle.
    trace_id = str(uuid.uuid4())

    # --- RBAC check -----------------------------------------------------------
    roles, custom_perms = get_workspace_roles()
    if not check_permission("approve", roles, custom_perms):
        log_event(
            event_type="rbac",
            action="rbac.deny",
            resource=action,
            outcome="denied",
            trace_id=trace_id,
            attempted_action="approve",
            roles=roles,
        )
        return {
            "approved": False,
            "error": (
                "RBAC: this workspace does not have the 'approve' permission. "
                f"Current roles: {roles}"
            ),
        }

    # Step 1: Create the approval request
    creation = await _create_approval_request(action, reason)
    if "error" in creation:
        log_event(
            event_type="approval",
            action="approve",
            resource=action,
            outcome="failure",
            trace_id=trace_id,
            reason="submit_failed",
            error=creation["error"],
        )
        return {"approved": False, "error": creation["error"]}

    approval_id = creation["approval_id"]
    log_event(
        event_type="approval",
        action="approve",
        resource=action,
        outcome="requested",
        trace_id=trace_id,
        approval_id=approval_id,
        reason_text=reason,
    )

    timeout = float(os.environ.get("APPROVAL_TIMEOUT", str(APPROVAL_TIMEOUT)))

    # Step 2: Wait for decision — WebSocket preferred, polling as fallback
    use_ws = APPROVAL_USE_WEBSOCKET and websockets is not None

    # A single deadline covers both wait strategies. Without it a WebSocket
    # wait that used up the whole timeout would be followed by a second full
    # timeout of polling.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    try:
        if use_ws:
            try:
                result = await asyncio.wait_for(
                    _wait_websocket(approval_id, timeout),
                    timeout=timeout,
                )
                if not isinstance(result, dict):
                    # A stream that ended without a decision is a WebSocket
                    # failure, not a result to be logged.
                    raise ConnectionError("WebSocket closed before a decision arrived")
            except Exception as ws_err:
                remaining = deadline - loop.time()
                if remaining <= 0:
                    # The budget is already spent — report a timeout rather
                    # than starting a second wait.
                    raise asyncio.TimeoutError() from ws_err
                # WebSocket failed (connection error, etc.) — fall through to
                # polling for the time that is left.
                logger.warning(
                    "WebSocket approval wait failed (%s), falling back to polling",
                    ws_err,
                )
                result = await asyncio.wait_for(
                    _wait_polling(approval_id, remaining),
                    timeout=remaining + APPROVAL_POLL_INTERVAL,
                )
        else:
            # Polling path (primary when WS disabled)
            result = await asyncio.wait_for(
                _wait_polling(approval_id, timeout),
                timeout=timeout + APPROVAL_POLL_INTERVAL,  # slight grace period
            )

        # Log the human decision
        decided_by = result.get("decided_by")
        outcome = "granted" if result.get("approved") else "denied"
        log_event(
            event_type="approval",
            action="approve",
            resource=action,
            outcome=outcome,
            # Record the human identity as actor when available
            actor=decided_by or WORKSPACE_ID,
            trace_id=trace_id,
            approval_id=approval_id,
            decided_by=decided_by,
        )
        return result

    except asyncio.TimeoutError:
        logger.warning("Approval timed out after %.0fs: %s", timeout, approval_id)
        log_event(
            event_type="approval",
            action="approve",
            resource=action,
            outcome="timeout",
            trace_id=trace_id,
            approval_id=approval_id,
            timeout_seconds=timeout,
        )
        return {
            "approved": False,
            "approval_id": approval_id,
            "error": f"Timed out after {timeout}s waiting for human decision",
        }
|
||||
@@ -1,274 +0,0 @@
|
||||
"""Immutable append-only audit log for EU AI Act compliance.
|
||||
|
||||
Fulfils Article 12 (record-keeping), Article 13 (transparency), and
|
||||
Article 17 (quality-management system) requirements for high-risk AI systems.
|
||||
|
||||
Log format: JSON Lines (one UTF-8 JSON object per line), suitable for direct
|
||||
ingestion by any SIEM (Splunk, Elastic, Datadog, etc.).
|
||||
|
||||
Required event fields
|
||||
---------------------
|
||||
timestamp ISO 8601 UTC datetime with timezone offset
|
||||
event_type Coarse category: "delegation", "approval", "memory", "rbac"
|
||||
workspace_id Workspace that generated this event
|
||||
actor Entity that triggered the action; defaults to workspace_id for
|
||||
automated events, or the human identity for approval decisions
|
||||
action Verb describing what was attempted:
|
||||
delegate | approve | memory.read | memory.write | rbac.deny
|
||||
resource Object of the action: target workspace ID, memory scope,
|
||||
approval action string, etc.
|
||||
outcome One of: allowed | denied | success | failure | timeout |
|
||||
requested | granted
|
||||
trace_id UUID v4 correlating related events across workspaces
|
||||
|
||||
The log file is opened in append mode ("a") on every write — it is NEVER
|
||||
truncated, rewritten, or deleted by this module. Rotate externally using
|
||||
logrotate (with ``copytruncate`` disabled) or ship to a SIEM before rotating.
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
AUDIT_LOG_PATH env var — full path to the JSONL file
|
||||
default: /var/log/molecule/audit.jsonl
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass # avoid circular import at runtime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
AUDIT_LOG_PATH: str = os.environ.get(
|
||||
"AUDIT_LOG_PATH", "/var/log/molecule/audit.jsonl"
|
||||
)
|
||||
WORKSPACE_ID: str = os.environ.get("WORKSPACE_ID", "")
|
||||
|
||||
# Protects the open() + write() sequence; prevents interleaved JSON lines
|
||||
# when multiple async tasks run in the same event-loop thread.
|
||||
_write_lock = threading.Lock()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Built-in role → permitted-action mappings
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
#: Maps each built-in role name to the set of actions it grants.
|
||||
#: Custom roles can be added in config.yaml under ``rbac.allowed_actions``.
|
||||
ROLE_PERMISSIONS: dict[str, set[str]] = {
|
||||
# Full access — shortcircuits all other checks
|
||||
"admin": {"delegate", "approve", "memory.read", "memory.write"},
|
||||
# Standard agent role
|
||||
"operator": {"delegate", "approve", "memory.read", "memory.write"},
|
||||
# Read-only observer — no writes, no delegation, no approvals
|
||||
"read-only": {"memory.read"},
|
||||
# Can approve and write memory, but cannot delegate
|
||||
"no-delegation": {"approve", "memory.read", "memory.write"},
|
||||
# Can delegate and write memory, but cannot invoke approval gate
|
||||
"no-approval": {"delegate", "memory.read", "memory.write"},
|
||||
# Memory reads only (useful for analytic sidecars)
|
||||
"memory-readonly": {"memory.read"},
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config loader (lazy, cached per process)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@functools.lru_cache(maxsize=1)
def _load_workspace_config():
    """Load the workspace config once per process; ``None`` when unavailable.

    The result, including a ``None`` produced by a failed load, is memoised by
    ``lru_cache``, so the loader runs at most once.
    """
    loaded = None
    try:
        from config import load_config  # local import avoids circular deps

        loaded = load_config()
    except Exception as exc:
        logger.warning("audit: could not load workspace config for RBAC: %s", exc)
    return loaded
|
||||
|
||||
|
||||
def get_workspace_roles() -> tuple[list[str], dict[str, list[str]]]:
    """Return ``(roles, custom_permissions)`` for the running workspace.

    Values come from ``rbac.roles`` and ``rbac.allowed_actions`` of the
    loaded config and are copied into a fresh list and dict. When the config
    is unavailable the fallback is ``(["operator"], {})``, so agents stay
    functional in degraded environments.
    """
    cfg = _load_workspace_config()
    if cfg is not None:
        rbac = cfg.rbac
        return list(rbac.roles), dict(rbac.allowed_actions)
    return ["operator"], {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RBAC helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def check_permission(
    action: str,
    roles: list[str],
    custom_permissions: dict[str, list[str]] | None = None,
) -> bool:
    """Report whether at least one of ``roles`` grants ``action``.

    Each role is resolved in this order:

    1. ``"admin"`` grants every action.
    2. A role listed in *custom_permissions* is governed **only** by its
       custom action list; its built-in definition is ignored.
    3. Any other role is looked up in the built-in :data:`ROLE_PERMISSIONS`.

    Args:
        action: Action to authorise, e.g. ``"delegate"``.
        roles: Roles assigned to the calling workspace.
        custom_permissions: Optional ``{role: [action, ...]}`` mapping loaded
            from ``WorkspaceConfig.rbac.allowed_actions``.

    Returns:
        ``True`` if the action is permitted, ``False`` otherwise.

    Examples::

        >>> check_permission("delegate", ["operator"])
        True
        >>> check_permission("delegate", ["read-only"])
        False
        >>> check_permission("deploy", ["developer"], {"developer": ["deploy"]})
        True
    """
    overrides = custom_permissions or {}

    def _role_grants(role: str) -> bool:
        if role == "admin":
            return True
        if role in overrides:
            # A custom entry is definitive: never consult the built-ins.
            return action in overrides[role]
        return action in ROLE_PERMISSIONS.get(role, ())

    return any(_role_grants(role) for role in roles)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public audit API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def log_event(
    event_type: str,
    action: str,
    resource: str,
    outcome: str,
    actor: str | None = None,
    trace_id: str | None = None,
    **extra: Any,
) -> str:
    """Build one audit record and hand it to the JSON Lines writer.

    Args:
        event_type: Coarse category — ``"delegation"``, ``"approval"``,
                    ``"memory"``, or ``"rbac"``.
        action:     Verb — ``"delegate"``, ``"approve"``, ``"memory.write"``,
                    ``"memory.read"``, ``"rbac.deny"``.
        resource:   Object of the action — target workspace ID, memory scope,
                    approval action string, etc.
        outcome:    Terminal state — one of ``"allowed"``, ``"denied"``,
                    ``"success"``, ``"failure"``, ``"timeout"``,
                    ``"requested"``, ``"granted"``.
        actor:      Identity that triggered the event. ``None`` means the
                    running workspace (``WORKSPACE_ID``); pass ``decided_by``
                    for human approval decisions.
        trace_id:   Caller-supplied correlation ID. A fresh UUID v4 is
                    generated when omitted.
        **extra:    Additional key-value pairs appended after the built-in
                    fields. A key that collides with a built-in field is
                    dropped, never overriding it.

    Returns:
        The ``trace_id`` recorded on the event, so callers can chain related
        events under one correlation identifier.

    Example::

        trace = log_event(
            event_type="delegation",
            action="delegate",
            resource="billing-agent",
            outcome="success",
            target_workspace_id="billing-agent",
            attempt=1,
        )
    """
    correlation_id = str(uuid.uuid4()) if trace_id is None else trace_id

    record: dict[str, Any] = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "event_type": event_type,
        "workspace_id": WORKSPACE_ID,
        "actor": WORKSPACE_ID if actor is None else actor,
        "action": action,
        "resource": resource,
        "outcome": outcome,
        "trace_id": correlation_id,
    }

    # setdefault keeps every built-in field intact and appends the rest.
    for name, value in extra.items():
        record.setdefault(name, value)

    _write_event(record)
    return correlation_id
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal writer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _ensure_log_dir(path: str) -> None:
|
||||
"""Create the parent directory for *path* if it does not already exist."""
|
||||
Path(path).parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def _write_event(event: dict[str, Any]) -> None:
    """Append *event* to the audit log as one fsynced JSON line.

    The open/write/flush/fsync sequence runs under ``_write_lock`` so that
    lines from different threads of this process are never interleaved.

    Any failure (disk full, permission error, ...) is logged at WARNING level
    and swallowed — audit logging must never crash the application. In
    production, consider alerting on these warnings so that missing audit
    records are noticed quickly.
    """
    try:
        target = AUDIT_LOG_PATH
        _ensure_log_dir(target)
        # default=str stringifies values json cannot encode natively.
        payload = json.dumps(event, default=str, ensure_ascii=False) + "\n"
        with _write_lock, open(target, "a", encoding="utf-8") as sink:
            sink.write(payload)
            sink.flush()
            os.fsync(sink.fileno())
    except Exception as exc:  # pylint: disable=broad-except
        logger.warning(
            "Audit log write failed — event NOT persisted "
            "(trace_id=%s, action=%s): %s",
            event.get("trace_id", "?"),
            event.get("action", "?"),
            exc,
        )
|
||||
@@ -1,122 +0,0 @@
|
||||
"""Workspace-scoped awareness backend wrapper.
|
||||
|
||||
The agent-facing memory tools keep their existing signatures and delegate
|
||||
to this helper when workspace awareness is configured.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from policies.namespaces import resolve_awareness_namespace
|
||||
|
||||
try: # pragma: no cover - optional runtime dependency in lightweight test envs
|
||||
import httpx # type: ignore
|
||||
except ImportError: # pragma: no cover
|
||||
httpx = SimpleNamespace(AsyncClient=None)
|
||||
|
||||
|
||||
DEFAULT_AWARENESS_TIMEOUT = 10.0
|
||||
|
||||
|
||||
def get_awareness_config() -> dict[str, str] | None:
    """Read the awareness connection settings from the environment.

    Returns:
        ``{"base_url": ..., "namespace": ...}`` when ``AWARENESS_URL`` is set
        (trailing slashes stripped) and at least one of ``WORKSPACE_ID`` /
        ``AWARENESS_NAMESPACE`` is non-empty; ``None`` otherwise. The
        namespace is whatever ``resolve_awareness_namespace`` derives from
        those two values.
    """
    base_url = os.environ.get("AWARENESS_URL", "").rstrip("/")
    if not base_url:
        return None

    workspace_id = os.environ.get("WORKSPACE_ID", "")
    configured_namespace = os.environ.get("AWARENESS_NAMESPACE", "")
    if not (workspace_id or configured_namespace):
        return None

    return {
        "base_url": base_url,
        "namespace": resolve_awareness_namespace(workspace_id, configured_namespace),
    }
|
||||
|
||||
|
||||
class AwarenessClient:
    """Minimal HTTP client for one namespace of the awareness memory service."""

    def __init__(self, base_url: str, namespace: str, timeout: float = DEFAULT_AWARENESS_TIMEOUT):
        # Trailing slashes are dropped so URL building can always add "/...".
        self.base_url = base_url.rstrip("/")
        self.namespace = namespace
        self.timeout = timeout

    def _memories_url(self) -> str:
        """Return the memories endpoint for this client's namespace."""
        # Single place that knows the awareness path, so the contract can
        # change later without touching the agent-facing tools.
        return f"{self.base_url}/api/v1/namespaces/{self.namespace}/memories"

    async def commit(self, content: str, scope: str) -> dict[str, Any]:
        """POST a new memory and return the parsed commit result."""
        body = {"content": content, "scope": scope}
        client_cls = _resolve_async_client()
        async with client_cls(timeout=self.timeout) as http:
            response = await http.post(self._memories_url(), json=body)
            return _parse_commit_response(response, scope)

    async def search(self, query: str = "", scope: str = "") -> dict[str, Any]:
        """GET memories, filtering by ``query`` and/or ``scope`` when given."""
        # Only non-empty filters are sent as query parameters.
        params: dict[str, str] = {
            name: value
            for name, value in (("q", query), ("scope", scope))
            if value
        }

        client_cls = _resolve_async_client()
        async with client_cls(timeout=self.timeout) as http:
            response = await http.get(self._memories_url(), params=params)
            return _parse_search_response(response)
|
||||
|
||||
|
||||
def build_awareness_client() -> AwarenessClient | None:
    """Build an :class:`AwarenessClient` from the workspace environment.

    Returns ``None`` when ``get_awareness_config()`` reports that awareness
    is not configured.
    """
    settings = get_awareness_config()
    if settings:
        return AwarenessClient(settings["base_url"], settings["namespace"])
    return None
|
||||
|
||||
|
||||
def _parse_commit_response(resp: httpx.Response, scope: str) -> dict[str, Any]:
    """Translate a commit HTTP response into the tool-level result dict.

    Args:
        resp: Response returned by the awareness ``POST .../memories`` call.
        scope: Scope the memory was committed under; echoed back on success.

    Returns:
        ``{"success": True, "id": ..., "scope": scope}`` for HTTP 200/201,
        otherwise ``{"success": False, "error": ...}`` where the error is the
        body's ``"error"`` field or, failing that, the raw response text.
    """
    data = _safe_json(resp)
    # _safe_json may return a list (or another non-object value); only a
    # dict has the fields read below, so anything else counts as "no fields".
    fields = data if isinstance(data, dict) else {}
    if resp.status_code in (200, 201):
        return {"success": True, "id": fields.get("id"), "scope": scope}
    return {"success": False, "error": fields.get("error", resp.text)}
|
||||
|
||||
|
||||
def _parse_search_response(resp: httpx.Response) -> dict[str, Any]:
    """Translate a search HTTP response into the tool-level result dict.

    Args:
        resp: Response returned by the awareness ``GET .../memories`` call.

    Returns:
        For HTTP 200: ``{"success": True, "count": n, "memories": [...]}``,
        where the memories are the body itself when it is a list, or its
        ``"memories"`` field when it is an object (a missing or empty field
        counts as no memories). For any other status:
        ``{"success": False, "error": ...}`` using the body's ``"error"``
        field or the raw response text.
    """
    data = _safe_json(resp)
    if resp.status_code == 200:
        if isinstance(data, list):
            memories = data
        elif isinstance(data, dict):
            # ``or []`` also covers an explicit ``"memories": null``.
            memories = data.get("memories") or []
        else:
            memories = []
        return {
            "success": True,
            "count": len(memories),
            "memories": memories,
        }
    # An error body is not guaranteed to be a JSON object; fall back to the
    # raw text instead of calling ``.get`` on a list.
    error = data.get("error", resp.text) if isinstance(data, dict) else resp.text
    return {"success": False, "error": error}
|
||||
|
||||
|
||||
def _safe_json(resp: httpx.Response) -> dict[str, Any] | list[Any]:
|
||||
try:
|
||||
return resp.json()
|
||||
except ValueError:
|
||||
return {"error": resp.text}
|
||||
|
||||
|
||||
def _resolve_async_client():
    """Locate an ``AsyncClient`` class to use for HTTP calls.

    Lookup order: this module's ``httpx`` first, then the ``httpx`` attribute
    of ``builtin_tools.memory`` if that module has already been imported.

    Raises:
        RuntimeError: Neither source provides an ``AsyncClient``.
    """
    own_client = getattr(httpx, "AsyncClient", None)
    if own_client is not None:
        return own_client

    # getattr on None simply yields the default, so a missing module or a
    # missing ``httpx`` attribute both fall through to the error below.
    memory_module = sys.modules.get("builtin_tools.memory")
    borrowed_client = getattr(
        getattr(memory_module, "httpx", None), "AsyncClient", None
    )
    if borrowed_client is not None:
        return borrowed_client

    raise RuntimeError("httpx.AsyncClient is unavailable")
|
||||
@@ -1,359 +0,0 @@
|
||||
"""OWASP Top 10 for Agentic Applications compliance enforcement (Dec 2025).
|
||||
|
||||
Enable via config.yaml::
|
||||
|
||||
compliance:
|
||||
mode: owasp_agentic
|
||||
prompt_injection: detect # detect | block
|
||||
max_tool_calls_per_task: 50
|
||||
max_task_duration_seconds: 300
|
||||
|
||||
When ``mode`` is absent or empty, this module is a no-op — no overhead, no
|
||||
behaviour change. This makes it safe to import unconditionally.
|
||||
|
||||
Coverage
|
||||
--------
|
||||
|
||||
OA-01 Prompt Injection (``sanitize_input``)
|
||||
Scans user-supplied text for instruction-override patterns, role-hijacking
|
||||
attempts, system-prompt delimiter injection, and known jailbreak keywords.
|
||||
|
||||
- ``detect`` (default): log an audit event, return the original text so
|
||||
the agent still processes the input. Operators are alerted without
|
||||
breaking legitimate use-cases that happen to contain trigger words.
|
||||
|
||||
- ``block``: raise ``PromptInjectionError`` before the agent sees the text.
|
||||
|
||||
OA-03 Excessive Agency (``check_agency_limits``)
|
||||
Tracks the number of tool calls and wall-clock time elapsed per task.
|
||||
When a limit is exceeded, ``ExcessiveAgencyError`` is raised. The caller
|
||||
(``a2a_executor.py``) catches it and terminates the task gracefully.
|
||||
|
||||
OA-02 / OA-06 Insecure Output / Sensitive Data Exposure (``redact_pii``)
|
||||
Scans agent output for credit-card numbers, SSNs, API keys, AWS access
|
||||
keys, and e-mail addresses. Detected values are replaced with
|
||||
``[REDACTED:<type>]`` tokens before the response reaches the caller.
|
||||
An audit event records the PII types found (not the values themselves).
|
||||
|
||||
Note on streaming: ``redact_pii`` is applied to the *final accumulated
|
||||
text* before the terminal ``Message`` event is emitted. Token-by-token
|
||||
SSE artifacts that have already been sent to streaming clients are not
|
||||
retroactively redacted. For full streaming redaction, integrate
|
||||
``redact_pii`` at the ``TaskArtifactUpdateEvent`` level.
|
||||
|
||||
Compliance posture report (``get_compliance_posture``)
|
||||
Returns the current effective compliance configuration as a plain ``dict``
|
||||
suitable for a health or audit endpoint, letting operators verify that the
|
||||
correct settings are active without reading config files.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from builtin_tools.audit import log_event
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public exceptions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class PromptInjectionError(ValueError):
    """Signal that user input matched an injection pattern and must be rejected.

    Only raised when the caller runs with ``prompt_injection=block``.
    """
|
||||
|
||||
|
||||
class ExcessiveAgencyError(RuntimeError):
    """Signal that a task went over its tool-call budget or its time budget."""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OA-01 — Prompt Injection detection
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
#: Compiled ``(pattern, category)`` pairs used for prompt-injection detection.
#: Every pattern is compiled with ``re.I`` and most use ``\s+`` between words,
#: so matching tolerates case and whitespace variation without the input
#: having to be lowercased or collapsed first.
#: Add workspace-specific patterns in config if needed.
_INJECTION_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Instruction override
    (re.compile(r"ignore\s+(all\s+)?previous\s+instructions?", re.I), "instruction_override"),
    (re.compile(r"disregard\s+(all\s+)?previous", re.I), "instruction_override"),
    (re.compile(r"forget\s+(all\s+)?previous", re.I), "instruction_override"),
    (re.compile(r"override\s+(your\s+)?(instructions?|guidelines?|rules?)", re.I), "instruction_override"),
    # Role hijacking
    (re.compile(r"you\s+are\s+now\s+\w", re.I), "role_hijack"),
    (re.compile(r"act\s+as\s+(a\s+)?(new\s+|different\s+|unrestricted\s+)", re.I), "role_hijack"),
    (re.compile(r"roleplay\s+as", re.I), "role_hijack"),
    (re.compile(r"pretend\s+(you\s+are|to\s+be)\b", re.I), "role_hijack"),
    (re.compile(r"from\s+now\s+on\s+(you\s+are|act\s+as)", re.I), "role_hijack"),
    # System-prompt delimiter injection (LLM-specific tokens)
    (re.compile(r"<\|?\s*(system|im_start|im_end|endoftext)\s*\|?>", re.I), "delimiter_injection"),
    (re.compile(r"\[INST\]|\[/INST\]|\[\[SYS\]\]|\[\[/SYS\]\]", re.I), "delimiter_injection"),
    (re.compile(r"<</SYS>>|<<SYS>>", re.I), "delimiter_injection"),
    # DAN / jailbreak keywords
    (re.compile(r"\bDAN\b.{0,30}(mode|now|enabled|activated)", re.I), "jailbreak"),
    (re.compile(r"do\s+anything\s+now", re.I), "jailbreak"),
    (re.compile(r"\bjailbreak\b", re.I), "jailbreak"),
    (re.compile(r"developer\s+mode\s+(enabled|on)", re.I), "jailbreak"),
    # Prompt exfiltration
    (re.compile(r"(repeat|print|output|show|reveal|display)\s+(your\s+)?(system\s+prompt|initial\s+instructions?)", re.I), "prompt_exfiltration"),
    (re.compile(r"what\s+(are\s+)?your\s+(instructions?|system\s+prompt)", re.I), "prompt_exfiltration"),
]
|
||||
|
||||
|
||||
def detect_prompt_injection(text: str) -> list[tuple[str, str]]:
    """Scan *text* against every known injection pattern.

    Each pattern contributes at most one entry (its first match). The matched
    substring is truncated to 80 characters before it is returned.

    Args:
        text: Raw user input to scan.

    Returns:
        ``(matched_snippet, category)`` tuples in pattern order; an empty
        list means nothing matched.
    """
    return [
        (hit.group(0)[:80], label)
        for regex, label in _INJECTION_PATTERNS
        if (hit := regex.search(text))
    ]
|
||||
|
||||
|
||||
def sanitize_input(
    text: str,
    *,
    prompt_injection_mode: str = "detect",
    context_id: str = "",
) -> str:
    """Check *text* for prompt injection and enforce the configured response.

    Clean input is returned immediately without emitting any audit event.
    When at least one pattern matches, an audit event is logged; the request
    is then either rejected (``block``) or passed through (any other mode).

    Args:
        text: User-supplied input to the agent.
        prompt_injection_mode: ``"detect"`` or ``"block"``. Any value other
            than ``"block"`` is treated as detect-only.
        context_id: Task/context identifier for audit correlation.

    Returns:
        The original *text*, unchanged.

    Raises:
        :class:`PromptInjectionError`: only when ``prompt_injection_mode="block"``
            and at least one injection pattern is matched.
    """
    matches = detect_prompt_injection(text)
    if not matches:
        return text

    # De-duplicate while keeping first-seen order so the audit record and the
    # error message are stable across runs (a plain set has no fixed order).
    categories = list(dict.fromkeys(cat for _, cat in matches))
    # The logged outcome must mirror what actually happens below: only the
    # explicit "block" mode rejects the request, everything else passes it on.
    blocking = prompt_injection_mode == "block"
    trace_id = str(uuid.uuid4())

    log_event(
        event_type="compliance",
        action="prompt_injection.detect",
        resource="user_input",
        outcome="blocked" if blocking else "detected",
        trace_id=trace_id,
        context_id=context_id,
        categories=categories,
        match_count=len(matches),
        # Log category + truncated match, never the full raw text (OA-06)
        matches=[{"category": cat, "snippet": snippet} for snippet, cat in matches[:5]],
    )

    if blocking:
        raise PromptInjectionError(
            f"Prompt injection detected ({', '.join(categories)}). "
            "Request blocked by compliance policy."
        )

    # detect mode — log and continue
    logger.warning(
        "Prompt injection patterns detected (context_id=%s, categories=%s) — "
        "passing to agent in detect mode",
        context_id,
        categories,
    )
    return text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OA-03 — Excessive Agency
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class AgencyTracker:
    """Running tool-call and elapsed-time budget for a single task.

    The clock starts when the tracker is constructed; the counter starts at
    zero. Neither can be supplied through the constructor. Call
    :meth:`on_tool_call` once per tool invocation.
    """

    # Configurable limits.
    max_tool_calls: int = 50
    max_duration_seconds: float = 300.0
    # Internal bookkeeping, excluded from ``__init__``.
    tool_call_count: int = field(default=0, init=False)
    start_time: float = field(default_factory=time.monotonic, init=False)

    def on_tool_call(self, tool_name: str = "", context_id: str = "") -> None:
        """Record one tool call, then verify both budgets.

        The call counter is checked before the duration, so when both limits
        are exceeded the tool-call violation is the one reported.

        Args:
            tool_name: Name of the tool being started; logged as the audit
                resource (``"unknown_tool"`` when empty).
            context_id: Task/context identifier for audit correlation.

        Raises:
            :class:`ExcessiveAgencyError`: if either limit is exceeded.
        """
        self.tool_call_count += 1
        seconds_elapsed = time.monotonic() - self.start_time
        audited_resource = tool_name or "unknown_tool"

        over_call_budget = self.tool_call_count > self.max_tool_calls
        if over_call_budget:
            log_event(
                event_type="compliance",
                action="excessive_agency.tool_limit",
                resource=audited_resource,
                outcome="blocked",
                context_id=context_id,
                tool_call_count=self.tool_call_count,
                limit=self.max_tool_calls,
                elapsed_seconds=round(seconds_elapsed, 2),
            )
            raise ExcessiveAgencyError(
                f"Tool call limit exceeded: {self.tool_call_count} calls > "
                f"max {self.max_tool_calls} per task"
            )

        over_time_budget = seconds_elapsed > self.max_duration_seconds
        if over_time_budget:
            log_event(
                event_type="compliance",
                action="excessive_agency.duration_limit",
                resource=audited_resource,
                outcome="blocked",
                context_id=context_id,
                tool_call_count=self.tool_call_count,
                elapsed_seconds=round(seconds_elapsed, 2),
                limit_seconds=self.max_duration_seconds,
            )
            raise ExcessiveAgencyError(
                f"Task duration limit exceeded: {seconds_elapsed:.0f}s > "
                f"max {self.max_duration_seconds:.0f}s per task"
            )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OA-02 / OA-06 — PII redaction
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
#: ``(compiled_pattern, replacement_token)`` pairs applied in order.
#: The replacement tokens are SIEM-friendly: ``[REDACTED:type]``.
#: Several patterns share one token (e.g. both key patterns map to
#: ``api_key`` and both GitHub formats map to ``github_token``).
_PII_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Formatted credit cards: XXXX-XXXX-XXXX-XXXX or XXXX XXXX XXXX XXXX
    # (an unseparated 16-digit run is not matched by this pattern)
    (re.compile(r"\b\d{4}[\s\-]\d{4}[\s\-]\d{4}[\s\-]\d{4}\b"), "[REDACTED:credit_card]"),
    # US Social Security Numbers: XXX-XX-XXXX
    (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED:ssn]"),
    # OpenAI-style keys: sk-... (≥ 32 chars after prefix)
    (re.compile(r"\bsk-[A-Za-z0-9_\-]{32,}\b"), "[REDACTED:api_key]"),
    # Generic API/secret keys with common prefixes (case-insensitive,
    # ≥ 20 chars after the ``-`` or ``_`` separator)
    (re.compile(r"\b(?:sk|pk|api|secret|token|auth)[-_][A-Za-z0-9_\-]{20,}\b", re.I), "[REDACTED:api_key]"),
    # AWS Access Key IDs
    (re.compile(r"\bAKIA[0-9A-Z]{16}\b"), "[REDACTED:aws_key]"),
    # GitHub personal access tokens — classic format (36-char alphanumeric suffix)
    (re.compile(r"\bghp_[A-Za-z0-9]{36}\b"), "[REDACTED:github_token]"),
    # GitHub personal access tokens — fine-grained format (82-char alphanumeric+underscore suffix)
    (re.compile(r"\bgithub_pat_[A-Za-z0-9_]{82}\b"), "[REDACTED:github_token]"),
    # Email addresses
    (re.compile(r"\b[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}\b"), "[REDACTED:email]"),
]
|
||||
|
||||
|
||||
def redact_pii(text: str) -> tuple[str, list[str]]:
    """Replace every PII match in *text* with its ``[REDACTED:type]`` token.

    Patterns are applied one after another to the progressively redacted
    string. A type name is reported once, in the order it was first seen,
    regardless of how many values of that type were replaced.

    Args:
        text: Agent output text to scan.

    Returns:
        ``(redacted_text, pii_types_found)``. The list is empty when nothing
        was replaced.

    Examples::

        >>> redacted, types = redact_pii("Call me at test@example.com sk-abc123...")
        >>> "email" in types
        True
        >>> "[REDACTED:email]" in redacted
        True
    """
    prefix_len = len("[REDACTED:")
    seen_types: list[str] = []
    current = text
    for regex, token in _PII_PATTERNS:
        current, hits = regex.subn(token, current)
        if not hits:
            continue
        # Token looks like "[REDACTED:type]"; strip the wrapper to get "type".
        kind = token[prefix_len:-1]
        if kind not in seen_types:
            seen_types.append(kind)
    return current, seen_types
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Compliance posture report
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_compliance_posture() -> dict[str, Any]:
    """Snapshot the effective compliance settings as a plain dict.

    The workspace config is obtained through
    ``builtin_tools.audit._load_workspace_config``. Any failure while
    importing or loading it is swallowed and a conservative "disabled"
    snapshot carrying ``"note": "config unavailable"`` is returned instead,
    so the function never raises on a missing config.

    Returned keys::

        compliance_mode            configured mode string
        enabled                    True when mode == "owasp_agentic"
        prompt_injection           "detect" | "block"
        max_tool_calls_per_task    int
        max_task_duration_seconds  number
        pii_redaction_enabled      mirrors ``enabled``
        security_scan_mode         security-scan mode string
        rbac_roles                 list of role names
    """
    cfg = None
    try:
        from builtin_tools.audit import _load_workspace_config

        cfg = _load_workspace_config()
    except Exception:
        cfg = None

    if cfg is None:
        fallback: dict[str, Any] = {
            "compliance_mode": "",
            "enabled": False,
            "prompt_injection": "detect",
            "max_tool_calls_per_task": 50,
            "max_task_duration_seconds": 300,
            "pii_redaction_enabled": False,
            "security_scan_mode": "warn",
            "rbac_roles": [],
            "note": "config unavailable",
        }
        return fallback

    compliance = cfg.compliance
    is_active = compliance.mode == "owasp_agentic"
    posture: dict[str, Any] = {
        "compliance_mode": compliance.mode,
        "enabled": is_active,
        "prompt_injection": compliance.prompt_injection,
        "max_tool_calls_per_task": compliance.max_tool_calls_per_task,
        "max_task_duration_seconds": compliance.max_task_duration_seconds,
        # PII redaction follows the compliance switch; it has no flag of its own.
        "pii_redaction_enabled": is_active,
        "security_scan_mode": cfg.security_scan.mode,
        "rbac_roles": list(cfg.rbac.roles),
    }
    return posture
|
||||
@@ -1,550 +0,0 @@
|
||||
"""Async delegation tool for sending tasks to peer workspaces via A2A.
|
||||
|
||||
Delegations are non-blocking: the tool fires the A2A request in the background
|
||||
and returns immediately with a task_id. The agent can check status anytime via
|
||||
check_task_status, or just continue working and check later.
|
||||
|
||||
When the delegate responds, the result is stored and the agent is notified
|
||||
via a status update.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
from langchain_core.tools import tool
|
||||
|
||||
from builtin_tools.audit import check_permission, get_workspace_roles, log_event
|
||||
from builtin_tools.telemetry import (
|
||||
A2A_SOURCE_WORKSPACE,
|
||||
A2A_TARGET_WORKSPACE,
|
||||
A2A_TASK_ID,
|
||||
WORKSPACE_ID_ATTR,
|
||||
get_current_traceparent,
|
||||
get_tracer,
|
||||
inject_trace_headers,
|
||||
)
|
||||
|
||||
# Base URL of the platform API; every platform request in this module is
# built from it. Overridable through the PLATFORM_URL environment variable.
PLATFORM_URL = os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")
# ID of this workspace (empty string when the variable is unset).
WORKSPACE_ID = os.environ.get("WORKSPACE_ID", "")
# Number of A2A send attempts per delegation.
# NOTE: a non-numeric value raises ValueError at import time.
DELEGATION_RETRY_ATTEMPTS = int(os.environ.get("DELEGATION_RETRY_ATTEMPTS", "3"))
# Base delay in seconds between attempts after a connect/timeout error; the
# retry loop multiplies it by the attempt number.
DELEGATION_RETRY_DELAY = float(os.environ.get("DELEGATION_RETRY_DELAY", "5.0"))
# Timeout in seconds for the HTTP client used for discovery and the A2A send.
DELEGATION_TIMEOUT = float(os.environ.get("DELEGATION_TIMEOUT", "300.0"))
|
||||
|
||||
|
||||
class DelegationStatus(str, Enum):
    """Lifecycle states of a delegation; each value is its wire string."""

    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    # QUEUED means the peer's a2a-proxy answered HTTP 202 with
    # {queued: true}: the peer is busy with another task and the request now
    # sits in a drain queue. The reply is delivered later through the
    # platform's stitch path once the peer is free. The LLM is expected to
    # WAIT — not retry, and not do the work itself; the check_task_status
    # docstring carries the prompt-side guidance.
    QUEUED = "queued"
    COMPLETED = "completed"
    FAILED = "failed"
|
||||
|
||||
|
||||
@dataclass
class DelegationTask:
    """Local record of one delegation sent to a peer workspace."""

    # Identity of the delegation and of the workspace it was sent to.
    task_id: str
    workspace_id: str
    # Task text kept for status listings.
    task_description: str
    # Mutable outcome fields, updated as the delegation progresses.
    status: DelegationStatus = DelegationStatus.PENDING
    result: Optional[str] = None
    error: Optional[str] = None
|
||||
|
||||
|
||||
# In-memory store of delegation tasks for this workspace, keyed by task_id.
_delegations: dict[str, DelegationTask] = {}
# Strong references to in-flight background tasks; entries are discarded by
# the done-callback once a task finishes.
_background_tasks: set[asyncio.Task] = set()
# Store size above which completed/failed delegations become eligible for eviction.
MAX_DELEGATION_HISTORY = 100
# Module logger, obtained via __import__ instead of a top-level `import logging`.
logger = __import__("logging").getLogger(__name__)
|
||||
|
||||
|
||||
def _evict_old_delegations():
    """Trim the delegation store back toward MAX_DELEGATION_HISTORY.

    Only completed or failed records are removed, earliest-inserted first.
    Records in any other state are never evicted, so the store can stay
    above the limit when too few terminal records exist.
    """
    overflow = len(_delegations) - MAX_DELEGATION_HISTORY
    if overflow <= 0:
        return
    finished_ids = []
    for task_id, record in _delegations.items():
        if record.status in (DelegationStatus.COMPLETED, DelegationStatus.FAILED):
            finished_ids.append(task_id)
    # Drop at most `overflow` records; dict iteration order is insertion order.
    for task_id in finished_ids[:overflow]:
        del _delegations[task_id]
|
||||
|
||||
|
||||
def _on_task_done(task: asyncio.Task):
    """Done-callback: forget the finished task and log its exception, if any."""
    _background_tasks.discard(task)
    if task.cancelled():
        # Asking a cancelled task for its exception would raise CancelledError.
        return
    failure = task.exception()
    if failure:
        logger.error("Delegation background task failed: %s", failure)
|
||||
|
||||
|
||||
async def _notify_completion(task_id: str, target_workspace_id: str, status: str):
    """POST a ``delegation_complete`` notification for this workspace.

    Best-effort: any exception is logged at debug level and swallowed.
    """
    try:
        endpoint = f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/notify"
        payload = {
            "type": "delegation_complete",
            "task_id": task_id,
            "target_workspace_id": target_workspace_id,
            "status": status,
        }
        async with httpx.AsyncClient(timeout=10) as http:
            await http.post(endpoint, json=payload)
    except Exception as exc:
        logger.debug("Delegation notify failed (best-effort): %s", exc)
|
||||
|
||||
|
||||
async def _record_delegation_on_platform(task_id: str, target_workspace_id: str, task: str):
    """Announce a new delegation to the platform (#64 fix).

    Sends a single POST to ``/workspaces/<self>/delegations/record`` so the
    platform's delegation listing can show what the local store tracks. The
    A2A request itself is still sent directly by the agent. Best-effort: any
    exception is logged at debug level and swallowed.
    """
    try:
        endpoint = f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/delegations/record"
        payload = {
            "target_id": target_workspace_id,
            "task": task,
            "delegation_id": task_id,
        }
        async with httpx.AsyncClient(timeout=10) as http:
            await http.post(endpoint, json=payload)
    except Exception as exc:
        logger.debug("Delegation record failed (best-effort): %s", exc)
|
||||
|
||||
|
||||
async def _refresh_queued_from_platform(task_id: str) -> bool:
    """Lazy-refresh a QUEUED delegation's local state from the platform.

    Called by check_task_status when local status is QUEUED. The platform
    records the eventual outcome of a queued delegation but has no callback
    into this runtime, so without this refresh a polling caller would keep
    seeing "queued" even after the platform has the result.

    Args:
        task_id: Local delegation ID to refresh.

    Returns:
        True if the local delegation was moved to a terminal state
        (completed/failed), False otherwise. Best-effort: unknown task IDs,
        network errors, non-200 responses and malformed payloads all leave
        the local state untouched so the next call can retry.
    """
    delegation = _delegations.get(task_id)
    if not delegation:
        return False
    try:
        async with httpx.AsyncClient(timeout=10) as client:
            resp = await client.get(
                f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/delegations",
                headers={},
            )
        if resp.status_code != 200:
            return False
        entries = resp.json()
        if not isinstance(entries, list):
            return False
    except Exception as e:
        logger.debug("refresh queued delegation %s: %s", task_id, e)
        return False

    # The first matching row with a terminal status wins.
    # NOTE(review): the original comment says the platform list is
    # newest-first — confirm against the platform handler.
    for entry in entries:
        # A malformed row must not raise: skip anything that is not an object.
        if not isinstance(entry, dict):
            continue
        if entry.get("delegation_id") != task_id:
            continue
        if entry.get("type") != "delegation":
            continue
        # Rows are told apart by status only: the initial 'delegate' row is
        # expected to stay non-terminal, so only a row reporting
        # completed/failed carries the outcome.
        status = entry.get("status", "")
        if status == "completed":
            delegation.status = DelegationStatus.COMPLETED
            delegation.result = entry.get("response_preview", "")
            await _notify_completion(task_id, delegation.workspace_id, "completed")
            return True
        if status == "failed":
            delegation.status = DelegationStatus.FAILED
            delegation.error = entry.get("error", "")
            await _notify_completion(task_id, delegation.workspace_id, "failed")
            return True
        # Any other status (queued / pending / dispatched): not resolved yet.
        # Keep scanning in case another row for the same task_id carries a
        # terminal status (possible if the same delegation was retried).
    return False
|
||||
|
||||
|
||||
async def _update_delegation_on_platform(task_id: str, status: str, error: str = "", response_preview: str = ""):
    """Push a delegation status change to the platform (#64 fix).

    Counterpart of _record_delegation_on_platform: POSTs the new status, the
    error text and at most the first 500 characters of the response to
    ``/workspaces/<self>/delegations/<task_id>/update``. Best-effort: any
    exception is logged at debug level and swallowed.
    """
    try:
        endpoint = f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/delegations/{task_id}/update"
        body = {
            "status": status,
            "error": error,
            "response_preview": response_preview[:500],
        }
        async with httpx.AsyncClient(timeout=10) as http:
            await http.post(endpoint, json=body)
    except Exception as exc:
        logger.debug("Delegation update failed (best-effort): %s", exc)
|
||||
|
||||
|
||||
def _collect_text_parts(task_result) -> list:
    """Return the text of every ``kind == "text"`` part found in *task_result*.

    Looks at each artifact's ``parts`` first, then at the top-level ``parts``.
    Anything that is not shaped as expected is skipped instead of raising.
    """
    if not isinstance(task_result, dict):
        return []
    part_lists = [
        artifact.get("parts")
        for artifact in (task_result.get("artifacts") or [])
        if isinstance(artifact, dict)
    ]
    part_lists.append(task_result.get("parts"))
    texts = []
    for parts in part_lists:
        for part in parts or []:
            if isinstance(part, dict) and part.get("kind") == "text" and "text" in part:
                texts.append(str(part["text"]))
    return texts


async def _execute_delegation(task_id: str, workspace_id: str, task: str):
    """Background coroutine that sends the A2A request and stores the result.

    Steps: mark the local record IN_PROGRESS, register it on the platform,
    discover the peer URL, then POST the JSON-RPC ``message/send`` request,
    retrying on connect/timeout errors. The local record ends as COMPLETED,
    QUEUED or FAILED.

    Args:
        task_id: Key of the already-stored record in the local store.
        workspace_id: Target (peer) workspace ID.
        task: Full task text sent to the peer.

    Raises:
        KeyError: if *task_id* is not present in the local store.
    """
    delegation = _delegations[task_id]
    delegation.status = DelegationStatus.IN_PROGRESS

    # #64: register on the platform so GET /workspaces/<self>/delegations
    # sees the same set as check_task_status. Best-effort — platform
    # unreachability must not block the actual A2A delegation.
    await _record_delegation_on_platform(task_id, workspace_id, task)

    tracer = get_tracer()
    with tracer.start_as_current_span("task_delegate") as delegate_span:
        delegate_span.set_attribute(WORKSPACE_ID_ATTR, WORKSPACE_ID)
        delegate_span.set_attribute(A2A_SOURCE_WORKSPACE, WORKSPACE_ID)
        delegate_span.set_attribute(A2A_TARGET_WORKSPACE, workspace_id)
        delegate_span.set_attribute(A2A_TASK_ID, task_id)

        async with httpx.AsyncClient(timeout=DELEGATION_TIMEOUT) as client:
            # Discover target URL
            # NOTE(review): the discovery-failure paths below only update the
            # local record; they do not notify or mirror to the platform —
            # confirm that this is intended.
            try:
                discover_resp = await client.get(
                    f"{PLATFORM_URL}/registry/discover/{workspace_id}",
                    headers={"X-Workspace-ID": WORKSPACE_ID},
                )
                if discover_resp.status_code != 200:
                    delegation.status = DelegationStatus.FAILED
                    delegation.error = f"Discovery failed: HTTP {discover_resp.status_code}"
                    log_event(event_type="delegation", action="delegate", resource=workspace_id,
                              outcome="failure", trace_id=task_id, reason="discovery_error")
                    return

                target_url = discover_resp.json().get("url")
                if not target_url:
                    delegation.status = DelegationStatus.FAILED
                    delegation.error = "No URL for workspace"
                    return
            except Exception as e:
                delegation.status = DelegationStatus.FAILED
                delegation.error = f"Discovery error: {e}"
                return

            # Send A2A with retry
            outgoing_headers = inject_trace_headers({
                "Content-Type": "application/json",
                "X-Workspace-ID": WORKSPACE_ID,
            })
            traceparent = get_current_traceparent()

            last_error = None
            for attempt in range(DELEGATION_RETRY_ATTEMPTS):
                try:
                    a2a_resp = await client.post(
                        target_url,
                        headers=outgoing_headers,
                        json={
                            "jsonrpc": "2.0",
                            "method": "message/send",
                            "id": f"delegation-{task_id}-{attempt}",
                            "params": {
                                "message": {
                                    "role": "user",
                                    "parts": [{"kind": "text", "text": task}],
                                    "messageId": f"msg-{task_id}-{attempt}",
                                },
                                "metadata": {
                                    "parent_task_id": task_id,
                                    "source_workspace_id": WORKSPACE_ID,
                                    "traceparent": traceparent,
                                },
                            },
                        },
                    )

                    # HTTP 202 + {queued: true} = the peer's a2a-proxy accepted
                    # the request but the peer's runtime is mid-task.
                    # Platform-side drain will deliver the reply
                    # asynchronously. Mark QUEUED locally so check_task_status
                    # can tell the LLM to wait. Do NOT mark FAILED — the
                    # request is alive in the platform's queue, not lost;
                    # a FAILED status would lead the LLM to do the delegated
                    # work itself.
                    if a2a_resp.status_code == 202:
                        try:
                            queued_body = a2a_resp.json()
                        except Exception:
                            queued_body = {}
                        if isinstance(queued_body, dict) and queued_body.get("queued") is True:
                            delegation.status = DelegationStatus.QUEUED
                            log_event(
                                event_type="delegation", action="delegate",
                                resource=workspace_id, outcome="queued",
                                trace_id=task_id, attempt=attempt + 1,
                            )
                            await _notify_completion(task_id, workspace_id, "queued")
                            await _update_delegation_on_platform(
                                task_id, "queued", "", "",
                            )
                            return

                    if a2a_resp.status_code != 200:
                        # Remember why this attempt failed so the final error
                        # is never the literal string "None".
                        last_error = f"Unexpected HTTP {a2a_resp.status_code} from peer"
                        continue

                    try:
                        result = a2a_resp.json()
                    except Exception:
                        delegation.status = DelegationStatus.FAILED
                        delegation.error = "Invalid JSON response"
                        return

                    if not isinstance(result, dict):
                        last_error = "Malformed A2A response: expected a JSON object"
                        continue

                    if "result" in result:
                        task_result = result["result"]
                        texts = _collect_text_parts(task_result)

                        delegation.status = DelegationStatus.COMPLETED
                        delegation.result = "\n".join(texts) if texts else str(task_result)
                        log_event(event_type="delegation", action="delegate", resource=workspace_id,
                                  outcome="success", trace_id=task_id, attempt=attempt + 1)
                        await _notify_completion(task_id, workspace_id, "completed")
                        # #64: mirror to platform activity_logs so
                        # GET /delegations shows the completion state.
                        await _update_delegation_on_platform(
                            task_id, "completed", "",
                            delegation.result or "",
                        )
                        return

                    if "error" in result:
                        # A JSON-RPC error is final: stop retrying. The error
                        # may be an object with "message" or a bare value.
                        rpc_error = result["error"]
                        if isinstance(rpc_error, dict):
                            last_error = rpc_error.get("message", str(rpc_error))
                        else:
                            last_error = str(rpc_error)
                        break

                    last_error = "A2A response contained neither result nor error"

                except (httpx.ConnectError, httpx.TimeoutException) as e:
                    last_error = str(e)
                    if attempt < DELEGATION_RETRY_ATTEMPTS - 1:
                        # Linear back-off: delay grows with the attempt number.
                        await asyncio.sleep(DELEGATION_RETRY_DELAY * (attempt + 1))
                    continue

        # Every attempt failed (or none was made): record the failure.
        if last_error is None:
            last_error = "No delegation attempt was made"
        delegation.status = DelegationStatus.FAILED
        delegation.error = str(last_error)
        log_event(event_type="delegation", action="delegate", resource=workspace_id,
                  outcome="failure", trace_id=task_id, last_error=str(last_error))
        await _notify_completion(task_id, workspace_id, "failed")
        # #64: mirror failure to platform activity_logs.
        await _update_delegation_on_platform(
            task_id, "failed", str(last_error), "",
        )
|
||||
|
||||
|
||||
@tool
async def delegate_task(
    workspace_id: str,
    task: str,
) -> str:
    """Delegate a task to a peer workspace via A2A and WAIT for the response.

    Synchronous variant — blocks until the peer replies (or the platform's
    A2A round-trip times out). Use this for QUICK questions and small
    sub-tasks where you can afford to wait inline.

    For longer-running work (research, multi-minute jobs) use
    delegate_task_async + check_task_status instead so you don't hold
    this workspace busy waiting.

    Tool name + description are sourced from the platform_tools registry —
    a single ToolSpec drives MCP, LangChain, and system-prompt docs.
    """
    # Function-scope import: a2a_tools is resolved on first call rather than
    # at module import time — presumably to avoid a circular import; TODO confirm.
    from a2a_tools import tool_delegate_task
    # This wrapper only forwards its arguments; the delegation itself is
    # performed by tool_delegate_task.
    return await tool_delegate_task(workspace_id, task)
|
||||
|
||||
|
||||
@tool
async def delegate_task_async(
    workspace_id: str,
    task: str,
) -> dict:
    """Delegate a task to a peer workspace via A2A protocol (non-blocking).

    Sends the task in the background and returns immediately with a task_id.
    Use check_task_status to poll for the result, or continue working
    and check later. The delegate works independently.

    Args:
        workspace_id: The ID of the target workspace to delegate to.
        task: The task description to send to the peer.

    Returns:
        A dict with task_id and status="delegated". Use check_task_status(task_id) to get results.
    """
    task_id = str(uuid.uuid4())

    # Self-delegation guard (Task #190 / #193). Per those tasks, an async
    # delegation to our own workspace would make the background sender POST
    # to our own URL, time out, and come back as a peer_agent message from
    # ourselves. Refuse here, before any background task is scheduled.
    # Sibling guards named by the original author:
    #   - workspace-server/internal/handlers/delegation.go (Go API gate)
    #   - workspace/a2a_tools_delegation.py (MCP sync + async paths)
    #   - workspace/builtin_tools/a2a_tools.py (framework-agnostic sync)
    targets_self = bool(WORKSPACE_ID) and workspace_id == WORKSPACE_ID
    if targets_self:
        log_event(event_type="delegation", action="delegate", resource=workspace_id,
                  outcome="rejected_self_delegation", trace_id=task_id)
        rejection = (
            "self-delegation rejected: cannot delegate_task_async to your "
            "own workspace (would time out and echo back as a peer_agent "
            "message from yourself — #190)"
        )
        return {"success": False, "error": rejection}

    # RBAC gate: the workspace needs the 'delegate' permission.
    roles, custom_perms = get_workspace_roles()
    allowed = check_permission("delegate", roles, custom_perms)
    if not allowed:
        log_event(event_type="rbac", action="rbac.deny", resource=workspace_id,
                  outcome="denied", trace_id=task_id, attempted_action="delegate", roles=roles)
        return {"success": False, "error": f"RBAC: no 'delegate' permission. Roles: {roles}"}

    log_event(event_type="delegation", action="delegate", resource=workspace_id,
              outcome="dispatched", trace_id=task_id, task_preview=task[:200])

    # Register the record first (only a 200-char preview of the task is
    # kept), trim the store, then start the background sender with the full text.
    _delegations[task_id] = DelegationTask(
        task_id=task_id,
        workspace_id=workspace_id,
        task_description=task[:200],
    )
    _evict_old_delegations()

    runner = asyncio.create_task(_execute_delegation(task_id, workspace_id, task))
    # Hold a reference until the done-callback releases it.
    _background_tasks.add(runner)
    runner.add_done_callback(_on_task_done)

    hint = f"Task delegated to {workspace_id}. Use check_task_status('{task_id}') to get the result when ready."
    return {
        "success": True,
        "task_id": task_id,
        "status": "delegated",
        "message": hint,
    }
|
||||
|
||||
|
||||
@tool
async def check_task_status(
    task_id: str = "",
) -> dict:
    """Check the status of a delegated task, or list all active delegations.

    Status semantics — IMPORTANT:

    - "pending" / "in_progress" → peer is actively working. Wait and check again.
    - "queued" → peer's a2a-proxy accepted the call but the peer is
      processing a prior task. The reply WILL arrive — the platform's
      drain re-dispatches when the peer is free. This tool transparently
      polls the platform for the eventual outcome on each call, so
      keep polling check_task_status periodically and you'll see
      the status flip to "completed" / "failed" automatically.
      Do NOT retry the delegation. Do NOT do the work yourself.
      Acknowledge to the user that the peer is busy and will reply,
      then continue with other delegations or check back later.
    - "completed" → result is in the `result` field.
    - "failed" → real failure (network, peer crashed, etc.). The
      `error` field has the cause. Only fall back to doing the work
      yourself if status is "failed", never if status is "queued".

    Args:
        task_id: The task_id returned by delegate_task_async. If empty, lists all delegations.

    Returns:
        Status and result (if completed) of the delegation.
    """
    # NOTE: the docstring above is the tool description handed to the LLM by
    # @tool, so it is deliberately kept word-for-word.

    # No task_id: summarise every tracked delegation.
    if not task_id:
        overview = []
        for known_id, record in _delegations.items():
            row = {
                "task_id": known_id,
                "workspace_id": record.workspace_id,
                "status": record.status.value,
                "task": record.task_description,
            }
            if record.status == DelegationStatus.COMPLETED:
                # Listing mode only carries a short preview of the result.
                row["result_preview"] = (record.result or "")[:200]
            if record.status == DelegationStatus.FAILED:
                row["error"] = record.error
            overview.append(row)
        return {"delegations": overview, "count": len(overview)}

    delegation = _delegations.get(task_id)
    if not delegation:
        return {"error": f"No delegation found with task_id {task_id}"}

    # QUEUED entries are refreshed lazily on each poll: the platform updates
    # its own activity log when a queued delegation finishes but does not push
    # that back to this runtime, so without the refresh a poller would see
    # "queued" forever. The refresh mutates the stored entry in place, so
    # `delegation` still refers to the up-to-date object afterwards.
    if delegation.status == DelegationStatus.QUEUED:
        await _refresh_queued_from_platform(task_id)

    report = {
        "task_id": task_id,
        "workspace_id": delegation.workspace_id,
        "status": delegation.status.value,
        "task": delegation.task_description,
    }
    if delegation.status == DelegationStatus.COMPLETED:
        report["result"] = delegation.result
    elif delegation.status == DelegationStatus.FAILED:
        report["error"] = delegation.error

    # RFC #2251 V1.0 reproduction-harness instrumentation: one
    # phase=check_status line per poll, so the polling pattern can be read
    # from the container log (`grep rfc2251_phase=check_status`).
    # Strip when V1.0 ships.
    logger.info(
        "rfc2251_phase=check_status task_id=%s peer=%s status=%s",
        task_id, delegation.workspace_id, delegation.status.value,
    )
    return report
|
||||
@@ -1,403 +0,0 @@
|
||||
"""Bridge between Molecule AI's RBAC + audit subsystem and the Microsoft Agent
|
||||
Governance Toolkit (agent-os-kernel, released April 2, 2026).
|
||||
|
||||
Integration points
|
||||
------------------
|
||||
* ``check_permission`` → ``PolicyEvaluator.evaluate()``
|
||||
Molecule AI's RBAC gate runs first; if RBAC allows the action the toolkit
|
||||
evaluator is consulted according to ``policy_mode``.
|
||||
|
||||
* ``log_event`` → governance audit sink
|
||||
Every permission decision (allow or deny) is written via
|
||||
``tools.audit.log_event`` with extra governance metadata so the full
|
||||
decision trail lands in Molecule AI's existing audit stream.
|
||||
|
||||
* OTEL traceparent flows through
|
||||
``tools.telemetry.get_current_traceparent()`` is called inside ``emit()``
|
||||
and the W3C traceparent string is attached to every audit record, giving
|
||||
end-to-end distributed tracing across agent boundaries.
|
||||
|
||||
Graceful degradation
|
||||
--------------------
|
||||
If ``agent-os-kernel`` is not installed the module falls back to Molecule AI
|
||||
RBAC alone. No exception propagates to the agent — governance is a
|
||||
best-effort overlay, never a hard dependency.
|
||||
|
||||
Install::
|
||||
|
||||
pip install agent-os-kernel
|
||||
|
||||
Minimal config.yaml snippet::
|
||||
|
||||
governance:
|
||||
enabled: true
|
||||
toolkit: microsoft
|
||||
policy_mode: strict # strict | permissive | audit
|
||||
policy_endpoint: https://your-tenant.governance.azure.com
|
||||
policy_file: policies/workspace.rego
|
||||
blocked_patterns:
|
||||
- ".*\\.exec$"
|
||||
- "shell\\."
|
||||
max_tool_calls_per_task: 50
|
||||
|
||||
NOTE: The agent-os-kernel package was released April 2, 2026 and is in
|
||||
community preview. The API bindings in this module target v3.0.x of the
|
||||
package (agent_os.policies.PolicyEvaluator). If the package API changes,
|
||||
update _init_evaluator() accordingly.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
WORKSPACE_ID: str = os.environ.get("WORKSPACE_ID", "")
|
||||
|
||||
# Module-level singleton — set by initialize_governance() at startup
|
||||
_adapter: Optional["GovernanceAdapter"] = None
|
||||
|
||||
|
||||
class GovernanceAdapter:
|
||||
"""Bridges Molecule AI RBAC + audit trail to the Microsoft Agent Governance Toolkit."""
|
||||
|
||||
def __init__(self, config: Any) -> None:
|
||||
self._config = config
|
||||
self._evaluator = None
|
||||
self._toolkit_available: bool = False
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""Async entry point: initialise evaluator and log outcome."""
|
||||
self._init_evaluator()
|
||||
if self._toolkit_available:
|
||||
logger.info(
|
||||
"GovernanceAdapter initialised — toolkit=%s mode=%s",
|
||||
self._config.toolkit,
|
||||
self._config.policy_mode,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"GovernanceAdapter initialised in RBAC-only mode "
|
||||
"(agent-os-kernel not available or failed to load)."
|
||||
)
|
||||
|
||||
def _init_evaluator(self) -> None:
|
||||
"""Lazy-import and configure the PolicyEvaluator from agent-os-kernel.
|
||||
|
||||
All failures are caught and logged; the adapter simply runs without
|
||||
the toolkit rather than crashing the workspace.
|
||||
"""
|
||||
try:
|
||||
try:
|
||||
from agent_os.policies import PolicyEvaluator # type: ignore[import]
|
||||
except ImportError:
|
||||
logger.warning(
|
||||
"agent-os-kernel is not installed — graceful degradation active. "
|
||||
"Governance will use Molecule AI RBAC only. "
|
||||
"To enable the Microsoft Agent Governance Toolkit run: "
|
||||
"pip install agent-os-kernel"
|
||||
)
|
||||
return
|
||||
|
||||
kwargs: dict[str, Any] = {
|
||||
"policy_mode": self._config.policy_mode,
|
||||
"max_tool_calls_per_task": self._config.max_tool_calls_per_task,
|
||||
"blocked_patterns": self._config.blocked_patterns,
|
||||
}
|
||||
if self._config.policy_endpoint:
|
||||
kwargs["endpoint"] = self._config.policy_endpoint
|
||||
|
||||
self._evaluator = PolicyEvaluator(**kwargs)
|
||||
|
||||
# Load a policy file if one is configured and exists on disk.
|
||||
if self._config.policy_file:
|
||||
policy_file = self._config.policy_file
|
||||
if os.path.exists(policy_file):
|
||||
ext = os.path.splitext(policy_file)[1].lower()
|
||||
if ext == ".rego":
|
||||
self._evaluator.load_rego(path=policy_file)
|
||||
logger.info("Loaded Rego policy file: %s", policy_file)
|
||||
elif ext in (".yaml", ".yml"):
|
||||
self._evaluator.load_yaml(path=policy_file)
|
||||
logger.info("Loaded YAML policy file: %s", policy_file)
|
||||
elif ext == ".cedar":
|
||||
self._evaluator.load_cedar(path=policy_file)
|
||||
logger.info("Loaded Cedar policy file: %s", policy_file)
|
||||
else:
|
||||
logger.warning(
|
||||
"Unrecognised policy file extension '%s' — skipping load.",
|
||||
ext,
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
"policy_file '%s' does not exist — skipping load.",
|
||||
policy_file,
|
||||
)
|
||||
|
||||
self._toolkit_available = True
|
||||
logger.info(
|
||||
"agent-os-kernel PolicyEvaluator ready — policy_mode=%s",
|
||||
self._config.policy_mode,
|
||||
)
|
||||
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning(
|
||||
"Failed to initialise agent-os-kernel PolicyEvaluator: %s — "
|
||||
"graceful degradation active (RBAC only).",
|
||||
exc,
|
||||
)
|
||||
|
||||
def check_permission(
|
||||
self,
|
||||
action: str,
|
||||
roles: list[str],
|
||||
custom_permissions: dict | None = None,
|
||||
context: dict | None = None,
|
||||
) -> tuple[bool, str]:
|
||||
"""Evaluate an action against Molecule AI RBAC and (optionally) the toolkit.
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple[bool, str]
|
||||
``(allowed, reason)`` — reason is a short human-readable string
|
||||
explaining the decision.
|
||||
"""
|
||||
from builtin_tools import audit # inline import to avoid circular dependencies
|
||||
|
||||
context = context or {}
|
||||
|
||||
# --- Step 1: Molecule AI RBAC gate (always runs) ---
|
||||
rbac_allowed: bool = audit.check_permission(action, roles, custom_permissions)
|
||||
|
||||
if not rbac_allowed:
|
||||
self.emit(
|
||||
event_type="permission_check",
|
||||
action=action,
|
||||
resource=context.get("resource", ""),
|
||||
outcome="denied",
|
||||
actor=context.get("actor"),
|
||||
policy_decision="rbac_deny",
|
||||
roles=roles,
|
||||
)
|
||||
return False, f"RBAC denied action '{action}' for roles {roles}"
|
||||
|
||||
# --- Step 2: If toolkit unavailable or audit-only mode, return RBAC result ---
|
||||
if not self._toolkit_available or self._config.policy_mode == "audit":
|
||||
self.emit(
|
||||
event_type="permission_check",
|
||||
action=action,
|
||||
resource=context.get("resource", ""),
|
||||
outcome="allowed",
|
||||
actor=context.get("actor"),
|
||||
policy_decision="rbac_allowed",
|
||||
roles=roles,
|
||||
toolkit_mode=self._config.policy_mode,
|
||||
)
|
||||
return rbac_allowed, "rbac_allowed"
|
||||
|
||||
# --- Step 3: Toolkit evaluation ---
|
||||
eval_context: dict[str, Any] = {
|
||||
"action": action,
|
||||
"resource": context.get("resource", ""),
|
||||
"roles": roles,
|
||||
"workspace_id": WORKSPACE_ID,
|
||||
}
|
||||
# Merge any extra context keys the caller supplied.
|
||||
for key, value in context.items():
|
||||
if key not in eval_context:
|
||||
eval_context[key] = value
|
||||
|
||||
toolkit_allowed: bool = True
|
||||
reason: str = ""
|
||||
evaluator_name: str = "agent-os-kernel"
|
||||
|
||||
try:
|
||||
decision = self._evaluator.evaluate(eval_context)
|
||||
toolkit_allowed = getattr(decision, "allowed", True)
|
||||
reason = getattr(decision, "reason", "")
|
||||
evaluator_name = getattr(decision, "evaluator_name", "agent-os-kernel")
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.warning(
|
||||
"agent-os-kernel evaluation raised an exception: %s — "
|
||||
"falling back to RBAC result to avoid blocking the agent.",
|
||||
exc,
|
||||
)
|
||||
self.emit(
|
||||
event_type="permission_check",
|
||||
action=action,
|
||||
resource=context.get("resource", ""),
|
||||
outcome="allowed",
|
||||
actor=context.get("actor"),
|
||||
policy_decision="toolkit_evaluation_error",
|
||||
toolkit_mode=self._config.policy_mode,
|
||||
roles=roles,
|
||||
)
|
||||
return rbac_allowed, "toolkit_evaluation_error"
|
||||
|
||||
# --- Step 4: Combine results according to policy_mode ---
|
||||
if self._config.policy_mode == "permissive":
|
||||
# Toolkit denial is advisory only in permissive mode.
|
||||
if not toolkit_allowed:
|
||||
logger.warning(
|
||||
"Governance toolkit denied action '%s' (reason=%s) but policy_mode "
|
||||
"is 'permissive' — allowing and logging advisory denial.",
|
||||
action,
|
||||
reason,
|
||||
)
|
||||
final_allowed = rbac_allowed
|
||||
else:
|
||||
# strict: both gates must allow.
|
||||
final_allowed = rbac_allowed and toolkit_allowed
|
||||
|
||||
outcome = "allowed" if final_allowed else "denied"
|
||||
self.emit(
|
||||
event_type="permission_check",
|
||||
action=action,
|
||||
resource=context.get("resource", ""),
|
||||
outcome=outcome,
|
||||
actor=context.get("actor"),
|
||||
policy_decision=reason or outcome,
|
||||
evaluator=evaluator_name,
|
||||
toolkit_mode=self._config.policy_mode,
|
||||
roles=roles,
|
||||
)
|
||||
return final_allowed, reason or "allowed"
|
||||
|
||||
def emit(
|
||||
self,
|
||||
event_type: str,
|
||||
action: str,
|
||||
resource: str,
|
||||
outcome: str,
|
||||
actor: str | None = None,
|
||||
trace_id: str | None = None,
|
||||
**extra: Any,
|
||||
) -> str:
|
||||
"""Write a governance-annotated audit event.
|
||||
|
||||
Pulls the current W3C traceparent from the active OTEL span so that
|
||||
governance decisions are traceable across service boundaries.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
The ``trace_id`` produced by ``audit.log_event``.
|
||||
"""
|
||||
from builtin_tools import audit # inline import to avoid circular dependencies
|
||||
from builtin_tools.telemetry import get_current_traceparent # inline import
|
||||
|
||||
traceparent: str | None = get_current_traceparent()
|
||||
|
||||
recorded_trace_id: str = audit.log_event(
|
||||
event_type,
|
||||
action,
|
||||
resource,
|
||||
outcome,
|
||||
actor=actor,
|
||||
trace_id=trace_id,
|
||||
governance_toolkit=(
|
||||
self._config.toolkit if self._toolkit_available else "disabled"
|
||||
),
|
||||
traceparent=traceparent or "",
|
||||
**extra,
|
||||
)
|
||||
return recorded_trace_id
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level functions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def initialize_governance(config: Any) -> Optional[GovernanceAdapter]:
    """Build a GovernanceAdapter from *config* and install it as the singleton.

    The module-level ``_adapter`` is replaced only after the new adapter's
    ``initialize()`` has completed. Any exception raised along the way is
    logged as a warning and turned into a ``None`` return value.

    Returns:
        The initialised adapter, or ``None`` if initialisation failed.
    """
    global _adapter

    try:
        candidate = GovernanceAdapter(config)
        await candidate.initialize()
        # Publish the adapter before logging, as the log call reads config.
        _adapter = candidate
        logger.info(
            "Governance singleton initialised — toolkit=%s mode=%s",
            config.toolkit,
            config.policy_mode,
        )
        return candidate
    except Exception as failure:  # noqa: BLE001
        logger.warning(
            "initialize_governance() failed: %s — governance disabled for this session.",
            failure,
        )
        return None
|
||||
|
||||
|
||||
def get_governance_adapter() -> Optional[GovernanceAdapter]:
    """Return the module-level GovernanceAdapter singleton (may be None).

    The singleton starts out as ``None`` and is assigned by
    ``initialize_governance()`` once an adapter has been initialised.
    """
    return _adapter
|
||||
|
||||
|
||||
def check_permission_with_governance(
    action: str,
    roles: list[str],
    custom_permissions: dict | None = None,
    context: dict | None = None,
) -> tuple[bool, str]:
    """Check a permission through the governance singleton, or plain RBAC.

    When a GovernanceAdapter singleton is installed the decision is delegated
    to its ``check_permission``. Otherwise the RBAC check from
    ``builtin_tools.audit`` is used and the reason is ``"rbac_only"``.

    Parameters
    ----------
    action:
        The action name to evaluate (e.g. ``"memory.write"``).
    roles:
        The list of role names held by the requesting actor.
    custom_permissions:
        Optional custom role→action mapping to overlay on built-in roles.
    context:
        Optional extra context forwarded to the adapter.

    Returns
    -------
    tuple[bool, str]
        ``(allowed, reason)``
    """
    adapter = _adapter
    if adapter is not None:
        return adapter.check_permission(action, roles, custom_permissions, context)

    from builtin_tools import audit  # inline import to avoid circular dependencies

    rbac_verdict: bool = audit.check_permission(action, roles, custom_permissions)
    return rbac_verdict, "rbac_only"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Private helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _emit_governance_event(
    event_type: str,
    action: str,
    resource: str,
    outcome: str,
    actor: str | None = None,
    trace_id: str | None = None,
    **extra: Any,
) -> Optional[str]:
    """Forward an audit event to the singleton adapter's ``emit``, if any.

    Returns:
        The value returned by the adapter's ``emit`` (the recorded trace_id),
        or ``None`` when no adapter singleton is installed.
    """
    adapter = _adapter
    if adapter is not None:
        return adapter.emit(
            event_type,
            action,
            resource,
            outcome,
            actor=actor,
            trace_id=trace_id,
            **extra,
        )
    return None
|
||||
@@ -1,561 +0,0 @@
|
||||
"""Human-In-The-Loop (HITL) workflow primitives.
|
||||
|
||||
Generalizes the approval tool into reusable HITL building blocks that work
|
||||
across all Molecule AI adapters.
|
||||
|
||||
Features
|
||||
--------
|
||||
@requires_approval
|
||||
Decorator that gates *any* async callable (tool, method, standalone fn)
|
||||
behind a human approval request. The decorated function only runs if
|
||||
the request is granted. Roles in ``hitl.bypass_roles`` skip the gate.
|
||||
|
||||
pause_task / resume_task
|
||||
LangChain tools for explicit pause/resume of in-flight tasks. An agent
|
||||
calls ``pause_task(task_id, reason)`` to suspend itself; an external
|
||||
signal (webhook, dashboard click, another agent) calls ``resume_task``
|
||||
with the same task_id to wake it up.
|
||||
|
||||
Notification channels
|
||||
---------------------
|
||||
Configured under ``hitl:`` in ``config.yaml``:
|
||||
|
||||
hitl:
|
||||
channels:
|
||||
- type: dashboard # always active; uses platform approval API
|
||||
- type: slack
|
||||
webhook_url: https://hooks.slack.com/services/…
|
||||
- type: email
|
||||
smtp_host: smtp.example.com
|
||||
smtp_port: 587
|
||||
from: alerts@example.com
|
||||
to: ops@example.com
|
||||
username: alerts@example.com # optional; password from SMTP_PASSWORD env
|
||||
default_timeout: 300 # seconds before an unanswered request times out
|
||||
bypass_roles: [admin] # roles that skip the approval gate entirely
|
||||
|
||||
Environment variables
|
||||
---------------------
|
||||
SMTP_PASSWORD Password for SMTP authentication (recommended instead of storing it in config.yaml; a ``password`` key on the email channel, when present, takes precedence)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import functools
|
||||
import logging
|
||||
import os
|
||||
import smtplib
|
||||
from dataclasses import dataclass, field
|
||||
from email.mime.text import MIMEText
|
||||
from typing import Any, Callable
|
||||
|
||||
import httpx
|
||||
from langchain_core.tools import tool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Config
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
class HITLConfig:
    """HITL settings loaded from the ``hitl:`` block in config.yaml."""
    # Notification channel definitions, one dict per channel; the "type" key
    # selects the channel kind. Defaults to a single dashboard channel.
    channels: list[dict] = field(default_factory=lambda: [{"type": "dashboard"}])
    # Seconds before an unanswered request times out.
    default_timeout: float = 300.0
    # Roles that skip the approval gate entirely.
    bypass_roles: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
def _load_hitl_config() -> HITLConfig:
    """Build a HITLConfig from the workspace config's ``hitl`` attribute.

    Attributes missing from the ``hitl`` section fall back to the same
    defaults as ``HITLConfig``. If the section is absent, or anything raises
    while loading or converting it, a default ``HITLConfig`` is returned.
    """
    try:
        from config import load_config

        section = getattr(load_config(), "hitl", None)
        if section is None:
            return HITLConfig()

        channels = getattr(section, "channels", [{"type": "dashboard"}])
        timeout = float(getattr(section, "default_timeout", 300))
        bypass = list(getattr(section, "bypass_roles", []))
        return HITLConfig(
            channels=channels,
            default_timeout=timeout,
            bypass_roles=bypass,
        )
    except Exception:
        # Best-effort by design: any failure yields the defaults.
        return HITLConfig()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pause / Resume registry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _TaskPauseRegistry:
    """In-process bookkeeping for paused tasks, keyed by task_id.

    Three parallel dicts are kept per task_id: the ``asyncio.Event`` that
    waiters block on, the result payload stored by ``resume()``, and the
    owning workspace ID. A stored result stays available until
    ``pop_result()`` or ``cleanup()`` removes it.
    """

    def __init__(self) -> None:
        self._events: dict[str, asyncio.Event] = {}
        self._results: dict[str, dict] = {}
        # #265: workspace_id that created each task. An empty string means
        # "no owner / legacy" and disables the ownership check for that task.
        self._owners: dict[str, str] = {}

    def register(self, task_id: str, owner: str = "") -> asyncio.Event:
        """Create a fresh Event for *task_id*, record its owner, return it.

        Args:
            task_id: Unique task identifier.
            owner: Workspace ID that owns this task. When set, ``resume``
                rejects callers that pass a different non-empty workspace.
        """
        # NOTE(review): re-registering an existing task_id replaces its Event.
        event = asyncio.Event()
        self._events[task_id] = event
        self._owners[task_id] = owner
        return event

    def resume(self, task_id: str, result: dict | None = None, owner: str = "") -> bool:
        """Store *result* and set the Event for *task_id*.

        Args:
            task_id: The identifier used in ``register``.
            result: Optional payload for the waiter; ``{}`` is stored when
                it is missing or empty.
            owner: Caller's workspace ID. If both this and the stored owner
                are non-empty and differ, the call is rejected (#265,
                cross-workspace prompt injection). ``owner=""`` skips the
                check.

        Returns:
            True if the event was set; False if the ownership check failed
            or *task_id* is not registered.
        """
        # #265 ownership check — runs before the existence check.
        expected_owner = self._owners.get(task_id, "")
        if owner and expected_owner and owner != expected_owner:
            logger.warning(
                "HITL: resume rejected for task %s — caller workspace %r != owner %r",
                task_id, owner, expected_owner,
            )
            return False

        event = self._events.get(task_id)
        if event is None:
            return False

        # Store the payload first so it is already there when waiters wake.
        self._results[task_id] = result if result else {}
        event.set()
        return True

    def pop_result(self, task_id: str) -> dict:
        """Remove and return the stored result for *task_id* (``{}`` if none)."""
        return self._results.pop(task_id, {})

    def cleanup(self, task_id: str) -> None:
        """Forget *task_id*: drop its event, result and owner entries."""
        for table in (self._events, self._results, self._owners):
            table.pop(task_id, None)

    def list_paused(self) -> list[str]:
        """Return the IDs of registered tasks whose event is not yet set."""
        still_waiting = []
        for known_id, event in self._events.items():
            if not event.is_set():
                still_waiting.append(known_id)
        return still_waiting
|
||||
|
||||
|
||||
# Global singleton — safe within one asyncio event loop / process
|
||||
pause_registry = _TaskPauseRegistry()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Notification channels
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
async def _notify_channels(
    action: str,
    reason: str,
    approval_id: str,
    cfg: HITLConfig,
) -> None:
    """Send an approval notification to each configured Slack/email channel.

    Channels are processed one after another. A failure while notifying a
    channel is logged as a warning and does not stop the remaining channels,
    so a misconfigured Slack webhook cannot block the approval flow. Channel
    types other than ``slack`` and ``email`` (including ``dashboard``, which
    the platform handles via the approval POST) are skipped here.
    """
    platform_url = os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")
    workspace_id = os.environ.get("WORKSPACE_ID", "")

    for channel in cfg.channels:
        kind = channel.get("type", "dashboard")
        if kind not in ("slack", "email"):
            continue
        try:
            sender = _notify_slack if kind == "slack" else _notify_email
            await sender(channel, action, reason, approval_id,
                         platform_url, workspace_id)
        except Exception as exc:
            logger.warning("HITL: channel '%s' notification failed: %s", kind, exc)
|
||||
|
||||
|
||||
async def _notify_slack(
    cfg: dict,
    action: str,
    reason: str,
    approval_id: str,
    platform_url: str,
    workspace_id: str,
) -> None:
    """Post an approval request to a Slack incoming webhook.

    Args:
        cfg: Channel config; ``webhook_url`` is read from it. When it is
            missing or empty the function returns without doing anything.
        action: Short label of the action awaiting approval.
        reason: Reason line shown to the approver.
        approval_id: Approval identifier embedded in the approve/deny URLs.
        platform_url: Base URL used to build the approve/deny URLs.
        workspace_id: Workspace the request originates from.

    Raises:
        httpx.HTTPError: On transport failure or a 4xx/5xx webhook response.
    """
    webhook_url = cfg.get("webhook_url", "")
    if not webhook_url:
        return

    approvals_base = f"{platform_url}/workspaces/{workspace_id}/approvals/{approval_id}"
    approve_url = f"{approvals_base}/approve"
    deny_url = f"{approvals_base}/deny"

    payload = {
        "text": f":warning: Approval required from workspace `{workspace_id}`",
        "blocks": [
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": (
                        f"*Action:* {action}\n"
                        f"*Reason:* {reason}\n"
                        f"*Approval ID:* `{approval_id}`"
                    ),
                },
            },
            {
                "type": "actions",
                "elements": [
                    {
                        "type": "button",
                        "text": {"type": "plain_text", "text": "Approve"},
                        "style": "primary",
                        "url": approve_url,
                    },
                    {
                        "type": "button",
                        "text": {"type": "plain_text", "text": "Deny"},
                        "style": "danger",
                        "url": deny_url,
                    },
                ],
            },
        ],
    }
    async with httpx.AsyncClient(timeout=10.0) as client:
        response = await client.post(webhook_url, json=payload)
        # Without this, a rejected webhook (4xx/5xx) would be logged as sent.
        response.raise_for_status()
    logger.info("HITL: Slack notification sent for approval %s", approval_id)
|
||||
|
||||
|
||||
async def _notify_email(
    cfg: dict,
    action: str,
    reason: str,
    approval_id: str,
    platform_url: str,
    workspace_id: str,
) -> None:
    """Send an approval request as a plain-text email over SMTP with STARTTLS.

    Args:
        cfg: Channel config. Reads ``smtp_host``, ``smtp_port`` (default
            587), ``from``, ``to``, ``username``, ``password`` and the
            optional ``timeout`` (seconds, default 30). If ``smtp_host``,
            ``from`` or ``to`` is missing, a warning is logged and nothing
            is sent.
        action: Short label of the action awaiting approval.
        reason: Reason line shown to the approver.
        approval_id: Approval identifier embedded in the approve/deny URLs.
        platform_url: Base URL used to build the approve/deny URLs.
        workspace_id: Workspace the request originates from.

    Raises:
        Exceptions from ``smtplib`` / the socket layer propagate.
    """
    smtp_host = cfg.get("smtp_host", "")
    smtp_port = int(cfg.get("smtp_port", 587))
    from_addr = cfg.get("from", "")
    to_addr = cfg.get("to", "")

    if not all([smtp_host, from_addr, to_addr]):
        logger.warning("HITL: email channel missing smtp_host/from/to — skipping")
        return

    approvals_base = f"{platform_url}/workspaces/{workspace_id}/approvals/{approval_id}"
    approve_url = f"{approvals_base}/approve"
    deny_url = f"{approvals_base}/deny"

    body = (
        f"Approval required from workspace {workspace_id}\n\n"
        f"Action : {action}\n"
        f"Reason : {reason}\n"
        f"ID : {approval_id}\n\n"
        f"Approve: {approve_url}\n"
        f"Deny : {deny_url}\n"
    )

    msg = MIMEText(body, "plain", "utf-8")
    msg["Subject"] = f"[Molecule AI] Approval required: {action}"
    msg["From"] = from_addr
    msg["To"] = to_addr

    username = cfg.get("username", "")
    # A configured password still wins, but an empty/None config value now
    # falls back to SMTP_PASSWORD instead of silently disabling login.
    password = cfg.get("password") or os.environ.get("SMTP_PASSWORD", "")
    # Bound the blocking SMTP conversation so an unresponsive server cannot
    # pin the worker thread forever.
    smtp_timeout = float(cfg.get("timeout", 30.0))

    def _send() -> None:
        with smtplib.SMTP(smtp_host, smtp_port, timeout=smtp_timeout) as srv:
            srv.ehlo()
            srv.starttls()
            if username and password:
                srv.login(username, password)
            srv.send_message(msg)

    # smtplib is blocking; run it off the event loop.
    await asyncio.to_thread(_send)
    logger.info("HITL: email notification sent for approval %s", approval_id)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# @requires_approval decorator
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def requires_approval(
    action_description: str = "",
    reason_template: str = "",
    bypass_roles: list[str] | None = None,
) -> Callable[[Callable], Callable]:
    """Decorator that gates an async callable behind a human approval request.

    The wrapped function executes only when a human approves, or when the
    workspace holds one of the bypass roles. Use this on any tool or async
    helper that performs destructive or high-impact work.

    Args:
        action_description: Short label for the action shown to the approver.
            Defaults to the function's ``name`` attribute or ``__name__``.
        reason_template: ``str.format`` template for the reason line. Keyword
            arguments of the decorated call are available, e.g.
            ``"Delete table {table_name}"``. If formatting fails, the raw
            template is used as the reason.
        bypass_roles: Roles that skip the gate entirely. Overrides
            ``hitl.bypass_roles`` in config.yaml when given.

    Returns:
        A decorator; applying it to a function returns an async wrapper. When
        approval is not granted, or the approval call itself fails, the
        wrapper returns a dict with ``"success": False`` and an ``"error"``
        message instead of calling the function.

    Usage::

        @tool
        @requires_approval("Wipe production DB", bypass_roles=["admin"])
        async def drop_table(table_name: str) -> dict:
            ...

        # Works with plain async functions too:
        @requires_approval("Send customer email")
        async def send_email(to: str, body: str) -> dict:
            ...
    """
    def decorator(fn: Callable) -> Callable:
        action = action_description or getattr(fn, "name", None) or fn.__name__
        # The event loop only keeps weak references to tasks, so fire-and-forget
        # notification tasks are held here until they finish.
        pending_notifications: set[asyncio.Task] = set()

        @functools.wraps(fn)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            hitl_cfg = _load_hitl_config()

            # --- Check bypass roles -----------------------------------------
            active_bypass = bypass_roles if bypass_roles is not None else hitl_cfg.bypass_roles
            if active_bypass:
                bypassed = False
                roles: Any = None
                try:
                    from builtin_tools.audit import get_workspace_roles
                    roles, _ = get_workspace_roles()
                    bypassed = any(r in active_bypass for r in roles)
                except Exception as exc:
                    # If the RBAC lookup fails, proceed to the approval gate.
                    logger.warning(
                        "@requires_approval: role lookup failed for '%s': %s — "
                        "falling back to the approval gate",
                        action, exc,
                    )
                if bypassed:
                    logger.info(
                        "@requires_approval bypassed (role %s) for '%s'", roles, action
                    )
                    # Called outside the try so an error raised by fn itself
                    # propagates instead of being swallowed and re-run via
                    # the approval gate.
                    return await fn(*args, **kwargs)

            # --- Build reason string -----------------------------------------
            if reason_template:
                try:
                    reason = reason_template.format(**kwargs)
                except (KeyError, IndexError, ValueError, AttributeError):
                    # Template does not match the call; show it unformatted.
                    reason = reason_template
            else:
                arg_parts = [f"{k}={str(v)[:60]}" for k, v in list(kwargs.items())[:3]]
                reason = f"Args: {', '.join(arg_parts)}" if arg_parts else "Automated action"

            # --- Fire non-dashboard notifications (async, non-blocking) ------
            # NOTE(review): the literal "pending" is passed as the approval ID
            # and ends up in the approve/deny links built by the channel
            # notifiers — confirm this is intended.
            notify_task = asyncio.create_task(
                _notify_channels(action, reason, "pending", hitl_cfg)
            )
            pending_notifications.add(notify_task)
            notify_task.add_done_callback(pending_notifications.discard)

            # --- Request approval via approval tool --------------------------
            try:
                from builtin_tools.approval import request_approval
                approval_result = await request_approval.ainvoke(
                    {"action": action, "reason": reason}
                )
            except Exception as exc:
                logger.error("@requires_approval: approval call failed: %s", exc)
                return {
                    "success": False,
                    "error": f"Approval gate error: {exc}",
                }

            def _audit(outcome: str) -> None:
                # Art. 14 audit: record that the human oversight gate was
                # exercised. Best-effort — a logging failure must not change
                # the approval decision.
                try:
                    from builtin_tools.audit import log_event
                    log_event(
                        event_type="hitl",
                        action="approve",
                        resource=action,
                        outcome=outcome,
                        actor=approval_result.get("decided_by"),
                        approval_id=approval_result.get("approval_id"),
                        reason=reason,
                    )
                except Exception as exc:
                    logger.warning(
                        "@requires_approval: audit log for '%s' (%s) failed: %s",
                        action, outcome, exc,
                    )

            if not approval_result.get("approved"):
                _audit("denied")
                return {
                    "success": False,
                    "error": (
                        f"Action '{action}' not approved: "
                        f"{approval_result.get('message', approval_result.get('error', 'denied'))}"
                    ),
                    "approval_id": approval_result.get("approval_id"),
                }

            # Log the grant before running the function.
            _audit("granted")

            # --- Approved — run the original function ------------------------
            return await fn(*args, **kwargs)

        return wrapper

    return decorator
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pause / Resume LangChain tools
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@tool
async def pause_task(task_id: str, reason: str = "") -> dict:
    """Suspend the current task and wait for a resume signal.

    The agent calls this to pause itself at a decision point. Execution
    resumes when ``resume_task`` is called with the same task_id, or after
    the configured ``hitl.default_timeout`` seconds.

    Args:
        task_id: Unique identifier for this pause point (use the A2A task ID
            or any stable string that the caller can reference later).
        reason: Human-readable description of why the task is pausing.
    """
    # NOTE(review): the docstring above is presumably what the ``tool``
    # decorator exposes to the model, so its wording is left untouched and
    # implementation notes live in comments.
    #
    # Returns ``{"resumed": True, "task_id": ..., **payload}`` once a resume
    # signal arrives, or ``{"resumed": False, "task_id": ..., "error": ...}``
    # when the wait times out.

    def _audit(action: str, outcome: str, **extra: Any) -> None:
        """Write one best-effort HITL audit record for this task."""
        try:
            from builtin_tools.audit import log_event

            log_event(
                event_type="hitl",
                action=action,
                resource=task_id,
                outcome=outcome,
                trace_id=task_id,
                **extra,
            )
        except Exception:
            # Audit logging must never break the pause itself, but leave a
            # trace so a broken audit sink can be diagnosed.
            logger.debug("HITL: audit log failed for task %s", task_id, exc_info=True)

    # #265: record workspace ownership on registration so resume_task can
    # reject callers from a different workspace (cross-workspace prompt-injection
    # prevention). External task_id is unchanged — only internal ownership
    # metadata is added, so no tests or callers need to update their task IDs.
    _ws = os.environ.get("WORKSPACE_ID", "")

    _audit("pause", "paused", reason=reason)

    # Load the timeout *before* registering: if reading the config raises,
    # no registry entry exists yet that would escape the ``finally`` cleanup.
    timeout = _load_hitl_config().default_timeout
    event = pause_registry.register(task_id, owner=_ws)
    logger.info("HITL: task %s paused — %s", task_id, reason or "(no reason given)")

    try:
        await asyncio.wait_for(event.wait(), timeout=timeout)
        result = pause_registry.pop_result(task_id)
        logger.info("HITL: task %s resumed", task_id)
        _audit("resume", "resumed")
        # ``or {}`` guards against a registry that hands back no payload.
        return {"resumed": True, "task_id": task_id, **(result or {})}

    except asyncio.TimeoutError:
        logger.warning("HITL: task %s timed out after %.0fs", task_id, timeout)
        _audit("pause", "timeout", timeout_seconds=timeout)
        return {
            "resumed": False,
            "task_id": task_id,
            "error": f"Timed out after {timeout:.0f}s waiting for resume signal",
        }
    finally:
        # Runs on resume, timeout and cancellation alike.
        pause_registry.cleanup(task_id)
|
||||
|
||||
|
||||
@tool
async def resume_task(task_id: str, message: str = "") -> dict:
    """Resume a previously paused task.

    Signals the ``pause_task`` coroutine waiting on *task_id* to continue.
    Safe to call even if the task has already resumed or timed out (returns
    success=False in that case).

    Args:
        task_id: The identifier passed to ``pause_task``.
        message: Optional message forwarded to the resumed task.
    """
    # NOTE(review): docstring kept verbatim — presumably it is what the
    # ``tool`` decorator shows to the model.
    #
    # #265: the caller's workspace ID travels with the request so the registry
    # can refuse a resume coming from a different workspace (the ownership
    # check lives in _TaskPauseRegistry.resume).
    caller_workspace = os.environ.get("WORKSPACE_ID", "")

    # Only forward a payload when there is actually a message to deliver.
    payload = {}
    if message:
        payload["message"] = message

    if not pause_registry.resume(task_id, payload, owner=caller_workspace):
        return {
            "success": False,
            "task_id": task_id,
            "error": "Task not found or already resumed",
        }

    logger.info("HITL: resume signal sent for task %s", task_id)
    try:
        from builtin_tools.audit import log_event

        log_event(
            event_type="hitl",
            action="resume",
            resource=task_id,
            outcome="success",
            trace_id=task_id,
            message=message,
        )
    except Exception:
        pass  # audit logging is best-effort; the resume already happened
    return {"success": True, "task_id": task_id}
|
||||
|
||||
|
||||
@tool
async def list_paused_tasks() -> dict:
    """List all tasks currently suspended and waiting for a resume signal."""
    # Take one snapshot so the listing and its count always agree.
    waiting = pause_registry.list_paused()
    return dict(paused_tasks=waiting, count=len(waiting))
|
||||
@@ -1,470 +0,0 @@
|
||||
"""HMA memory tools for agents.
|
||||
|
||||
Hierarchical Memory Architecture:
|
||||
- LOCAL: private to this workspace, invisible to others
|
||||
- TEAM: shared with parent + siblings (same team)
|
||||
- GLOBAL: readable by all, writable by root workspaces only
|
||||
|
||||
RBAC enforcement
|
||||
----------------
|
||||
``commit_memory`` requires the ``"memory.write"`` action.
|
||||
``recall_memory`` requires the ``"memory.read"`` action.
|
||||
Roles are read from ``config.yaml`` under ``rbac.roles`` (default: operator).
|
||||
|
||||
Audit trail
|
||||
-----------
|
||||
Every memory operation appends a JSON Lines record to the audit log:
|
||||
|
||||
memory / memory.write / allowed — write permitted by RBAC
|
||||
memory / memory.write / success — write committed successfully
|
||||
memory / memory.write / failure — write failed (platform error)
|
||||
memory / memory.read / allowed — read permitted by RBAC
|
||||
memory / memory.read / success — search returned results
|
||||
memory / memory.read / failure — search failed (platform error)
|
||||
|
||||
RBAC denials emit ``rbac / rbac.deny / denied`` events instead.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import uuid
|
||||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.tools import tool
|
||||
from builtin_tools.awareness_client import build_awareness_client
|
||||
from builtin_tools.audit import check_permission, get_workspace_roles, log_event
|
||||
from builtin_tools.security import _redact_secrets
|
||||
from builtin_tools.telemetry import MEMORY_QUERY, MEMORY_SCOPE, WORKSPACE_ID_ATTR, get_tracer
|
||||
|
||||
try: # pragma: no cover - optional runtime dependency in lightweight test envs
|
||||
import httpx # type: ignore
|
||||
except ImportError: # pragma: no cover
|
||||
httpx = SimpleNamespace(AsyncClient=None)
|
||||
|
||||
PLATFORM_URL = os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")
|
||||
WORKSPACE_ID = os.environ.get("WORKSPACE_ID", "")
|
||||
|
||||
|
||||
@tool
async def commit_memory(content: str, scope: str = "LOCAL") -> dict:
    """Store a fact in memory with a specific scope.

    Args:
        content: The fact or knowledge to remember.
        scope: Memory scope — LOCAL (private), TEAM (shared with team), or GLOBAL (company-wide, root only).
    """
    # NOTE(review): docstring kept verbatim — LangChain's ``tool`` decorator
    # presumably uses it as the model-facing description.
    #
    # Returns the backend result dict. On the HTTP path that is
    # ``{"success": True, "id": ..., "scope": ...}`` for a 201 reply and
    # ``{"success": False, "error": ...}`` otherwise. An invalid scope returns
    # ``{"error": ...}`` without a "success" key.

    # Scrub credentials first so every later use of ``content`` (length
    # metrics, backend write, activity preview) works on the redacted text.
    content = _redact_secrets(content)
    trace_id = str(uuid.uuid4())
    scope = scope.upper()
    if scope not in ("LOCAL", "TEAM", "GLOBAL"):
        return {"error": "scope must be LOCAL, TEAM, or GLOBAL"}

    # --- RBAC check -----------------------------------------------------------
    roles, custom_perms = get_workspace_roles()
    if not check_permission("memory.write", roles, custom_perms):
        log_event(
            event_type="rbac",
            action="rbac.deny",
            resource=scope,
            outcome="denied",
            trace_id=trace_id,
            attempted_action="memory.write",
            roles=roles,
        )
        return {
            "success": False,
            "error": (
                "RBAC: this workspace does not have the 'memory.write' permission. "
                f"Current roles: {roles}"
            ),
        }

    log_event(
        event_type="memory",
        action="memory.write",
        resource=scope,
        outcome="allowed",
        trace_id=trace_id,
        memory_scope=scope,
        content_length=len(content),
    )

    def _audit_failure(error: Any) -> None:
        """Append a ``memory.write / failure`` audit record."""
        log_event(
            event_type="memory",
            action="memory.write",
            resource=scope,
            outcome="failure",
            trace_id=trace_id,
            memory_scope=scope,
            error=error,
        )

    def _error_from_response(resp: Any) -> Any:
        """Pick the most useful error text from a non-201 platform reply.

        Error bodies are not always JSON objects (proxies may answer with
        HTML or plain text), so fall back to the raw body and finally to the
        status code instead of surfacing a JSON decoding error.
        """
        try:
            body = resp.json()
        except Exception:
            body = None
        if isinstance(body, dict) and "error" in body:
            return body["error"]
        return resp.text or f"HTTP {resp.status_code}"

    # ── OTEL: memory_write span ──────────────────────────────────────────────
    tracer = get_tracer()

    with tracer.start_as_current_span("memory_write") as mem_span:
        mem_span.set_attribute(WORKSPACE_ID_ATTR, WORKSPACE_ID)
        mem_span.set_attribute(MEMORY_SCOPE, scope)
        mem_span.set_attribute("memory.content_length", len(content))

        def _raised(exc: Exception) -> dict:
            """Audit a raised error, attach it to the span, build the reply."""
            _audit_failure(str(exc))
            try:
                mem_span.record_exception(exc)
            except Exception:
                pass  # span bookkeeping must not mask the original error
            return {"success": False, "error": str(exc)}

        awareness_client = build_awareness_client()
        if awareness_client is not None:
            try:
                result = await awareness_client.commit(content, scope)
            except Exception as e:
                return _raised(e)
        else:
            # #215-class bug: platform now gates /workspaces/:id/memories behind
            # workspace auth. Import auth_headers lazily (same pattern as the
            # activity-log path below) so test environments that don't ship
            # platform_auth still work.
            try:
                from platform_auth import auth_headers as _auth

                _headers = _auth()
            except Exception:
                _headers = {}
            async with httpx.AsyncClient(timeout=10.0) as client:
                try:
                    resp = await client.post(
                        f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/memories",
                        json={"content": content, "scope": scope},
                        headers=_headers,
                    )
                    if resp.status_code == 201:
                        result = {"success": True, "id": resp.json().get("id"), "scope": scope}
                    else:
                        result = {"success": False, "error": _error_from_response(resp)}
                except Exception as e:
                    return _raised(e)

        if result.get("success"):
            mem_span.set_attribute("memory.id", result.get("id") or "")
            mem_span.set_attribute("memory.success", True)
            log_event(
                event_type="memory",
                action="memory.write",
                resource=scope,
                outcome="success",
                trace_id=trace_id,
                memory_scope=scope,
                memory_id=result.get("id"),
            )
            # #125: surface memory writes in /activity so the Canvas
            # "Agent Comms" tab shows what an agent chose to remember.
            # Failure here must not poison the tool response since the
            # memory write itself already succeeded.
            await _record_memory_activity(scope, content, result.get("id"))
            await _maybe_log_skill_promotion(content, scope, result)
        else:
            mem_span.set_attribute("memory.success", False)
            _audit_failure(result.get("error"))

    return result
|
||||
|
||||
|
||||
@tool
async def recall_memory(query: str = "", scope: str = "") -> dict:
    """Search stored memories.

    Args:
        query: Text to search for (empty returns all).
        scope: Filter by scope — LOCAL, TEAM, GLOBAL, or empty for all accessible.
    """
    # NOTE(review): docstring kept verbatim — LangChain's ``tool`` decorator
    # presumably uses it as the model-facing description.
    #
    # Returns the awareness backend's result when one is configured. On the
    # HTTP path it returns ``{"success": True, "count": n, "memories": [...]}``
    # for a 200 reply and ``{"success": False, "error": ...}`` otherwise. An
    # invalid scope returns ``{"error": ...}`` without a "success" key.
    trace_id = str(uuid.uuid4())
    scope = scope.upper()
    if scope and scope not in ("LOCAL", "TEAM", "GLOBAL"):
        return {"error": "scope must be LOCAL, TEAM, GLOBAL, or empty"}

    # Audit records and span attributes label an unfiltered search as "all".
    scope_label = scope or "all"

    # --- RBAC check -----------------------------------------------------------
    roles, custom_perms = get_workspace_roles()
    if not check_permission("memory.read", roles, custom_perms):
        log_event(
            event_type="rbac",
            action="rbac.deny",
            resource=scope_label,
            outcome="denied",
            trace_id=trace_id,
            attempted_action="memory.read",
            roles=roles,
        )
        return {
            "success": False,
            "error": (
                "RBAC: this workspace does not have the 'memory.read' permission. "
                f"Current roles: {roles}"
            ),
        }

    def _audit(outcome: str, **extra: Any) -> None:
        """Append one ``memory.read`` audit record with the given outcome."""
        log_event(
            event_type="memory",
            action="memory.read",
            resource=scope_label,
            outcome=outcome,
            trace_id=trace_id,
            memory_scope=scope_label,
            **extra,
        )

    def _error_from_response(resp: Any) -> Any:
        """Pick the most useful error text from a non-200 platform reply.

        Falls back to the raw body, then to the status code, when the body is
        not a JSON object carrying an "error" field.
        """
        try:
            body = resp.json()
        except Exception:
            body = None
        if isinstance(body, dict) and "error" in body:
            return body["error"]
        return resp.text or f"HTTP {resp.status_code}"

    _audit("allowed", query_length=len(query))

    # ── OTEL: memory_read span ───────────────────────────────────────────────
    tracer = get_tracer()

    with tracer.start_as_current_span("memory_read") as mem_span:
        mem_span.set_attribute(WORKSPACE_ID_ATTR, WORKSPACE_ID)
        mem_span.set_attribute(MEMORY_SCOPE, scope_label)
        mem_span.set_attribute(MEMORY_QUERY, query[:256] if query else "")

        def _raised(exc: Exception) -> dict:
            """Audit a raised error, attach it to the span, build the reply."""
            _audit("failure", error=str(exc))
            try:
                mem_span.record_exception(exc)
            except Exception:
                pass  # span bookkeeping must not mask the original error
            return {"success": False, "error": str(exc)}

        awareness_client = build_awareness_client()
        if awareness_client is not None:
            try:
                result = await awareness_client.search(query, scope)
                mem_span.set_attribute("memory.result_count", result.get("count", 0))
                mem_span.set_attribute("memory.success", result.get("success", False))
                _audit(
                    "success" if result.get("success") else "failure",
                    result_count=result.get("count", 0),
                )
                return result
            except Exception as e:
                return _raised(e)

        params = {}
        if query:
            params["q"] = query
        if scope:
            params["scope"] = scope  # already upper-cased above

        # #215-class bug (search path): same fix as commit_memory above —
        # the platform gates GET /workspaces/:id/memories behind workspace
        # auth, so without auth_headers() every search silently 401s and the
        # agent thinks its backlog is empty (observed on Technical Researcher
        # idle-loop pilot 2026-04-15).
        try:
            from platform_auth import auth_headers as _auth

            _headers = _auth()
        except Exception:
            _headers = {}

        async with httpx.AsyncClient(timeout=10.0) as client:
            try:
                resp = await client.get(
                    f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/memories",
                    params=params,
                    headers=_headers,
                )
                if resp.status_code == 200:
                    memories = resp.json()
                    mem_span.set_attribute("memory.result_count", len(memories))
                    mem_span.set_attribute("memory.success", True)
                    _audit("success", result_count=len(memories))
                    return {
                        "success": True,
                        "count": len(memories),
                        "memories": memories,
                    }
                # Work out the error text *before* auditing: should reading
                # the body raise, the ``except`` below then records the
                # failure exactly once instead of twice.
                error = _error_from_response(resp)
                mem_span.set_attribute("memory.success", False)
                _audit("failure", http_status=resp.status_code)
                return {"success": False, "error": error}
            except Exception as e:
                return _raised(e)
|
||||
|
||||
|
||||
def _parse_promotion_packet(content: str) -> dict[str, Any] | None:
|
||||
"""Return a structured memory packet when content looks like promotion metadata."""
|
||||
text = content.strip()
|
||||
if not text.startswith("{"):
|
||||
return None
|
||||
|
||||
try:
|
||||
payload = json.loads(text)
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
|
||||
if not isinstance(payload, dict): # pragma: no cover
|
||||
return None
|
||||
if not payload.get("promote_to_skill"):
|
||||
return None
|
||||
|
||||
return payload
|
||||
|
||||
|
||||
async def _record_memory_activity(scope: str, content: str, memory_id: str | None) -> None:
    """Post a short ``memory_write`` activity row for a committed memory (#125).

    The row lets the Canvas "Agent Comms" tab display what an agent chose to
    remember. This is best-effort observability: the function returns quietly
    when ``WORKSPACE_ID`` or ``PLATFORM_URL`` is empty, and any error while
    posting is swallowed.

    Args:
        scope: Memory scope, used as the ``[SCOPE]`` tag that prefixes the summary.
        content: Stored text. Only a single-line preview of its first 80
            characters (with ``…`` appended when truncated) is sent; the full
            content lives in ``agent_memories``.
        memory_id: Identifier returned by the memory backend, if any. Its
            first 24 characters are appended to the summary.
    """
    workspace_id = WORKSPACE_ID.strip()
    platform_url = PLATFORM_URL.strip().rstrip("/")
    if not workspace_id or not platform_url:
        return

    preview = content.strip().replace("\n", " ")
    if len(preview) > 80:
        preview = preview[:80] + "…"
    summary = f"[{scope}] {preview}"

    # NOTE: target_id is a UUID column scoped to workspace_id references —
    # cannot hold awareness/memory IDs (which are arbitrary strings).
    # We embed the memory_id in the summary instead so it's still searchable.
    if memory_id:
        # str(): a backend may hand back a non-string id (e.g. an integer
        # key). Slicing it directly would raise here, outside the try block,
        # and break the caller after the write already succeeded.
        summary = f"{summary} (id={str(memory_id)[:24]})"
    payload: dict[str, Any] = {
        "workspace_id": workspace_id,
        "activity_type": "memory_write",
        "summary": summary,
        "status": "ok",
    }

    try:
        # Lazy import so environments without platform_auth still work; the
        # request then simply goes out without auth headers.
        try:
            from platform_auth import auth_headers as _auth

            _headers = _auth()
        except Exception:
            _headers = {}
        async with httpx.AsyncClient(timeout=5.0) as client:
            await client.post(
                f"{platform_url}/workspaces/{workspace_id}/activity",
                json=payload,
                headers=_headers,
            )
    except Exception:
        # Activity logging is purely observability — never poison the
        # tool response on a failure here. We don't even log_event the
        # failure since the memory write itself succeeded and that's
        # what matters to the caller.
        pass
|
||||
|
||||
|
||||
async def _maybe_log_skill_promotion(content: str, scope: str, memory_result: dict) -> None:
    """Best-effort activity log for memory entries flagged for skill promotion.

    Does nothing unless ``_parse_promotion_packet`` recognises *content* as a
    promotion packet and both ``WORKSPACE_ID`` and ``PLATFORM_URL`` are set.
    Otherwise it posts a ``skill_promotion`` activity row followed by a
    registry heartbeat whose ``current_task`` names the promotion. Errors
    from either request are swallowed so a memory commit never fails because
    of this log.

    Args:
        content: The committed memory text (possibly a JSON promotion packet).
        scope: Memory scope recorded in the activity metadata.
        memory_result: Result dict of the memory write; its ``id`` is recorded.
    """
    packet = _parse_promotion_packet(content)
    if packet is None:
        return

    workspace_id = WORKSPACE_ID.strip()
    platform_url = PLATFORM_URL.strip().rstrip("/")
    if not workspace_id or not platform_url:
        return

    repetition_signal = packet.get("repetition_signal")
    summary = (
        packet.get("summary")
        or packet.get("title")
        or packet.get("what changed")
        or "Repeatable workflow promoted to skill candidate"
    )
    metadata: dict[str, Any] = {
        "source": "memory-curation",
        "scope": scope,
        "memory_id": memory_result.get("id"),
        "promote_to_skill": True,
        "repetition_signal": repetition_signal,
        "memory_packet": packet,
    }

    payload = {
        "activity_type": "skill_promotion",
        "method": "memory/skill-promotion",
        "summary": summary,
        "status": "ok",
        "source_id": workspace_id,
        "request_body": packet,
        "metadata": metadata,
    }

    try:
        # Send workspace auth headers, as the other platform calls in this
        # module do for the same /activity endpoint. The import is lazy so
        # environments without platform_auth still work.
        try:
            from platform_auth import auth_headers as _auth

            _headers = _auth()
        except Exception:
            _headers = {}
        async with httpx.AsyncClient(timeout=5.0) as client:
            await client.post(
                f"{platform_url}/workspaces/{workspace_id}/activity",
                json=payload,
                headers=_headers,
            )
            await client.post(
                f"{platform_url}/registry/heartbeat",
                json={
                    "workspace_id": workspace_id,
                    "error_rate": 0,
                    "sample_error": "",
                    "active_tasks": 1,
                    "uptime_seconds": 0,
                    "current_task": f"Skill promotion: {summary}",
                },
                headers=_headers,
            )
    except Exception:
        # Best-effort observability only. Memory commits must never fail because
        # the promotion log could not be written.
        return
|
||||
@@ -1,281 +0,0 @@
|
||||
"""Code sandbox tool for safe code execution.
|
||||
|
||||
Executes code in an isolated environment. Three backends are supported:
|
||||
|
||||
subprocess (default)
|
||||
Runs code locally via asyncio subprocess with a hard timeout.
|
||||
Best for Tier 1/2 agents where run_code is lightly used and the
|
||||
workspace container itself is the isolation boundary.
|
||||
|
||||
docker
|
||||
Throwaway Docker-in-Docker container: network disabled, memory capped,
|
||||
read-only filesystem. Requires Docker socket access inside the container.
|
||||
Best for Tier 3 on-prem deployments.
|
||||
|
||||
e2b
|
||||
Cloud-hosted microVM sandbox via E2B (https://e2b.dev).
|
||||
No local Docker required — code runs in E2B's isolated cloud VMs.
|
||||
Supports Python and JavaScript.
|
||||
Requires:
|
||||
- e2b-code-interpreter Python package (pinned in requirements.txt)
|
||||
- E2B_API_KEY workspace secret (set via canvas Secrets panel or API)
|
||||
Best for hosted/cloud Molecule AI deployments.
|
||||
|
||||
Backend is selected via the SANDBOX_BACKEND env var, which the provisioner
|
||||
sets from config.yaml → sandbox.backend. Default: "subprocess".
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
from langchain_core.tools import tool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SANDBOX_BACKEND = os.environ.get("SANDBOX_BACKEND", "subprocess")
|
||||
SANDBOX_TIMEOUT = int(os.environ.get("SANDBOX_TIMEOUT", "30"))
|
||||
SANDBOX_MEMORY_LIMIT = os.environ.get("SANDBOX_MEMORY_LIMIT", "256m")
|
||||
MAX_OUTPUT = 10_000
|
||||
|
||||
# E2B kernel names differ from internal language names.
|
||||
_E2B_KERNEL_MAP = {
|
||||
"python": "python3",
|
||||
"javascript": "js",
|
||||
"js": "js",
|
||||
}
|
||||
|
||||
|
||||
@tool
async def run_code(code: str, language: str = "python") -> dict:
    """Execute code in an isolated sandbox and return the output.

    Args:
        code: The code to execute.
        language: Programming language — python, javascript, or shell.
            The e2b backend supports python and javascript only.
    """
    # NOTE(review): docstring kept verbatim — LangChain's ``tool`` decorator
    # presumably uses it as the model-facing description.
    #
    # Any SANDBOX_BACKEND value other than "docker" or "e2b" (including the
    # default "subprocess") is served by the local subprocess runner.
    backends = {"docker": _run_docker, "e2b": _run_e2b}
    runner = backends.get(SANDBOX_BACKEND, _run_subprocess)
    return await runner(code, language)
|
||||
|
||||
|
||||
async def _run_subprocess(code: str, language: str) -> dict:
    """Run *code* with a local interpreter, bounded by ``SANDBOX_TIMEOUT``.

    The code is passed as a single argv element to the interpreter chosen for
    *language* (python, javascript, shell or bash), so no shell parses it.
    On normal completion the result carries ``exit_code``, ``stdout`` and
    ``stderr`` (each cut to ``MAX_OUTPUT`` characters), ``language`` and
    ``backend``. Unsupported languages, timeouts and launch errors return
    ``{"error": ..., "exit_code": -1}``.
    """
    interpreters = {
        "python": ["python3", "-c"],
        "javascript": ["node", "-e"],
        "shell": ["sh", "-c"],
        "bash": ["bash", "-c"],
    }

    argv_prefix = interpreters.get(language)
    if not argv_prefix:
        return {"error": f"Unsupported language: {language}", "exit_code": -1}

    def _clip(raw: bytes) -> str:
        # Decode leniently, then cap what is handed back to the agent.
        return raw.decode("utf-8", errors="replace")[:MAX_OUTPUT]

    try:
        child = await asyncio.create_subprocess_exec(
            *argv_prefix,
            code,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        out, err = await asyncio.wait_for(child.communicate(), timeout=SANDBOX_TIMEOUT)
        return {
            "exit_code": child.returncode,
            "stdout": _clip(out),
            "stderr": _clip(err),
            "language": language,
            "backend": "subprocess",
        }
    except asyncio.TimeoutError:
        # Kill and reap the child so it does not linger after the timeout.
        try:
            child.kill()
            await child.wait()
        except ProcessLookupError:
            pass  # the process had already exited
        return {"error": f"Timeout after {SANDBOX_TIMEOUT}s", "exit_code": -1}
    except Exception as exc:
        return {"error": str(exc), "exit_code": -1}
|
||||
|
||||
|
||||
async def _run_docker(code: str, language: str) -> dict:
    """Run *code* in a throwaway Docker container via a mounted temp file.

    The code is written to a host temp file and bind-mounted read-only at
    ``/sandbox/code<ext>``. The container runs with networking disabled,
    memory capped at ``SANDBOX_MEMORY_LIMIT``, half a CPU, a read-only root
    filesystem and a 32 MB ``/tmp`` tmpfs. The temp file is always removed.

    On timeout the named container is killed and the ``docker run`` client
    is reaped, so a runaway snippet does not keep consuming resources after
    this function has returned.

    Args:
        code: Source text to execute.
        language: One of python, javascript, shell, bash ("bash" is run with
            ``sh`` in the alpine image).

    Returns:
        On completion: ``exit_code``, ``stdout``, ``stderr`` (each cut to
        ``MAX_OUTPUT`` characters), ``language``, ``backend`` and ``image``.
        Otherwise ``{"error": ..., "exit_code": -1}``.
    """
    import uuid  # function-scope import: only needed to name the container

    image_map = {
        "python": ("python:3.11-slim", ["python3", "/sandbox/code.py"]),
        "javascript": ("node:20-slim", ["node", "/sandbox/code.js"]),
        "shell": ("alpine:3.18", ["sh", "/sandbox/code.sh"]),
        "bash": ("alpine:3.18", ["sh", "/sandbox/code.sh"]),
    }

    entry = image_map.get(language)
    if not entry:
        return {"error": f"Unsupported language: {language}", "exit_code": -1}

    image, run_cmd = entry
    code_file = None
    proc = None
    # A known name lets the timeout path address the container directly.
    container_name = f"sandbox-{uuid.uuid4().hex}"

    try:
        # Write code to temp file — avoids shell metacharacter injection
        ext = {"python": ".py", "javascript": ".js", "shell": ".sh", "bash": ".sh"}.get(language, ".txt")
        fd, code_file = tempfile.mkstemp(suffix=ext, prefix="sandbox_")
        # Explicit UTF-8 so non-ASCII source does not depend on the host locale.
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(code)

        cmd = [
            "docker", "run", "--rm",
            "--name", container_name,
            "--network", "none",
            "--memory", SANDBOX_MEMORY_LIMIT,
            "--cpus", "0.5",
            "--read-only",
            "--tmpfs", "/tmp:size=32m",
            "-v", f"{code_file}:/sandbox/code{ext}:ro",
            image,
        ] + run_cmd

        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=SANDBOX_TIMEOUT)

        return {
            "exit_code": proc.returncode,
            "stdout": stdout.decode("utf-8", errors="replace")[:MAX_OUTPUT],
            "stderr": stderr.decode("utf-8", errors="replace")[:MAX_OUTPUT],
            "language": language,
            "backend": "docker",
            "image": image,
        }
    except asyncio.TimeoutError:
        # Ask the daemon to stop the container: killing only the CLI client
        # may leave the container itself running.
        try:
            stopper = await asyncio.create_subprocess_exec(
                "docker", "kill", container_name,
                stdout=asyncio.subprocess.DEVNULL,
                stderr=asyncio.subprocess.DEVNULL,
            )
            await asyncio.wait_for(stopper.wait(), timeout=10)
        except Exception:
            logger.warning("Could not stop sandbox container %s", container_name)
        # Then make sure the ``docker run`` client itself is gone and reaped.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
                await proc.wait()
            except ProcessLookupError:
                pass  # the client had already exited
        return {"error": f"Timeout after {SANDBOX_TIMEOUT}s", "exit_code": -1}
    except Exception as e:
        return {"error": str(e), "exit_code": -1}
    finally:
        if code_file:
            try:
                os.unlink(code_file)
            except OSError:
                pass
|
||||
|
||||
|
||||
async def _run_e2b(code: str, language: str) -> dict:
    """Run *code* in a freshly created E2B cloud sandbox.

    Requires the ``e2b-code-interpreter`` package and the ``E2B_API_KEY``
    environment variable. Each call creates a sandbox, runs the code and
    kills the sandbox again. Sandbox creation and code execution are each
    bounded by ``SANDBOX_TIMEOUT`` seconds. The SDK calls are made through
    the event loop's default executor.

    Args:
        code: Source text to execute.
        language: A key of ``_E2B_KERNEL_MAP`` (python, javascript, js).

    Returns:
        On completion: ``exit_code`` (1 when the run reported an error or
        wrote to stderr, else 0), ``stdout`` and ``stderr`` (each cut to
        ``MAX_OUTPUT`` characters), ``language`` and ``backend``.
        Otherwise ``{"error": ..., "exit_code": -1}``.
    """
    # Import lazily so the package is only required when the e2b backend is
    # actually configured — other backends work without it installed.
    try:
        from e2b_code_interpreter import Sandbox
    except ImportError:
        return {
            "error": (
                "e2b-code-interpreter is not installed. "
                "Add it to requirements.txt or switch to the docker/subprocess backend."
            ),
            "exit_code": -1,
        }

    api_key = os.environ.get("E2B_API_KEY")
    if not api_key:
        return {
            "error": (
                "E2B_API_KEY is not set. "
                "Add it as a workspace secret via the canvas Secrets panel or platform API."
            ),
            "exit_code": -1,
        }

    kernel = _E2B_KERNEL_MAP.get(language)
    if kernel is None:
        return {
            "error": (
                f"Language '{language}' is not supported by the e2b backend. "
                "Supported: python, javascript."
            ),
            "exit_code": -1,
        }

    sandbox = None
    loop = asyncio.get_running_loop()
    try:
        # Create a fresh sandbox for this execution.
        # timeout controls the sandbox lifetime in seconds.
        sandbox = await asyncio.wait_for(
            loop.run_in_executor(
                None,
                lambda: Sandbox(api_key=api_key, timeout=SANDBOX_TIMEOUT),
            ),
            timeout=SANDBOX_TIMEOUT,
        )

        # Execute code and collect results.
        execution = await asyncio.wait_for(
            loop.run_in_executor(
                None,
                lambda: sandbox.run_code(code, language=kernel),
            ),
            timeout=SANDBOX_TIMEOUT,
        )

        stdout_parts = []
        stderr_parts = []

        for result in execution.results:
            # result.text is the primary output (stdout equivalent)
            if hasattr(result, "text") and result.text:
                stdout_parts.append(str(result.text))
            # Some result types expose an error attribute
            if hasattr(result, "error") and result.error:
                stderr_parts.append(str(result.error))

        # Logs are stored separately in execution.logs
        if hasattr(execution, "logs"):
            logs = execution.logs
            if hasattr(logs, "stdout") and logs.stdout:
                stdout_parts.extend(logs.stdout)
            if hasattr(logs, "stderr") and logs.stderr:
                stderr_parts.extend(logs.stderr)

        # An exception raised by the executed code is reported on the
        # execution object itself, not on individual results. Without this
        # a crashing snippet could come back as exit_code 0 with empty stderr.
        run_error = getattr(execution, "error", None)
        if run_error:
            stderr_parts.append(str(getattr(run_error, "traceback", None) or run_error))

        combined_stdout = "".join(stdout_parts)[:MAX_OUTPUT]
        combined_stderr = "".join(stderr_parts)[:MAX_OUTPUT]

        # e2b doesn't expose a numeric exit code at the sandbox level, so a
        # reported error or any stderr output counts as failure.
        exit_code = 1 if (run_error or combined_stderr) else 0

        return {
            "exit_code": exit_code,
            "stdout": combined_stdout,
            "stderr": combined_stderr,
            "language": language,
            "backend": "e2b",
        }

    except asyncio.TimeoutError:
        logger.warning("E2B sandbox timed out after %ds", SANDBOX_TIMEOUT)
        return {"error": f"Timeout after {SANDBOX_TIMEOUT}s", "exit_code": -1}
    except Exception as e:
        logger.exception("E2B sandbox error: %s", e)
        return {"error": str(e), "exit_code": -1}
    finally:
        # Always destroy the sandbox to avoid leaking E2B credits.
        if sandbox is not None:
            try:
                await loop.run_in_executor(None, sandbox.kill)
            except Exception:
                pass  # Best-effort cleanup
|
||||
@@ -1,120 +0,0 @@
|
||||
"""Secret-scrubbing utilities for workspace runtime (#834 — C2).
|
||||
|
||||
Provides ``_redact_secrets()`` applied at every ``commit_memory`` call site
|
||||
to prevent API keys and tokens from being persisted verbatim in the
|
||||
memories table.
|
||||
|
||||
Design notes
|
||||
------------
|
||||
- **Known-prefix patterns** (``sk-``, ``ghp_``, etc.) cover the most
|
||||
dangerous tokens because they are unambiguous.
|
||||
- **Contextual pattern** covers generic high-entropy values that appear
|
||||
immediately after assignment keywords (``key=``, ``token=``, ``secret=``,
|
||||
``password=``, ``api_key=``). The keyword is preserved in the output so
|
||||
log lines remain readable; only the value is redacted.
|
||||
- **Idempotent**: the replacement token ``[REDACTED]`` does not match any
|
||||
of the patterns, so calling ``_redact_secrets`` twice is safe.
|
||||
- **No false-positive risk on normal prose**: all patterns require either
|
||||
a well-known prefix (``AKIA``, ``ghp_``, ``sk-``) or both a keyword and
|
||||
≥ 40 base64/alphanumeric chars — ordinary English words never match.
|
||||
|
||||
Relationship to ``compliance.redact_pii``
|
||||
------------------------------------------
|
||||
``redact_pii`` handles PII (emails, SSNs, credit cards) and uses typed
|
||||
tokens ``[REDACTED:type]`` for SIEM indexing. ``_redact_secrets`` is
|
||||
narrowly scoped to API credentials and uses the plain ``[REDACTED]`` token
|
||||
because the exact secret type is not important at the storage layer —
|
||||
what matters is that no credential value ever reaches the database.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Replacement sentinel
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
#: Replacement token — deliberately plain so downstream readers do not need
|
||||
#: to parse structured tokens. Does not match any scrub pattern (idempotent).
|
||||
REDACTED: str = "[REDACTED]"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Patterns
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Patterns that identify secret values by their well-known prefix.
|
||||
# Ordered from most specific to least specific.
|
||||
_BARE_PATTERNS: List[re.Pattern] = [
|
||||
# OpenAI / Anthropic-style keys: sk-<20+ alnum/hyphen/underscore chars>
|
||||
# Covers: sk-<key>, sk-ant-<key>, sk-proj-<key>, etc.
|
||||
re.compile(r"\bsk-[A-Za-z0-9_-]{20,}\b"),
|
||||
# GitHub classic personal access token
|
||||
re.compile(r"\bghp_[A-Za-z0-9]{36}\b"),
|
||||
# GitHub server-to-server token
|
||||
re.compile(r"\bghs_[A-Za-z0-9]{36}\b"),
|
||||
# GitHub fine-grained personal access token
|
||||
re.compile(r"\bgithub_pat_[A-Za-z0-9_]{82}\b"),
|
||||
# AWS access key ID
|
||||
re.compile(r"\bAKIA[0-9A-Z]{16}\b"),
|
||||
]
|
||||
|
||||
# Contextual pattern: keyword= followed by a high-entropy value.
|
||||
#
|
||||
# Group 1 captures the keyword + equals sign so it is preserved in the
|
||||
# replacement — "api_key=[REDACTED]" is more informative than "[REDACTED]".
|
||||
#
|
||||
# The value charset [A-Za-z0-9+/] covers base64 and common token alphabets.
|
||||
# The minimum length of 40 chars prevents false-positives on short values.
|
||||
_CONTEXTUAL_RE: re.Pattern = re.compile(
|
||||
r"(?i)"
|
||||
r"((?:api_key|key|token|secret|password)\s*=\s*)"
|
||||
r"([A-Za-z0-9+/]{40,}={0,2})"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _redact_secrets(content: str) -> str:
|
||||
"""Scrub known secret patterns from *content*, replacing with ``[REDACTED]``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
content:
|
||||
Raw string to scrub — typically a ``commit_memory`` payload.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Copy of *content* with secrets replaced. If no secrets are found,
|
||||
the original string is returned unchanged. Calling this function
|
||||
on already-redacted content is safe (idempotent).
|
||||
|
||||
Examples::
|
||||
|
||||
>>> _redact_secrets("token is sk-abc1234567890123456789012345")
|
||||
'token is [REDACTED]'
|
||||
|
||||
>>> _redact_secrets("api_key=" + "A" * 45)
|
||||
'api_key=[REDACTED]'
|
||||
|
||||
>>> _redact_secrets("The answer is 42.")
|
||||
'The answer is 42.'
|
||||
|
||||
>>> _redact_secrets("[REDACTED]")
|
||||
'[REDACTED]'
|
||||
"""
|
||||
result = content
|
||||
|
||||
# Apply prefix-based patterns first (most unambiguous)
|
||||
for pattern in _BARE_PATTERNS:
|
||||
result = pattern.sub(REDACTED, result)
|
||||
|
||||
# Apply contextual pattern — preserve keyword, replace only the value
|
||||
result = _CONTEXTUAL_RE.sub(r"\1" + REDACTED, result)
|
||||
|
||||
return result
|
||||
@@ -1,344 +0,0 @@
|
||||
"""Skill dependency security scanner — supply-chain risk management.
|
||||
|
||||
Scans a skill's ``requirements.txt`` for known CVEs before the skill is
|
||||
loaded into the workspace. Two scanners are supported:
|
||||
|
||||
Snyk CLI — ``snyk test --file=requirements.txt --json``
|
||||
Preferred; requires the ``snyk`` binary in PATH and
|
||||
a SNYK_TOKEN env var for authenticated scans.
|
||||
|
||||
pip-audit — ``pip-audit -r requirements.txt --json``
|
||||
Fallback; no authentication required.
|
||||
|
||||
The scanner is auto-selected: Snyk if available, pip-audit otherwise.
|
||||
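If neither is present in PATH the scan is skipped with an INFO log line and a "skipped" audit event (or, in ``block`` mode with ``fail_open_if_no_scanner=False``, the skill is rejected).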
|
||||
|
||||
Scan mode (``security_scan.mode`` in config.yaml):
|
||||
|
||||
block — raise ``SkillSecurityError`` when critical/high CVEs are found;
|
||||
the skill is *not* loaded.
|
||||
warn — log a WARNING + audit event; the skill is loaded anyway.
|
||||
off — skip scanning entirely; useful in air-gapped CI.
|
||||
|
||||
Audit trail
|
||||
-----------
|
||||
Every scan (pass or fail) is recorded via ``tools.audit.log_event`` with
|
||||
``event_type="security_scan"``, enabling compliance reports to prove that
|
||||
all loaded skills were checked before activation.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from builtin_tools.audit import log_event
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public exception
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SkillSecurityError(RuntimeError):
    """Signal that the security scan refused to let a skill load.

    Only raised in ``block`` mode.  The exception message names the skill and
    states why it was blocked (for CVE findings: the scanner used and a short
    summary of the critical/high entries) so operators can act on it directly.
    """
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data models
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class CVEFinding:
    """One vulnerability reported by a dependency scanner."""

    # CVE or advisory identifier, e.g. ``SNYK-PYTHON-REQUESTS-1234``.
    vuln_id: str
    # Name of the affected package.
    package: str
    # Installed version of that package.
    version: str
    # One of: critical | high | medium | low | unknown.
    severity: str
    # Short human-readable summary (at most 200 characters).
    description: str
|
||||
|
||||
|
||||
@dataclass
class ScanResult:
    """Outcome of scanning one skill's dependency file."""

    # Name of the skill that was scanned.
    skill_name: str
    # Scanner used: ``"snyk"`` | ``"pip-audit"`` | ``"none"``.
    scanner: str
    # Path of the scanned requirements.txt as a string, or ``None``.
    requirements_file: Optional[str]
    # Every finding the scanner reported, regardless of severity.
    findings: list[CVEFinding] = field(default_factory=list)
    # Non-fatal scanner error (e.g. timeout); findings may be incomplete.
    scan_error: Optional[str] = None

    @property
    def critical_or_high(self) -> list[CVEFinding]:
        """Findings whose severity is ``"critical"`` or ``"high"``, in order."""
        blocking_levels = ("critical", "high")
        return [
            finding for finding in self.findings if finding.severity in blocking_levels
        ]

    @property
    def has_critical_or_high(self) -> bool:
        """``True`` when at least one critical/high finding exists."""
        return len(self.critical_or_high) > 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _find_requirements(skill_path: Path) -> Optional[Path]:
|
||||
"""Return the first ``requirements.txt`` found in the skill tree."""
|
||||
for candidate in (
|
||||
skill_path / "requirements.txt",
|
||||
skill_path / "tools" / "requirements.txt",
|
||||
):
|
||||
if candidate.exists():
|
||||
return candidate
|
||||
return None
|
||||
|
||||
|
||||
def _run_scanner(cmd: list[str], timeout: int = 120) -> tuple[str, Optional[str]]:
|
||||
"""Run a scanner subprocess and return ``(stdout, error_or_None)``."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=timeout,
|
||||
)
|
||||
# Both Snyk and pip-audit exit 1 when vulns are found — not an error.
|
||||
# Exit 2 from Snyk means a genuine scan failure.
|
||||
if result.returncode == 2 and not result.stdout.strip():
|
||||
return "", f"scanner exited 2: {result.stderr.strip()[:200]}"
|
||||
return result.stdout, None
|
||||
except subprocess.TimeoutExpired:
|
||||
return "", f"scanner timed out after {timeout}s"
|
||||
except FileNotFoundError as exc:
|
||||
return "", str(exc)
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
return "", str(exc)
|
||||
|
||||
|
||||
def _parse_snyk(stdout: str) -> tuple[list[CVEFinding], Optional[str]]:
|
||||
"""Parse ``snyk test --json`` output."""
|
||||
if not stdout.strip():
|
||||
return [], "empty snyk output"
|
||||
try:
|
||||
data = json.loads(stdout)
|
||||
except json.JSONDecodeError as exc:
|
||||
return [], f"snyk JSON parse error: {exc}"
|
||||
|
||||
vulns = data.get("vulnerabilities", [])
|
||||
findings = [
|
||||
CVEFinding(
|
||||
vuln_id=v.get("id", "UNKNOWN"),
|
||||
package=v.get("packageName", "?"),
|
||||
version=v.get("version", "?"),
|
||||
severity=v.get("severity", "unknown").lower(),
|
||||
description=(v.get("title", "") or "")[:200],
|
||||
)
|
||||
for v in vulns
|
||||
if isinstance(v, dict)
|
||||
]
|
||||
return findings, None
|
||||
|
||||
|
||||
def _parse_pip_audit(stdout: str) -> tuple[list[CVEFinding], Optional[str]]:
|
||||
"""Parse ``pip-audit --json`` output.
|
||||
|
||||
pip-audit does not always provide a CVSS severity level. When absent we
|
||||
conservatively classify the finding as ``"high"`` so it is not silently
|
||||
ignored in ``warn`` mode.
|
||||
"""
|
||||
if not stdout.strip():
|
||||
return [], "empty pip-audit output"
|
||||
try:
|
||||
data = json.loads(stdout)
|
||||
except json.JSONDecodeError as exc:
|
||||
return [], f"pip-audit JSON parse error: {exc}"
|
||||
|
||||
# pip-audit ≥ 2.x wraps results in {"dependencies": [...]}
|
||||
if isinstance(data, dict):
|
||||
deps = data.get("dependencies", [])
|
||||
else:
|
||||
deps = data # older versions return a bare list
|
||||
|
||||
findings: list[CVEFinding] = []
|
||||
for dep in deps:
|
||||
if not isinstance(dep, dict):
|
||||
continue
|
||||
for vuln in dep.get("vulns", []):
|
||||
sev_raw = vuln.get("fix_versions") and "high" # pip-audit lacks severity
|
||||
sev = (vuln.get("severity") or sev_raw or "high").lower()
|
||||
findings.append(
|
||||
CVEFinding(
|
||||
vuln_id=vuln.get("id", "UNKNOWN"),
|
||||
package=dep.get("name", "?"),
|
||||
version=dep.get("version", "?"),
|
||||
severity=sev,
|
||||
description=(vuln.get("description", "") or "")[:200],
|
||||
)
|
||||
)
|
||||
return findings, None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def scan_skill_dependencies(
    skill_name: str,
    skill_path: Path,
    mode: str,
    fail_open_if_no_scanner: bool = True,
) -> ScanResult:
    """Scan a skill's dependency file for known CVEs.

    Args:
        skill_name: Name of the skill (used in log messages and audit events).
        skill_path: Path to the skill's root directory.
        mode: ``"block"`` | ``"warn"`` | ``"off"``
        fail_open_if_no_scanner:
            When *True* (default) skip scanning if neither snyk nor pip-audit
            is in PATH.  When *False* and ``mode="block"``, raise
            :class:`SkillSecurityError` so operators know the gate is absent.
            Corresponds to ``security_scan.fail_open_if_no_scanner`` in
            config.yaml.  Closes #268.

    Returns:
        A :class:`ScanResult` describing what was found.  A scanner failure is
        reported through ``ScanResult.scan_error``; it does not raise.

    Raises:
        :class:`SkillSecurityError`: When ``mode="block"`` and one or more
            critical/high severity CVEs are found — OR when ``mode="block"``
            and ``fail_open_if_no_scanner=False`` and no scanner is available.

    Audit trail:
        Every scan that reaches the scanner-selection step emits one
        ``security_scan`` audit event.  Its outcome is ``"skipped"`` (no
        scanner), ``"vulnerable"`` (critical/high findings), ``"error"`` (the
        scanner failed and reported no critical/high findings) or ``"clean"``.
    """
    if mode == "off":
        return ScanResult(skill_name=skill_name, scanner="none", requirements_file=None)

    req_file = _find_requirements(skill_path)
    if req_file is None:
        # No requirements file — nothing to scan; not a problem.
        return ScanResult(skill_name=skill_name, scanner="none", requirements_file=None)

    # ── Select scanner ────────────────────────────────────────────────────────
    # Preference order: Snyk first, pip-audit as the fallback.  Each entry is
    # (binary name, command line, parser for that scanner's JSON output).
    candidates = (
        ("snyk", ["snyk", "test", f"--file={req_file}", "--json"], _parse_snyk),
        (
            "pip-audit",
            ["pip-audit", "-r", str(req_file), "--json", "--progress-spinner=off"],
            _parse_pip_audit,
        ),
    )
    selected = next(
        (candidate for candidate in candidates if shutil.which(candidate[0])), None
    )

    if selected is None:
        logger.info(
            "security_scan: no scanner (snyk, pip-audit) in PATH — skipping %s",
            skill_name,
        )
        log_event(
            event_type="security_scan",
            action="skill.security_scan",
            resource=skill_name,
            outcome="skipped",
            reason="no_scanner_in_path",
            requirements_file=str(req_file),
            mode=mode,
        )
        # #268: if fail_open_if_no_scanner=False and mode=block, the operator
        # explicitly opted in to "fail closed" — raise so the missing scanner
        # is visible rather than silently skipped.
        if not fail_open_if_no_scanner and mode == "block":
            raise SkillSecurityError(
                f"Skill '{skill_name}' blocked: no scanner (snyk or pip-audit) "
                "found in PATH and fail_open_if_no_scanner=false"
            )
        return ScanResult(
            skill_name=skill_name,
            scanner="none",
            requirements_file=str(req_file),
            scan_error="No scanner (snyk or pip-audit) found in PATH",
        )

    # ── Run the selected scanner ──────────────────────────────────────────────
    scanner_name, cmd, parse_output = selected
    findings: list[CVEFinding]
    scan_error: Optional[str]

    stdout, run_error = _run_scanner(cmd)
    if run_error:
        findings, scan_error = [], run_error
    else:
        findings, scan_error = parse_output(stdout)

    result = ScanResult(
        skill_name=skill_name,
        scanner=scanner_name,
        requirements_file=str(req_file),
        findings=findings,
        scan_error=scan_error,
    )

    # ── Log scan outcome to audit trail ──────────────────────────────────────
    # A scan that errored must not be recorded as "clean": the audit trail is
    # used to show that skills were actually checked.
    if result.has_critical_or_high:
        audit_outcome = "vulnerable"
    elif scan_error:
        audit_outcome = "error"
    else:
        audit_outcome = "clean"

    log_event(
        event_type="security_scan",
        action="skill.security_scan",
        resource=skill_name,
        outcome=audit_outcome,
        scanner=scanner_name,
        requirements_file=str(req_file),
        total_findings=len(findings),
        critical_or_high_count=len(result.critical_or_high),
        scan_error=scan_error,
    )

    if scan_error:
        logger.warning(
            "security_scan: scanner error for skill '%s': %s", skill_name, scan_error
        )

    # ── Enforce mode ─────────────────────────────────────────────────────────
    blocking = result.critical_or_high
    if blocking:
        # Keep the message short: list at most five findings, count the rest.
        summary = ", ".join(
            f"{f.vuln_id}({f.severity}) in {f.package}@{f.version}"
            for f in blocking[:5]
        )
        if len(blocking) > 5:
            summary += f" … and {len(blocking) - 5} more"

        msg = (
            f"Skill '{skill_name}' has {len(blocking)} "
            f"critical/high CVE(s) [{scanner_name}]: {summary}"
        )

        if mode == "block":
            logger.error("Blocking skill load — %s", msg)
            raise SkillSecurityError(msg)

        # warn mode — continue loading, but make noise
        logger.warning("Security warning — %s", msg)

    return result
|
||||
@@ -1,418 +0,0 @@
|
||||
"""OpenTelemetry (OTEL) instrumentation for the Molecule AI workspace runtime.
|
||||
|
||||
Architecture
|
||||
------------
|
||||
* One global ``TracerProvider`` is initialised at startup via ``setup_telemetry()``.
|
||||
* Up to three exporters are wired in:
|
||||
1. **OTLP/HTTP** — activated when ``OTEL_EXPORTER_OTLP_ENDPOINT`` is set.
|
||||
Point this at any compatible collector (Jaeger, Tempo, Grafana OTEL, …).
|
||||
2. **Langfuse OTLP bridge** — activated when the ``LANGFUSE_HOST``,
|
||||
``LANGFUSE_PUBLIC_KEY`` and ``LANGFUSE_SECRET_KEY`` env vars are all present.
|
||||
Langfuse ≥4 accepts OTLP/HTTP at ``<host>/api/public/otel``.
|
||||
This is a *second* exporter alongside the existing Langfuse LangChain
|
||||
callback handler in agent.py — both paths emit spans simultaneously.
|
||||
3. **Console** (debug) — activated when ``OTEL_DEBUG=1``.
|
||||
|
||||
* **W3C TraceContext** propagation (``traceparent`` / ``tracestate``) is used for
|
||||
cross-workspace context injection and extraction so A2A hops form a single
|
||||
distributed trace.
|
||||
|
||||
* ``make_trace_middleware()`` returns an ASGI middleware that extracts incoming
|
||||
trace context from HTTP headers and stores it in a ``ContextVar`` so the
|
||||
A2A executor can access it to parent its spans correctly.
|
||||
|
||||
GenAI semantic conventions
|
||||
--------------------------
|
||||
Attribute constants for ``gen_ai.*`` follow OpenTelemetry GenAI SemConv 1.26.
|
||||
|
||||
Usage example
|
||||
-------------
|
||||
# main.py — call once at startup
|
||||
from builtin_tools.telemetry import setup_telemetry, make_trace_middleware
|
||||
setup_telemetry(service_name=workspace_id)
|
||||
instrumented = make_trace_middleware(app.build())
|
||||
|
||||
# Any module
|
||||
from builtin_tools.telemetry import get_tracer
|
||||
tracer = get_tracer()
|
||||
with tracer.start_as_current_span("my_span") as span:
|
||||
span.set_attribute("key", "value")
|
||||
|
||||
# Outgoing HTTP — inject W3C headers
|
||||
from builtin_tools.telemetry import inject_trace_headers
|
||||
headers = inject_trace_headers({"Content-Type": "application/json"})
|
||||
await client.post(url, headers=headers, ...)
|
||||
|
||||
# Incoming HTTP — extract context (done automatically by middleware)
|
||||
from builtin_tools.telemetry import extract_trace_context
|
||||
ctx = extract_trace_context(dict(request.headers))
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
from contextvars import ContextVar
|
||||
from typing import Any, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GenAI Semantic Convention attribute keys (OTel SemConv 1.26)
|
||||
# https://opentelemetry.io/docs/specs/semconv/gen-ai/
|
||||
# ---------------------------------------------------------------------------
|
||||
GEN_AI_SYSTEM = "gen_ai.system"
GEN_AI_REQUEST_MODEL = "gen_ai.request.model"
GEN_AI_OPERATION_NAME = "gen_ai.operation.name"
GEN_AI_USAGE_INPUT_TOKENS = "gen_ai.usage.input_tokens"
GEN_AI_USAGE_OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"

# ---------------------------------------------------------------------------
# Span attribute keys for workspace, A2A and memory operations
# ---------------------------------------------------------------------------
WORKSPACE_ID_ATTR = "workspace.id"
A2A_SOURCE_WORKSPACE = "a2a.source_workspace_id"
A2A_TARGET_WORKSPACE = "a2a.target_workspace_id"
A2A_TASK_ID = "a2a.task_id"
MEMORY_SCOPE = "memory.scope"
MEMORY_QUERY = "memory.query"

# ---------------------------------------------------------------------------
# Module-level state
# ---------------------------------------------------------------------------

# Workspace identifier, read once from the environment at import time.
WORKSPACE_ID: str = os.environ.get("WORKSPACE_ID", "unknown")

# Set by setup_telemetry() once the provider has been configured.
_initialized: bool = False
# opentelemetry.trace.Tracer | _NoopTracer; None until setup succeeds.
_tracer: Any = None

# Hands the incoming trace context from the ASGI middleware to the A2A
# executor.  A ContextVar (rather than a plain global) is safe with asyncio
# because each task inherits a copy of the context at creation time.
_incoming_trace_context: ContextVar[Optional[Any]] = ContextVar(
    "otel_incoming_trace_context",
    default=None,
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def setup_telemetry(service_name: Optional[str] = None) -> None:
    """Initialise the global ``TracerProvider``.  Safe to call multiple times.

    After the first call this is a no-op, whether that call configured a
    provider or found the opentelemetry packages missing.  In the latter case
    the warning is logged once and ``_tracer`` stays ``None``; ``get_tracer()``
    then falls back to a no-op tracer.

    Args:
        service_name: Value for the OTEL ``service.name`` resource attribute.
            Defaults to ``molecule-{WORKSPACE_ID}``.

    Reads configuration from environment variables:

    ``OTEL_EXPORTER_OTLP_ENDPOINT``
        Base URL of an OTLP-compatible collector (e.g. ``http://jaeger:4318``).
        Spans are sent to ``{endpoint}/v1/traces``.

    ``LANGFUSE_HOST`` + ``LANGFUSE_PUBLIC_KEY`` + ``LANGFUSE_SECRET_KEY``
        When all three are set, a second OTLP exporter is wired to
        ``{host}/api/public/otel/v1/traces`` using HTTP Basic auth.

    ``OTEL_DEBUG``
        Set to ``1`` / ``true`` / ``yes`` to also print spans to stdout.

    A failure to set up an individual exporter is logged as a warning and does
    not prevent the remaining exporters or the provider from being registered.
    """
    global _initialized, _tracer

    if _initialized:
        return

    try:
        from opentelemetry import propagate, trace
        from opentelemetry.baggage.propagation import W3CBaggagePropagator
        from opentelemetry.propagators.composite import CompositePropagator
        from opentelemetry.sdk.resources import SERVICE_NAME as OTEL_SERVICE_NAME
        from opentelemetry.sdk.resources import Resource
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
        from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
    except ImportError as exc:
        logger.warning(
            "OTEL: opentelemetry packages not installed — telemetry disabled. "
            "Add opentelemetry-api, opentelemetry-sdk, "
            "opentelemetry-exporter-otlp-proto-http to requirements.txt. "
            "Error: %s",
            exc,
        )
        # Remember the attempt.  Otherwise every get_tracer() call would retry
        # the imports and log this warning again.
        _initialized = True
        return

    svc = service_name or f"molecule-{WORKSPACE_ID}"

    resource = Resource.create(
        {
            OTEL_SERVICE_NAME: svc,
            "service.version": "1.0.0",
            WORKSPACE_ID_ATTR: WORKSPACE_ID,
        }
    )

    provider = TracerProvider(resource=resource)

    def _add_otlp_exporter(endpoint: str, headers: Optional[dict] = None) -> None:
        """Attach a batching OTLP/HTTP exporter for *endpoint* to the provider."""
        from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter

        exporter_kwargs: dict = {"endpoint": endpoint}
        if headers is not None:
            exporter_kwargs["headers"] = headers
        provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(**exporter_kwargs)))

    # -- Exporter 1: Generic OTLP/HTTP ----------------------------------------
    otlp_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT", "").rstrip("/")
    if otlp_endpoint:
        try:
            _add_otlp_exporter(f"{otlp_endpoint}/v1/traces")
            logger.info("OTEL: OTLP/HTTP exporter → %s", otlp_endpoint)
        except ImportError:
            logger.warning(
                "OTEL: OTEL_EXPORTER_OTLP_ENDPOINT is set but "
                "opentelemetry-exporter-otlp-proto-http is not installed"
            )
        except Exception as exc:
            logger.warning("OTEL: OTLP exporter init failed: %s", exc)

    # -- Exporter 2: Langfuse OTLP bridge -------------------------------------
    # Only enabled when host, public key and secret key are all present.
    lf_host = os.environ.get("LANGFUSE_HOST", "").rstrip("/")
    lf_public = os.environ.get("LANGFUSE_PUBLIC_KEY", "")
    lf_secret = os.environ.get("LANGFUSE_SECRET_KEY", "")

    if lf_host and lf_public and lf_secret:
        try:
            lf_endpoint = f"{lf_host}/api/public/otel/v1/traces"
            # HTTP Basic credentials: base64("public:secret").
            token = base64.b64encode(f"{lf_public}:{lf_secret}".encode()).decode()
            _add_otlp_exporter(lf_endpoint, headers={"Authorization": f"Basic {token}"})
            logger.info("OTEL: Langfuse OTLP bridge → %s", lf_endpoint)
        except ImportError:
            logger.warning(
                "OTEL: Langfuse env vars set but "
                "opentelemetry-exporter-otlp-proto-http is not installed"
            )
        except Exception as exc:
            logger.warning("OTEL: Langfuse OTLP bridge init failed: %s", exc)

    # -- Exporter 3: Console (debug) ------------------------------------------
    if os.environ.get("OTEL_DEBUG", "").lower() in ("1", "true", "yes"):
        provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter()))
        logger.info("OTEL: console debug exporter enabled")

    # -- Register global provider + W3C propagators ---------------------------
    trace.set_tracer_provider(provider)
    propagate.set_global_textmap(
        CompositePropagator(
            [
                TraceContextTextMapPropagator(),
                W3CBaggagePropagator(),
            ]
        )
    )

    _tracer = trace.get_tracer(
        "molecule.workspace",
        schema_url="https://opentelemetry.io/schemas/1.26.0",
    )
    _initialized = True
    logger.info("OTEL: telemetry initialised for service '%s'", svc)
|
||||
|
||||
|
||||
def get_tracer() -> Any:
    """Return the module's ``Tracer``, running ``setup_telemetry()`` first if needed.

    When setup did not produce a tracer, a fallback is returned instead so
    instrumented code never raises ``ImportError``: the opentelemetry API's
    ``"molecule.noop"`` tracer if that package imports, else a ``_NoopTracer``.
    """
    if not _initialized:
        setup_telemetry()

    if _tracer is not None:
        return _tracer

    # No configured tracer is available — hand back a harmless stand-in.
    try:
        from opentelemetry import trace

        return trace.get_tracer("molecule.noop")
    except ImportError:
        return _NoopTracer()
|
||||
|
||||
|
||||
def inject_trace_headers(headers: dict) -> dict:
    """Add the current trace context to *headers* and return the same dict.

    The globally configured propagator writes its fields (W3C ``traceparent``
    / ``tracestate`` once ``setup_telemetry()`` has run) directly into the
    dict passed in, so the call can be used inline::

        headers = inject_trace_headers({"Content-Type": "application/json"})
        await client.post(url, headers=headers, ...)

    Any failure, including opentelemetry not being installed, is swallowed and
    *headers* is returned as it stands.
    """
    try:
        from opentelemetry import propagate as otel_propagate

        otel_propagate.inject(headers)
    except Exception:
        # Telemetry must never break the caller.
        pass
    return headers
|
||||
|
||||
|
||||
def extract_trace_context(carrier: dict) -> Any:
    """Build an OpenTelemetry ``Context`` from a header mapping.

    The result of ``propagate.extract(carrier)`` is returned as-is and can be
    passed on like this::

        tracer.start_as_current_span("name", context=ctx)

    ``None`` is returned when extraction raises for any reason, including the
    opentelemetry package being unavailable.
    """
    try:
        from opentelemetry import propagate as otel_propagate

        return otel_propagate.extract(carrier)
    except Exception:
        return None
|
||||
|
||||
|
||||
def get_current_traceparent() -> Optional[str]:
    """Return the W3C ``traceparent`` string for the active span, or ``None``.

    The value has the form ``00-{trace_id:32 hex}-{span_id:16 hex}-{flags:2 hex}``.
    ``None`` is returned when the active span's context is not valid, or when
    anything goes wrong (including opentelemetry not being installed).
    """
    try:
        from opentelemetry import trace

        ctx = trace.get_current_span().get_span_context()
        if not ctx.is_valid:
            return None

        # Emit the real flags byte.  Collapsing "any non-zero value" to "01"
        # would mark a context as sampled when only other flag bits are set.
        flags = int(ctx.trace_flags or 0) & 0xFF
        return f"00-{ctx.trace_id:032x}-{ctx.span_id:016x}-{flags:02x}"
    except Exception:
        return None
|
||||
|
||||
|
||||
def make_trace_middleware(asgi_app: Any) -> Any:
    """Return an ASGI callable that wraps *asgi_app* with trace-context extraction.

    For every ``http`` scope the request headers are decoded, passed to
    ``extract_trace_context`` and the result is stored in the
    ``_incoming_trace_context`` ContextVar while the wrapped app runs; the
    previous value is restored afterwards, even if the app raises.  All other
    scope types are forwarded untouched.

    Usage::

        built = app.build()
        instrumented = make_trace_middleware(built)
        uvicorn.Config(instrumented, ...)
    """

    async def _middleware(scope: dict, receive: Any, send: Any) -> None:  # type: ignore[override]
        if scope.get("type") == "http":
            # ASGI delivers headers as (bytes, bytes) pairs; decode them as
            # latin-1.  A repeated header name keeps its last value.
            decoded_headers: dict[str, str] = {}
            for raw_name, raw_value in scope.get("headers", []):
                decoded_headers[raw_name.decode("latin-1")] = raw_value.decode("latin-1")

            reset_token = _incoming_trace_context.set(
                extract_trace_context(decoded_headers)
            )
            try:
                await asgi_app(scope, receive, send)
            finally:
                _incoming_trace_context.reset(reset_token)
        else:
            await asgi_app(scope, receive, send)

    return _middleware
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers for GenAI attributes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def gen_ai_system_from_model(model_str: str) -> str:
    """Derive the ``gen_ai.system`` value from a ``provider:model`` string.

    The text before the first ``:`` is lower-cased and used as the system
    name, except that ``google_genai`` is reported as ``google``.  A string
    without any ``:`` yields ``"unknown"``.
    """
    provider, separator, _model = model_str.partition(":")
    if not separator:
        return "unknown"

    provider = provider.lower()
    # Only providers whose reported name differs from their prefix need an entry.
    renamed = {"google_genai": "google"}
    return renamed.get(provider, provider)
|
||||
|
||||
|
||||
def _first_token_count(counts: dict, *keys: str) -> Any:
    """Return the first value in *counts* under *keys* that is not ``None``.

    A count of ``0`` is a real value and is returned as-is (an ``or`` chain
    would wrongly treat it as missing).
    """
    for key in keys:
        value = counts.get(key)
        if value is not None:
            return value
    return None


def record_llm_token_usage(span: Any, result: dict) -> None:
    """Copy token counts from a LangGraph ``ainvoke`` result onto *span*.

    Messages are scanned newest-first. The first message whose
    ``response_metadata`` carries a non-empty usage block wins:

    * ``usage`` (Anthropic shape): ``input_tokens`` / ``output_tokens``,
      with ``prompt_tokens`` / ``completion_tokens`` accepted as fallbacks.
    * ``token_usage`` (OpenAI shape): ``prompt_tokens`` /
      ``completion_tokens``.

    Args:
        span: Object exposing ``set_attribute(key, value)``.
        result: Mapping with an optional ``"messages"`` list.

    Returns:
        None. Nothing is recorded when no usage metadata is present, and any
        exception is swallowed so telemetry can never break the caller.
    """
    # (metadata key, accepted input-count keys, accepted output-count keys)
    usage_shapes = (
        ("usage", ("input_tokens", "prompt_tokens"), ("output_tokens", "completion_tokens")),
        ("token_usage", ("prompt_tokens",), ("completion_tokens",)),
    )
    try:
        for msg in reversed(result.get("messages", [])):
            meta = getattr(msg, "response_metadata", {}) or {}
            for meta_key, input_keys, output_keys in usage_shapes:
                counts = meta.get(meta_key, {})
                if not counts:
                    continue
                inp = _first_token_count(counts, *input_keys)
                out = _first_token_count(counts, *output_keys)
                if inp is not None:
                    span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, int(inp))
                if out is not None:
                    span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, int(out))
                # The newest message with usage data is authoritative.
                return
    except Exception:
        pass  # Best-effort — never break the caller
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# No-op fallbacks (used when opentelemetry packages are absent)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _NoopSpan:
|
||||
"""Transparent no-op span that satisfies the context-manager protocol."""
|
||||
|
||||
def set_attribute(self, key: str, value: Any) -> None: # noqa: ARG002
|
||||
pass
|
||||
|
||||
def set_status(self, *args: Any, **kwargs: Any) -> None:
|
||||
pass
|
||||
|
||||
def record_exception(self, exc: BaseException, *args: Any, **kwargs: Any) -> None:
|
||||
pass
|
||||
|
||||
def add_event(self, name: str, *args: Any, **kwargs: Any) -> None:
|
||||
pass
|
||||
|
||||
def __enter__(self) -> "_NoopSpan":
|
||||
return self
|
||||
|
||||
def __exit__(self, *args: Any) -> None:
|
||||
pass
|
||||
|
||||
|
||||
class _NoopTracer:
    """Stand-in tracer handed out when the OpenTelemetry SDK is unavailable.

    Both span factories ignore every argument and return a new
    ``_NoopSpan``.
    """

    def start_as_current_span(self, name: str, *args: Any, **kwargs: Any) -> _NoopSpan:  # noqa: ARG002
        placeholder = _NoopSpan()
        return placeholder

    def start_span(self, name: str, *args: Any, **kwargs: Any) -> _NoopSpan:  # noqa: ARG002
        placeholder = _NoopSpan()
        return placeholder
|
||||
@@ -1,697 +0,0 @@
|
||||
"""Temporal durable execution wrapper for Molecule AI A2A workspaces.
|
||||
|
||||
Architecture
|
||||
-----------
|
||||
A co-located Temporal worker runs as an asyncio background task **inside the
|
||||
same process** as the A2A server. This means worker activities share the same
|
||||
memory space as the A2A handler, which lets us bridge non-serialisable objects
|
||||
(LangGraph agent, EventQueue, RequestContext) through an in-process registry
|
||||
without having to serialise them through Temporal's state store.
|
||||
|
||||
Workflow stages (names mirror the OTEL span names in a2a_executor.py):
|
||||
|
||||
task_receive → llm_call → task_complete
|
||||
|
||||
task_receive — durable checkpoint: task acknowledged, queued
|
||||
llm_call — durable checkpoint: LLM execution + SSE streaming (retryable)
|
||||
task_complete — durable checkpoint: execution finished, telemetry recorded
|
||||
|
||||
Crash-recovery behaviour
|
||||
------------------------
|
||||
If the process crashes while ``llm_call`` is running, Temporal retries the
|
||||
activity on the restarted process. The in-process registry is empty after a
|
||||
restart, so the activity detects a registry miss, logs a warning, and returns
|
||||
an error result. The SSE client connection is already gone at that point so
|
||||
no response can be delivered — but the task is permanently recorded in
|
||||
Temporal's history and will not silently disappear.
|
||||
|
||||
Env vars
|
||||
--------
|
||||
TEMPORAL_HOST Temporal gRPC endpoint (default: ``localhost:7233``)
|
||||
Durable execution is used only when a Temporal server is reachable
|
||||
at this address; if none is (including at the default), the runtime runs in direct-execution mode.
|
||||
|
||||
Dependencies (optional)
|
||||
-----------
|
||||
temporalio>=1.7.0
|
||||
|
||||
Add to requirements.txt to enable. The module loads and the wrapper class
|
||||
works without the package installed — all Temporal paths return early with a
|
||||
graceful fallback to direct execution.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import dataclasses
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from datetime import timedelta
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _platform_url() -> str:
|
||||
"""Return the platform URL, defaulting to host.docker.internal.
|
||||
|
||||
The workspace runtime always runs inside a Docker container, so
|
||||
``localhost`` refers to the container itself, not the platform host.
|
||||
The platform API is only reachable via ``host.docker.internal`` from
|
||||
within a workspace container, regardless of how the container was started.
|
||||
"""
|
||||
return os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Constants
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Task queue name used both when building the co-located worker and when
# submitting workflows, so the worker picks up the workflows this process starts.
_TASK_QUEUE = "molecule-agent-tasks"
# Passed as ``execution_timeout`` when a workflow is submitted.
_WORKFLOW_EXECUTION_TIMEOUT = timedelta(minutes=30)
# Passed as ``start_to_close_timeout`` for every activity in the workflow.
_ACTIVITY_START_TO_CLOSE_TIMEOUT = timedelta(minutes=10)
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Checkpoint persistence (non-fatal)
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def _fetch_latest_checkpoint(workspace_id: str) -> Optional[dict]:
    """GET ``/workspaces/:id/checkpoints/latest`` for *workspace_id*.

    Non-fatal by design: an HTTP error, network failure, timeout, missing
    ``platform_auth`` module, or undecodable body all yield ``None`` so the
    caller continues without a resume context. A 404 means "no checkpoints
    yet" and also yields ``None``.

    A body that decodes to something other than a JSON object is rejected as
    well: the declared return type is a dict and callers use ``.get()`` on
    it, so passing a list or string through would raise in the caller.

    Args:
        workspace_id: The workspace to query.

    Returns:
        The checkpoint as a dict, or ``None`` when none is available.

    Reads:
        PLATFORM_URL  Platform base URL (via ``_platform_url()``).
    """
    try:
        from platform_auth import auth_headers as _auth_headers  # type: ignore[import]

        platform_url = _platform_url()
        url = f"{platform_url}/workspaces/{workspace_id}/checkpoints/latest"
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(url, headers=_auth_headers())
            if resp.status_code == 404:
                return None
            resp.raise_for_status()
            data = resp.json()
    except Exception as exc:
        logger.debug(
            "Temporal: latest checkpoint fetch skipped workspace=%s: %s "
            "(non-fatal — starting fresh context)",
            workspace_id,
            exc,
        )
        return None

    if not isinstance(data, dict):
        logger.debug(
            "Temporal: latest checkpoint ignored workspace=%s: "
            "unexpected payload type %s (non-fatal — starting fresh context)",
            workspace_id,
            type(data).__name__,
        )
        return None
    return data
|
||||
|
||||
|
||||
async def _save_checkpoint(
    workspace_id: str,
    workflow_id: str,
    step_name: str,
    step_index: int,
    payload: Optional[dict] = None,
) -> None:
    """POST one step checkpoint to ``/workspaces/:id/checkpoints``.

    Best-effort: every failure (HTTP error status, network problem, timeout,
    missing ``platform_auth`` module) is logged at WARNING level and
    swallowed, so the calling activity always continues.

    Args:
        workspace_id: Workspace the checkpoint belongs to (URL path segment).
        workflow_id:  Identifier of the workflow execution.
        step_name:    Stage name (``task_receive`` / ``llm_call`` /
                      ``task_complete``).
        step_index:   Index of the stage within the workflow.
        payload:      Optional JSON-serialisable dict; the ``payload`` key is
                      left out of the request body when this is ``None``.

    Reads:
        PLATFORM_URL  Platform base URL (via ``_platform_url()``).
    """
    try:
        from platform_auth import auth_headers as _auth_headers  # type: ignore[import]

        endpoint = f"{_platform_url()}/workspaces/{workspace_id}/checkpoints"

        request_body: dict = dict(
            workflow_id=workflow_id,
            step_name=step_name,
            step_index=step_index,
        )
        if payload is not None:
            request_body["payload"] = payload

        async with httpx.AsyncClient(timeout=5.0) as http:
            response = await http.post(
                endpoint, json=request_body, headers=_auth_headers()
            )
            response.raise_for_status()

        logger.debug(
            "Temporal: checkpoint saved workspace=%s wf=%s step=%s idx=%d",
            workspace_id,
            workflow_id,
            step_name,
            step_index,
        )
    except Exception as exc:
        # Losing a checkpoint is survivable; failing the activity is not.
        logger.warning(
            "Temporal: checkpoint failed workspace=%s wf=%s step=%s: %s "
            "(non-fatal — workflow continues)",
            workspace_id,
            workflow_id,
            step_name,
            exc,
        )
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Serialisable data models
|
||||
# These are the only objects that cross the Temporal serialisation boundary.
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@dataclasses.dataclass
class AgentTaskInput:
    """Serialisable snapshot of an incoming A2A task.

    All fields must be JSON-representable so that Temporal can persist them in
    its workflow history (used for crash recovery and replay).
    """

    # Key into the in-process task registry; also used as the checkpoint
    # ``workflow_id`` by the activities.
    task_id: str
    # Conversation/context identifier; only logged by ``task_receive``.
    context_id: str
    # Text extracted from the incoming message ("" when none was found).
    user_input: str
    # Model identifier taken from the executor ("unknown" when absent).
    model: str
    # Workspace whose platform checkpoint endpoints are called.
    workspace_id: str
    history: list  # [[role, content], ...] — tuples converted to lists
|
||||
|
||||
|
||||
@dataclasses.dataclass
class LLMResult:
    """Serialisable execution result passed from ``llm_call`` to ``task_complete``."""

    # Final text produced by the executor; "" on failure or when it returned nothing.
    final_text: str
    # True when the executor finished without raising.
    success: bool
    # Human-readable failure description; "" on success.
    error: str = ""
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# In-process registry
|
||||
#
|
||||
# Maps task_id → {executor, context, event_queue, final_text}
|
||||
# Activities look up non-serialisable objects here. The registry is
|
||||
# populated by TemporalWorkflowWrapper.run() before the workflow starts and
|
||||
# cleaned up in the finally block when the workflow completes.
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
_task_registry: dict[str, dict[str, Any]] = {}
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Temporal workflow + activities
|
||||
# Loaded only when the temporalio package is installed. The surrounding
|
||||
# try/except ensures the module imports cleanly without the package.
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
_TEMPORAL_AVAILABLE = False
|
||||
|
||||
try:
|
||||
from temporalio import activity, workflow
|
||||
from temporalio.client import Client
|
||||
from temporalio.worker import Worker
|
||||
|
||||
_TEMPORAL_AVAILABLE = True
|
||||
|
||||
# ── Activities ────────────────────────────────────────────────────────── #
|
||||
|
||||
@activity.defn(name="task_receive")
async def task_receive_activity(inp: AgentTaskInput) -> dict:
    """Stage 1: acknowledge that a task was received.

    Logs the receipt, checks whether the in-process registry still holds an
    entry for ``inp.task_id``, and writes a step-0 checkpoint describing the
    outcome. A missing entry is reported as ``registry_miss`` (the
    crash-recovery path) with a warning; otherwise the status is
    ``received``.

    Checkpoint problems never fail the activity.

    Args:
        inp: Serialisable description of the task.

    Returns:
        ``{"task_id": ..., "status": "received" | "registry_miss"}``.
    """
    logger.info(
        "Temporal[task_receive] task_id=%s context_id=%s workspace=%s model=%s",
        inp.task_id,
        inp.context_id,
        inp.workspace_id,
        inp.model,
    )

    is_registered = inp.task_id in _task_registry
    if not is_registered:
        logger.warning(
            "Temporal[task_receive] task_id=%s not found in registry "
            "(crash recovery path — no SSE client connection available)",
            inp.task_id,
        )
    status = "received" if is_registered else "registry_miss"

    try:
        await _save_checkpoint(
            inp.workspace_id, inp.task_id, "task_receive", 0,
            {"task_id": inp.task_id, "status": status},
        )
    except Exception as _ckpt_exc:  # pragma: no cover
        logger.warning("task_receive checkpoint swallowed: %s", _ckpt_exc)

    return {"task_id": inp.task_id, "status": status}
|
||||
|
||||
@activity.defn(name="llm_call")
async def llm_call_activity(inp: AgentTaskInput) -> LLMResult:
    """Stage 2: run the executor for the task and report the outcome.

    Looks up the executor, request context and event queue in the in-process
    registry and awaits ``executor._core_execute(context, event_queue)``.
    The returned text is cached on the registry entry and wrapped in a
    successful ``LLMResult``.

    Failures are reported through the return value, not by raising:

    * Registry miss (e.g. after a process restart): returns a failed
      ``LLMResult`` explaining that the original client connection is gone.
    * Exception from ``_core_execute``: logged with traceback and returned
      as a failed ``LLMResult`` carrying ``str(exc)``.

    Because the activity returns normally in both cases, Temporal sees it as
    completed and does not retry it on these paths.

    A step-1 checkpoint is written on every path; checkpoint problems never
    fail the activity.

    Args:
        inp: Serialisable description of the task.

    Returns:
        The ``LLMResult`` describing success or failure.
    """
    logger.info("Temporal[llm_call] task_id=%s", inp.task_id)

    registered = _task_registry.get(inp.task_id)
    if registered is None:
        msg = (
            f"task_id={inp.task_id} not in registry — "
            "process likely restarted; original SSE client connection is gone"
        )
        logger.warning("Temporal[llm_call] registry miss: %s", msg)
        outcome = LLMResult(final_text="", success=False, error=msg)
        checkpoint_payload = {"success": False, "error": msg}
    else:
        try:
            executor = registered["executor"]
            context = registered["context"]
            event_queue = registered["event_queue"]

            # _core_execute() performs the whole execution pipeline.
            final_text = await executor._core_execute(context, event_queue)

            # Keep the text on the registry entry for later observability.
            registered["final_text"] = final_text or ""
            outcome = LLMResult(final_text=final_text or "", success=True)
        except Exception as exc:
            logger.error(
                "Temporal[llm_call] task_id=%s execution error: %s",
                inp.task_id,
                exc,
                exc_info=True,
            )
            outcome = LLMResult(final_text="", success=False, error=str(exc))
        checkpoint_payload = {
            "success": outcome.success,
            "error": outcome.error or None,
        }

    try:
        await _save_checkpoint(
            inp.workspace_id, inp.task_id, "llm_call", 1,
            checkpoint_payload,
        )
    except Exception as _ckpt_exc:  # pragma: no cover
        logger.warning("llm_call checkpoint swallowed: %s", _ckpt_exc)

    return outcome
|
||||
|
||||
@activity.defn(name="task_complete")
async def task_complete_activity(result: LLMResult) -> None:
    """Stage 3: log the final outcome of the task.

    Emits an INFO line with the length of the final text on success, or a
    WARNING line with the error on failure. No checkpoint is written here:
    only the ``LLMResult`` is passed in, so neither the workspace id nor the
    task id is available.

    Args:
        result: Outcome produced by the ``llm_call`` stage.
    """
    if not result.success:
        logger.warning(
            "Temporal[task_complete] success=False error=%r",
            result.error,
        )
        return

    logger.info(
        "Temporal[task_complete] success=True final_text_len=%d",
        len(result.final_text),
    )
|
||||
|
||||
# ── Workflow ──────────────────────────────────────────────────────────── #
|
||||
|
||||
@workflow.defn
class MoleculeAIAgentWorkflow:
    """Durable Temporal workflow for Molecule AI A2A agent task execution.

    Runs three activities strictly in sequence:

        task_receive → llm_call → task_complete

    Each completed activity is recorded in the workflow history. All three
    are invoked with the same options: only ``start_to_close_timeout`` is
    set, and no explicit retry policy is passed here.
    """

    @workflow.run
    async def run(self, inp: AgentTaskInput) -> LLMResult:
        """Execute the three stages for *inp* and return the ``llm_call`` result.

        Args:
            inp: Serialisable task description, forwarded unchanged to the
                first two activities.

        Returns:
            The ``LLMResult`` produced by ``llm_call_activity``.
        """
        # Options shared by every activity invocation below.
        opts: dict[str, Any] = {
            "start_to_close_timeout": _ACTIVITY_START_TO_CLOSE_TIMEOUT,
        }

        # Stage 1 — acknowledge receipt (lightweight checkpoint)
        await workflow.execute_activity(task_receive_activity, inp, **opts)

        # Stage 2 — LLM execution (main work; retryable on crash/timeout)
        result: LLMResult = await workflow.execute_activity(
            llm_call_activity, inp, **opts
        )

        # Stage 3 — record completion (lightweight checkpoint)
        await workflow.execute_activity(task_complete_activity, result, **opts)

        return result
|
||||
|
||||
except ImportError:
|
||||
# temporalio not installed — the wrapper class below will gracefully fall
|
||||
# back to direct execution for every call.
|
||||
logger.debug(
|
||||
"Temporal: temporalio package not installed — "
|
||||
"durable execution disabled (add temporalio>=1.7.0 to requirements.txt)"
|
||||
)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# TemporalWorkflowWrapper
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TemporalWorkflowWrapper:
    """Wraps ``LangGraphA2AExecutor.execute()`` with Temporal durable execution.

    Each ``run()`` call is routed through a ``MoleculeAIAgentWorkflow``.
    Whenever Temporal cannot be used (package missing, server unreachable,
    worker no longer running, input extraction failure, workflow error) the
    call falls back to ``executor._core_execute()`` so the A2A server keeps
    answering.

    Lifecycle
    ---------
    1. ``create_wrapper()`` — instantiate and register the global singleton.
    2. ``await wrapper.start()`` — connect to Temporal, launch the background
       worker. No-op (with a log message) if Temporal is unusable.
    3. Normal operation — ``wrapper.run()`` is called from ``execute()``.
    4. ``await wrapper.stop()`` — cancel the background worker task on shutdown.

    Co-located worker pattern
    -------------------------
    The Temporal worker is started with ``asyncio.create_task`` in the event
    loop that calls ``start()``, so activities run in this process and can
    reach the in-process task registry.

    If the worker task ends on its own (crash, lost connection), the wrapper
    marks itself unavailable. Without that, workflows would be submitted to
    a queue nobody polls and callers would wait until the workflow execution
    timeout.

    Env vars
    --------
    ``TEMPORAL_HOST``   Temporal gRPC address, e.g. ``localhost:7233`` or
                        ``temporal.internal:7233``. Defaults to
                        ``localhost:7233``. If Temporal is not reachable at
                        this address, the wrapper falls back to direct execution.
    """

    def __init__(self) -> None:
        self._host: str = os.environ.get("TEMPORAL_HOST", "localhost:7233")
        self._client: Optional[Any] = None
        self._worker: Optional[Any] = None
        self._worker_task: Optional[asyncio.Task] = None  # type: ignore[type-arg]
        self._available: bool = False

    # ── Lifecycle ─────────────────────────────────────────────────────────── #

    async def start(self) -> None:
        """Connect to Temporal and start the co-located background worker.

        Safe to call multiple times: returns immediately while a worker is
        running, and reconnects if a previous worker has died.
        Never raises — logs and returns on any failure.
        """
        if not _TEMPORAL_AVAILABLE:
            logger.info(
                "Temporal: temporalio package not installed — "
                "all tasks will use direct execution. "
                "To enable durable execution: pip install temporalio>=1.7.0"
            )
            return

        if self.is_available():
            return  # already started and the worker is still running

        # Connect to the Temporal server
        try:
            self._client = await Client.connect(self._host)  # type: ignore[name-defined]
            logger.info("Temporal: connected to %s", self._host)
        except Exception as exc:
            logger.warning(
                "Temporal: cannot connect to %s (%s) — "
                "all tasks will use direct execution (no durable state)",
                self._host,
                exc,
            )
            return

        # Start the worker as an asyncio background task
        try:
            self._worker = Worker(  # type: ignore[name-defined]
                self._client,
                task_queue=_TASK_QUEUE,
                workflows=[MoleculeAIAgentWorkflow],  # type: ignore[name-defined]
                activities=[
                    task_receive_activity,  # type: ignore[name-defined]
                    llm_call_activity,  # type: ignore[name-defined]
                    task_complete_activity,  # type: ignore[name-defined]
                ],
            )
            self._worker_task = asyncio.create_task(
                self._worker.run(),
                name="temporal-worker",
            )
            # Learn about an unexpected worker exit as soon as it happens.
            self._worker_task.add_done_callback(self._on_worker_done)
            self._available = True
            logger.info(
                "Temporal: co-located worker started on task queue '%s'",
                _TASK_QUEUE,
            )
        except Exception as exc:
            logger.warning(
                "Temporal: worker initialisation failed (%s) — "
                "falling back to direct execution",
                exc,
            )

    def _on_worker_done(self, task: Any) -> None:
        """Mark the wrapper unavailable when the worker task finishes."""
        if task is not self._worker_task:
            return  # callback from a worker that has since been replaced
        was_available = self._available
        self._available = False
        if task.cancelled():
            return  # normal shutdown through stop()
        # Retrieving the exception also marks it as handled for asyncio.
        exc = task.exception()
        if was_available:
            logger.warning(
                "Temporal: worker task exited unexpectedly (%s) — "
                "subsequent tasks will use direct execution",
                exc,
            )

    async def stop(self) -> None:
        """Cancel the Temporal worker background task and wait for it to end."""
        self._available = False
        if self._worker_task and not self._worker_task.done():
            self._worker_task.cancel()
            try:
                await self._worker_task
            except (asyncio.CancelledError, Exception):
                pass
            logger.info("Temporal: worker stopped")

    # ── Public API ────────────────────────────────────────────────────────── #

    def is_available(self) -> bool:
        """Return ``True`` if Temporal is connected and the worker is running.

        A worker task that has already finished counts as "not running",
        even if its done-callback has not fired yet.
        """
        if (
            self._available
            and self._worker_task is not None
            and self._worker_task.done()
        ):
            self._available = False
        return self._available

    async def run(
        self,
        executor: Any,
        context: Any,
        event_queue: Any,
    ) -> None:
        """Route one A2A task execution through a Temporal durable workflow.

        Steps
        -----
        1. Build a serialisable ``AgentTaskInput`` from the A2A request context.
        2. Store non-serialisable state (executor, context, event_queue) in
           the in-process ``_task_registry`` keyed by task_id.
        3. Submit and await ``MoleculeAIAgentWorkflow`` on the Temporal server.
        4. Clean up the registry entry (always, via ``finally``).

        Falls back to ``executor._core_execute()`` if:
        - Temporal is not available (``is_available()`` is False, which
          includes a worker task that has stopped).
        - Input extraction fails.
        - The workflow raises any exception.

        Args:
            executor: Object exposing ``_core_execute(context, event_queue)``
                and, optionally, ``_model``.
            context: A2A request context; ``task_id`` / ``context_id`` are
                read when present.
            event_queue: Event queue handed through to the executor.
        """
        if not self.is_available() or self._client is None:
            # Temporal unavailable — silent direct fallback
            await executor._core_execute(context, event_queue)
            return

        task_id = getattr(context, "task_id", None) or str(uuid.uuid4())
        context_id = getattr(context, "context_id", None) or str(uuid.uuid4())

        # Build serialisable AgentTaskInput
        try:
            from adapters.shared_runtime import (
                extract_history as _extract_history,
                extract_message_text,
            )

            user_input = extract_message_text(context) or ""
            raw_history = _extract_history(context)
            # Convert (role, content) tuples → [role, content] lists (JSON-safe)
            history: list = [list(pair) for pair in raw_history]
        except Exception as exc:
            logger.warning(
                "Temporal: failed to extract serialisable task input (%s) — "
                "falling back to direct execution",
                exc,
            )
            await executor._core_execute(context, event_queue)
            return

        workspace_id_env = os.environ.get("WORKSPACE_ID", "unknown")

        # Issue #837: query the latest checkpoint for this workspace and, if
        # one exists, prepend a note about it to the serialised history.
        # Non-fatal: a missing or 404 response means starting fresh.
        # NOTE(review): the note only reaches ``inp.history``; the activities
        # visible here execute with ``context`` from the registry and do not
        # read ``inp.history`` — confirm the agent actually sees this note.
        last_ckpt = await _fetch_latest_checkpoint(workspace_id_env)
        if last_ckpt:
            step_name = last_ckpt.get("step_name", "unknown")
            workflow_id_ckpt = last_ckpt.get("workflow_id", "")
            completed_at = last_ckpt.get("completed_at", "")
            ckpt_note = (
                f"[SYSTEM: This workspace was previously executing workflow "
                f"'{workflow_id_ckpt}'. The last recorded step was '{step_name}' "
                f"(completed at {completed_at}). If the current task is a "
                f"continuation of that workflow, resume from this point. "
                f"Otherwise ignore this context and start fresh.]"
            )
            # Prepend so the note comes before any user messages for this task.
            history = [["system", ckpt_note]] + history
            logger.info(
                "Temporal: injecting checkpoint context task_id=%s last_step=%s wf=%s",
                task_id,
                step_name,
                workflow_id_ckpt,
            )

        inp = AgentTaskInput(
            task_id=task_id,
            context_id=context_id,
            user_input=user_input,
            model=getattr(executor, "_model", "unknown"),
            workspace_id=workspace_id_env,
            history=history,
        )

        # Register non-serialisable in-process state for activities to access
        _task_registry[task_id] = {
            "executor": executor,
            "context": context,
            "event_queue": event_queue,
            "final_text": "",
        }

        try:
            logger.info(
                "Temporal: starting workflow molecule-%s on queue '%s'",
                task_id,
                _TASK_QUEUE,
            )
            await self._client.execute_workflow(
                MoleculeAIAgentWorkflow.run,  # type: ignore[name-defined]
                inp,
                id=f"molecule-{task_id}",
                task_queue=_TASK_QUEUE,
                execution_timeout=_WORKFLOW_EXECUTION_TIMEOUT,
            )
        except Exception as exc:
            logger.error(
                "Temporal: workflow molecule-%s failed (%s) — "
                "falling back to direct execution so client receives a response",
                task_id,
                exc,
                exc_info=True,
            )
            # Direct fallback ensures the SSE client is never left hanging.
            # NOTE(review): if the workflow failed after llm_call had already
            # run _core_execute, this executes it a second time — confirm
            # that a repeated execution is acceptable.
            await executor._core_execute(context, event_queue)
        finally:
            _task_registry.pop(task_id, None)
|
||||
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
# Module-level singleton helpers
|
||||
# Used by a2a_executor.py and main.py
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
_global_wrapper: Optional[TemporalWorkflowWrapper] = None
|
||||
|
||||
|
||||
def get_wrapper() -> Optional[TemporalWorkflowWrapper]:
    """Return the global ``TemporalWorkflowWrapper``, or ``None`` if not set.

    Called from ``LangGraphA2AExecutor.execute()`` on every request.
    Returns ``None`` before ``create_wrapper()`` is called (direct-execution mode).

    This is a plain read of the module-level singleton; it never creates one.
    """
    return _global_wrapper
|
||||
|
||||
|
||||
def create_wrapper() -> TemporalWorkflowWrapper:
    """Return the process-wide ``TemporalWorkflowWrapper``, creating it once.

    The first call instantiates the wrapper and stores it in the module-level
    singleton; later calls return that same instance. The wrapper is not
    started here — call ``await wrapper.start()`` afterwards to connect to
    Temporal and launch the background worker.

    Example (in main.py)::

        from builtin_tools.temporal_workflow import create_wrapper as create_temporal_wrapper
        temporal_wrapper = create_temporal_wrapper()
        await temporal_wrapper.start()   # connects + starts worker
        try:
            await server.serve()
        finally:
            await temporal_wrapper.stop()
    """
    global _global_wrapper
    wrapper = _global_wrapper
    if wrapper is None:
        wrapper = TemporalWorkflowWrapper()
        _global_wrapper = wrapper
    return wrapper
|
||||
@@ -1,57 +0,0 @@
|
||||
"""Helpers for building / mutating the workspace ``AgentCard``.
|
||||
|
||||
Kept as their own module so the behavior is unit-testable without booting
|
||||
the whole runtime (``main.py`` is ``# pragma: no cover``).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterable
|
||||
|
||||
from a2a.types import AgentCard, AgentSkill
|
||||
|
||||
|
||||
def enrich_card_skills(card: AgentCard, loaded_skills: Iterable | None) -> bool:
    """Replace ``card.skills`` in place with rich metadata from loaded skills.

    The card is first built from static ``config.skills`` names so it can be
    served early; this swaps in the richer descriptions/tags/examples once
    the adapter's skill loader has produced them.

    Args:
        card: The agent card to mutate.
        loaded_skills: Iterable of objects exposing
            ``.metadata.{id,name,description,tags,examples}``; may be
            ``None``. Any iterable is accepted, including one-shot
            generators.

    Returns:
        ``True`` when ``card.skills`` was replaced. ``False`` when the swap
        was skipped or failed, in which case ``card.skills`` is untouched:

        * ``loaded_skills`` is None / empty. An empty generator counts as
          empty too, although it is truthy, so it cannot wipe the existing
          static stubs.
        * Building any ``AgentSkill`` fails, e.g. an element does not follow
          the expected shape.

    Failures never raise, so a malformed ``loaded_skills`` cannot turn an
    otherwise successful boot into an error. A warning line is printed
    instead.
    """
    if not loaded_skills:
        return False

    try:
        rich = [
            AgentSkill(
                id=skill.metadata.id,
                name=skill.metadata.name,
                description=skill.metadata.description,
                tags=skill.metadata.tags,
                examples=skill.metadata.examples,
            )
            for skill in loaded_skills
        ]
    except Exception as enrich_err:  # noqa: BLE001
        print(
            f"Warning: skill metadata enrichment failed (keeping static "
            f"stubs from config.skills): {type(enrich_err).__name__}: {enrich_err}",
            flush=True,
        )
        return False

    if not rich:
        # The iterable was truthy but yielded nothing (e.g. a generator).
        return False

    card.skills = rich
    return True
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user