Merge remote-tracking branch 'origin/staging' into docs/auto-promote-staging-prereq-comment
# Conflicts: # .github/workflows/auto-promote-staging.yml
This commit is contained in:
commit
07a17c2e59
80
.github/dependabot.yml
vendored
Normal file
80
.github/dependabot.yml
vendored
Normal file
@ -0,0 +1,80 @@
|
||||
# Dependabot — auto-bump pinned dependencies.
|
||||
#
|
||||
# Why this exists:
|
||||
#
|
||||
# All `uses:` references in .github/workflows/*.yml are pinned to commit
|
||||
# SHAs (with `# v<N>` comments for human readability) instead of mutable
|
||||
# tags like `@v4`. Tag pinning is a known supply-chain risk: a maintainer
|
||||
# (or compromised maintainer account) can repoint `@v4` to malicious code
|
||||
# and our pipelines silently pull it. SHA pinning closes that risk.
|
||||
#
|
||||
# But SHA pinning has a maintenance cost: each upstream legitimate fix
|
||||
# requires manually finding + bumping the SHA. Dependabot for Actions
|
||||
# closes that gap by opening PRs to bump pinned SHAs whenever upstream
|
||||
# tags a new version. Reviewer evaluates the bump like any other
|
||||
# dependency PR.
|
||||
#
|
||||
# Combined: SHA pinning gives us security, Dependabot keeps us current.
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
# GitHub Actions — every workflow file under .github/workflows/.
|
||||
# Weekly cadence is enough for a CI surface this size; the supply-
|
||||
# chain attack window is "minutes between repoint and pull," and
|
||||
# weekly auto-bumps don't help with zero-days regardless. The point
|
||||
# is to pull in non-zero-day fixes without operator effort, not to
|
||||
# be real-time.
|
||||
- package-ecosystem: github-actions
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 5
|
||||
labels:
|
||||
- dependencies
|
||||
- github-actions
|
||||
commit-message:
|
||||
prefix: chore(deps)
|
||||
include: scope
|
||||
|
||||
# Go module — workspace-server. Bumps go.mod deps via PR weekly.
|
||||
- package-ecosystem: gomod
|
||||
directory: "/workspace-server"
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 5
|
||||
labels:
|
||||
- dependencies
|
||||
- go
|
||||
commit-message:
|
||||
prefix: chore(deps)
|
||||
include: scope
|
||||
|
||||
# npm — canvas (Next.js bundle). Largest dep tree in this repo;
|
||||
# weekly cadence keeps the security surface fresh without flooding
|
||||
# the queue. open-pull-requests-limit: 10 because npm churns more
|
||||
# than the others.
|
||||
- package-ecosystem: npm
|
||||
directory: "/canvas"
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 10
|
||||
labels:
|
||||
- dependencies
|
||||
- npm
|
||||
commit-message:
|
||||
prefix: chore(deps)
|
||||
include: scope
|
||||
|
||||
# Python — workspace runtime requirements. Pip/requirements.txt-
|
||||
# backed rather than pyproject.toml; Dependabot supports both.
|
||||
- package-ecosystem: pip
|
||||
directory: "/workspace"
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 5
|
||||
labels:
|
||||
- dependencies
|
||||
- python
|
||||
commit-message:
|
||||
prefix: chore(deps)
|
||||
include: scope
|
||||
205
.github/workflows/auto-promote-on-e2e.yml
vendored
205
.github/workflows/auto-promote-on-e2e.yml
vendored
@ -1,31 +1,68 @@
|
||||
name: Auto-promote :latest on E2E green
|
||||
name: Auto-promote :latest after main image build
|
||||
|
||||
# Retags `ghcr.io/molecule-ai/{platform,platform-tenant}:staging-<sha>`
|
||||
# → `:latest` whenever E2E Staging SaaS passes for a `main` push.
|
||||
# → `:latest` after either the image build or E2E completes on a `main`
|
||||
# push, gated on E2E Staging SaaS not being red for that SHA.
|
||||
#
|
||||
# This is the doc-aligned alternative to the (deferred) Phase 2 canary
|
||||
# fleet — staging E2E catches ~90% of what canary would catch at 0%
|
||||
# ongoing infra cost. See `molecule-controlplane/docs/canary-tenants.md`
|
||||
# section "Do we actually need canary right now?" — recommended
|
||||
# sequencing for the current scale (≤20 paying tenants).
|
||||
# Why two triggers:
|
||||
#
|
||||
# Why a separate workflow rather than folding into e2e-staging-saas.yml:
|
||||
# - Keeps test concerns separate from release concerns.
|
||||
# - Disabling promote (e.g. during an incident) is one toggle, not an
|
||||
# edit to the long E2E workflow file.
|
||||
# - When Phase 2 canary work eventually lands, the canary path can
|
||||
# replace this file's trigger without touching the E2E workflow.
|
||||
# `publish-workspace-server-image` and `e2e-staging-saas` are both
|
||||
# paths-filtered, but with DIFFERENT path sets:
|
||||
#
|
||||
# Why trigger on `main` only:
|
||||
# - `:latest` is what prod tenants pull. We only want SHAs that have
|
||||
# reached `main` (via auto-promote-staging) to advance `:latest`.
|
||||
# - Triggering on staging would let a staging-only revert advance
|
||||
# `:latest` to a SHA that never reaches `main`, breaking the
|
||||
# "production runs what's on `main`" invariant.
|
||||
# publish-workspace-server-image:
|
||||
# workspace-server/**, canvas/**, manifest.json
|
||||
#
|
||||
# e2e-staging-saas (full lifecycle):
|
||||
# workspace-server/internal/handlers/{registry,workspace_provision,
|
||||
# a2a_proxy}.go, workspace-server/internal/middleware/**,
|
||||
# workspace-server/internal/provisioner/**, tests/e2e/test_staging_full_saas.sh
|
||||
#
|
||||
# The E2E set is a strict SUBSET of the publish set. So:
|
||||
# - canvas/** changes → publish fires, E2E does not
|
||||
# - workspace-server/cmd/** changes → publish fires, E2E does not
|
||||
# - workspace-server/internal/sweep/** → publish fires, E2E does not
|
||||
#
|
||||
# The previous version triggered ONLY on E2E completion, which meant
|
||||
# non-E2E-path changes (canvas, cmd, sweep, etc.) rebuilt the image
|
||||
# but never advanced `:latest`. Result: as of 2026-04-28 this workflow
|
||||
# had run zero times since merge despite eight main pushes — `:latest`
|
||||
# was ~7 hours / 9 PRs behind main with no human realising. See
|
||||
# `molecule-core` Slack discussion 2026-04-28.
|
||||
#
|
||||
# Adding `publish-workspace-server-image` as a second trigger closes
|
||||
# the gap: any image rebuild on main eligibly advances `:latest`.
|
||||
#
|
||||
# Why E2E remains a kill-switch (not the trigger):
|
||||
#
|
||||
# When E2E DID run for this SHA and ended red, we abort — `:latest`
|
||||
# stays on the prior known-good digest. When E2E didn't run (paths
|
||||
# filtered out), we proceed: pre-merge gates already validated this
|
||||
# SHA on staging via auto-promote-staging requiring CI + E2E Canvas +
|
||||
# E2E API + CodeQL all green. Image content for non-E2E-paths
|
||||
# (canvas, cmd, sweep) is exercised by those staging gates.
|
||||
#
|
||||
# Why `main` only:
|
||||
#
|
||||
# `:latest` is what prod tenants pull. We only want SHAs that have
|
||||
# reached main (via auto-promote-staging) to advance `:latest`.
|
||||
# Triggering on staging would let a staging-only revert advance
|
||||
# `:latest` to a SHA that never reaches main, breaking the "production
|
||||
# runs what's on main" invariant.
|
||||
#
|
||||
# Idempotency:
|
||||
#
|
||||
# When a SHA touches paths that match BOTH publish and E2E, both
|
||||
# workflows fire and complete. Both trigger this workflow on
|
||||
# completion → two runs race. Both retag `:staging-<sha>` →
|
||||
# `:latest`. crane tag is idempotent (re-tagging the same digest is a
|
||||
# no-op), so the second run is harmless. concurrency group serializes
|
||||
# them anyway.
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows: ['E2E Staging SaaS (full lifecycle)']
|
||||
workflows:
|
||||
- 'E2E Staging SaaS (full lifecycle)'
|
||||
- 'publish-workspace-server-image'
|
||||
types: [completed]
|
||||
branches: [main]
|
||||
workflow_dispatch:
|
||||
@ -39,15 +76,22 @@ permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
concurrency:
|
||||
# Serialize promotes per-SHA so the publish+E2E both-fired race lands
|
||||
# cleanly. Different SHAs can promote in parallel.
|
||||
group: auto-promote-latest-${{ github.event.workflow_run.head_sha || github.event.inputs.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
|
||||
env:
|
||||
IMAGE_NAME: ghcr.io/molecule-ai/platform
|
||||
TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant
|
||||
|
||||
jobs:
|
||||
promote:
|
||||
# Skip if E2E failed — `:latest` stays on the prior known-good
|
||||
# digest. Manual dispatch always proceeds (the operator already
|
||||
# decided to promote).
|
||||
# Proceed if upstream succeeded OR manual dispatch. Upstream-failure
|
||||
# paths are filtered here; the E2E-was-red kill-switch lives in the
|
||||
# gate-check step below (covers the case where upstream is publish
|
||||
# success but E2E for the same SHA failed).
|
||||
if: |
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
|
||||
@ -65,9 +109,112 @@ jobs:
|
||||
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||
echo "full=${FULL}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- uses: imjasonh/setup-crane@v0.4
|
||||
- name: Gate — E2E Staging SaaS state for this SHA
|
||||
# When upstream IS E2E success, we know it's green (filtered by
|
||||
# the job-level `if` already). When upstream is publish, look up
|
||||
# E2E state for the same SHA. Four buckets:
|
||||
#
|
||||
# - completed/success: E2E confirmed safe → proceed
|
||||
# - completed/failure|cancelled|timed_out: E2E found a
|
||||
# regression → ABORT (exit 1), `:latest` stays put
|
||||
# - in_progress|queued|requested: E2E is RACING with publish
|
||||
# for a runtime-touching SHA. publish typically completes
|
||||
# ~5-10min before E2E (~10-15min). If we promote on the
|
||||
# publish signal here, a later E2E failure can't roll back
|
||||
# `:latest` — it'd already be wrongly advanced. So we DEFER:
|
||||
# skip subsequent steps (proceed=false) and let E2E's own
|
||||
# completion event re-fire this workflow, which then takes
|
||||
# the upstream-is-E2E path. exit 0 so the run shows as
|
||||
# success rather than a noisy fake-failure.
|
||||
# - none/none: E2E was paths-filtered out for this SHA (the
|
||||
# change touched canvas/cmd/sweep/etc. — paths covered by
|
||||
# publish but not by E2E). pre-merge gates on staging
|
||||
# already validated this SHA → proceed.
|
||||
#
|
||||
# Manual dispatch skips this check — operator override.
|
||||
id: gate
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO: ${{ github.repository }}
|
||||
SHA: ${{ steps.sha.outputs.full }}
|
||||
UPSTREAM_NAME: ${{ github.event.workflow_run.name }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
|
||||
echo "proceed=true" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::Manual dispatch — skipping E2E gate (operator override)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ "$UPSTREAM_NAME" = "E2E Staging SaaS (full lifecycle)" ]; then
|
||||
echo "proceed=true" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::Upstream is E2E itself (success per job-level if) — gate trivially satisfied"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Upstream is publish-workspace-server-image. Check E2E state.
|
||||
RESULT=$(gh run list \
|
||||
--repo "$REPO" \
|
||||
--workflow e2e-staging-saas.yml \
|
||||
--branch main \
|
||||
--commit "$SHA" \
|
||||
--limit 1 \
|
||||
--json status,conclusion \
|
||||
--jq '.[0] | "\(.status)/\(.conclusion // "none")"' \
|
||||
2>/dev/null || echo "none/none")
|
||||
|
||||
echo "E2E Staging SaaS for ${SHA:0:7}: $RESULT"
|
||||
|
||||
case "$RESULT" in
|
||||
completed/success)
|
||||
echo "proceed=true" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::E2E green for this SHA — proceeding with promote"
|
||||
;;
|
||||
completed/failure|completed/cancelled|completed/timed_out)
|
||||
echo "proceed=false" >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo "## ❌ Auto-promote aborted — E2E Staging SaaS failed"
|
||||
echo
|
||||
echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\`"
|
||||
echo "\`:latest\` stays on the prior known-good digest."
|
||||
echo
|
||||
echo "If the failure was a flake, manually dispatch this workflow with the same sha to override."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 1
|
||||
;;
|
||||
in_progress/*|queued/*|requested/*|waiting/*|pending/*)
|
||||
echo "proceed=false" >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo "## ⏳ Auto-promote deferred — E2E Staging SaaS still running"
|
||||
echo
|
||||
echo "Publish completed before E2E for \`${SHA:0:7}\` (state: \`$RESULT\`)."
|
||||
echo "Skipping retag here — E2E's own completion event will re-fire this workflow."
|
||||
echo "If E2E ends green, that run promotes \`:latest\`. If red, it aborts."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
;;
|
||||
none/none)
|
||||
echo "proceed=true" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::E2E paths-filtered out for this SHA — pre-merge staging gates carry"
|
||||
;;
|
||||
*)
|
||||
echo "proceed=false" >> "$GITHUB_OUTPUT"
|
||||
{
|
||||
echo "## ❓ Auto-promote aborted — unexpected E2E state"
|
||||
echo
|
||||
echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\` (unhandled)"
|
||||
echo "Manual investigation needed; re-dispatch with the same sha once resolved."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
- if: steps.gate.outputs.proceed == 'true'
|
||||
uses: imjasonh/setup-crane@31b88efe9de28ae0ffa220711af4b60be9435f6e # v0.4
|
||||
|
||||
- name: GHCR login
|
||||
if: steps.gate.outputs.proceed == 'true'
|
||||
run: |
|
||||
echo "${{ secrets.GITHUB_TOKEN }}" | \
|
||||
crane auth login ghcr.io -u "${{ github.actor }}" --password-stdin
|
||||
@ -76,35 +223,39 @@ jobs:
|
||||
# Better to fail fast with a clear message than to half-tag
|
||||
# (platform retagged but platform-tenant missing → tenants pull
|
||||
# a stale image).
|
||||
if: steps.gate.outputs.proceed == 'true'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
for img in "${IMAGE_NAME}" "${TENANT_IMAGE_NAME}"; do
|
||||
tag="${img}:staging-${{ steps.sha.outputs.short }}"
|
||||
if ! crane manifest "$tag" >/dev/null 2>&1; then
|
||||
echo "::error::Missing tag: $tag"
|
||||
echo "::error::publish-workspace-server-image must complete on this SHA before auto-promote-on-e2e can retag :latest."
|
||||
echo "::error::publish-workspace-server-image must complete on this SHA before auto-promote can retag :latest."
|
||||
exit 1
|
||||
fi
|
||||
echo " ok: $tag exists"
|
||||
done
|
||||
|
||||
- name: Retag platform :staging-<sha> → :latest
|
||||
if: steps.gate.outputs.proceed == 'true'
|
||||
run: |
|
||||
crane tag "${IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}" latest
|
||||
|
||||
- name: Retag tenant :staging-<sha> → :latest
|
||||
if: steps.gate.outputs.proceed == 'true'
|
||||
run: |
|
||||
crane tag "${TENANT_IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}" latest
|
||||
|
||||
- name: Summary
|
||||
if: steps.gate.outputs.proceed == 'true'
|
||||
run: |
|
||||
{
|
||||
echo "## E2E green → :latest promoted"
|
||||
echo "## :latest promoted to ${{ steps.sha.outputs.short }}"
|
||||
echo
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "- Trigger: manual dispatch"
|
||||
else
|
||||
echo "- Upstream E2E run: ${{ github.event.workflow_run.html_url }}"
|
||||
echo "- Upstream: \`${{ github.event.workflow_run.name }}\` ([run](${{ github.event.workflow_run.html_url }}))"
|
||||
fi
|
||||
echo "- platform:staging-${{ steps.sha.outputs.short }} → :latest"
|
||||
echo "- platform-tenant:staging-${{ steps.sha.outputs.short }} → :latest"
|
||||
|
||||
125
.github/workflows/auto-promote-staging.yml
vendored
125
.github/workflows/auto-promote-staging.yml
vendored
@ -1,27 +1,46 @@
|
||||
name: Auto-promote staging → main
|
||||
|
||||
# Fires after any of the staging-branch quality gates complete. When ALL
|
||||
# required gates are green on the same staging SHA, fast-forwards `main`
|
||||
# to that SHA automatically — closing the gap that historically let
|
||||
# features sit on staging for weeks waiting for a bulk promotion PR
|
||||
# (see molecule-core#1496 for the 1172-commit example).
|
||||
# required gates are green on the same staging SHA, opens (or re-uses)
|
||||
# a PR `staging → main` and enables auto-merge so the merge queue lands
|
||||
# it. Closes the gap that historically let features sit on staging for
|
||||
# weeks waiting for a bulk promotion PR (see molecule-core#1496 for the
|
||||
# 1172-commit example).
|
||||
#
|
||||
# 2026-04-28 rewrite (PR #142): the previous version did a direct
|
||||
# `git merge --ff-only origin staging && git push origin main`. That
|
||||
# breaks against main's branch-protection ruleset, which requires
|
||||
# status checks "set by the expected GitHub apps" — direct pushes
|
||||
# can't satisfy that condition (only PR merges through the queue can).
|
||||
# The workflow was failing every tick with:
|
||||
# remote: error: GH006: Protected branch update failed for refs/heads/main.
|
||||
# remote: - Required status checks ... were not set by the expected GitHub apps.
|
||||
# Fix: mirror the PR-based pattern from auto-sync-main-to-staging.yml
|
||||
# (the reverse-direction sync, fixed in #2234 for the same reason).
|
||||
# Both directions now use the same merge-queue path that humans use,
|
||||
# no special-case bypass.
|
||||
#
|
||||
# Safety model:
|
||||
# - Runs ONLY on workflow_run events for the staging branch.
|
||||
# - Requires EVERY named gate workflow to have the same head_sha and
|
||||
# all be `conclusion == success`. If any of them is red, skipped,
|
||||
# cancelled, or pending, we abort (stay on the current main).
|
||||
# - Uses --ff-only: refuses to advance main if main has diverged from
|
||||
# the staging history (e.g. a hotfix landed directly on main). In
|
||||
# that case a human resolves the fork.
|
||||
# - Writes a commit summary so the promote shows up in git log as a
|
||||
# deliberate act, not a stealth move.
|
||||
# - The PR base=main head=staging path lets GitHub itself enforce
|
||||
# branch protection. If main has diverged from staging or required
|
||||
# checks aren't satisfied, the merge queue declines the PR — no
|
||||
# need for a manual ff-only ancestry check here.
|
||||
# - Loop safety: the auto-sync-main-to-staging workflow fires when
|
||||
# main lands the auto-promote PR, but its merge into staging is by
|
||||
# GITHUB_TOKEN which doesn't trigger downstream workflow_run events
|
||||
# (GitHub Actions safety). So this workflow doesn't re-fire from
|
||||
# its own promote landing.
|
||||
#
|
||||
# **Initial rollout:** ship this file but leave the `enabled` input set
|
||||
# such that nothing auto-promotes until staging CI has been reliably
|
||||
# green for a few days. Toggle via repo variable `AUTO_PROMOTE_ENABLED`.
|
||||
# Toggle via repo variable AUTO_PROMOTE_ENABLED (true/unset). When
|
||||
# unset, the workflow logs what it would have done but doesn't open
|
||||
# the PR — useful for dry-running the gate logic without surfacing
|
||||
# a noisy PR while staging CI is still flaky.
|
||||
#
|
||||
# **One-time repo setting (load-bearing):** this workflow opens a
|
||||
# **One-time repo setting (load-bearing):** this workflow opens the
|
||||
# staging→main PR via `gh pr create` using the default GITHUB_TOKEN.
|
||||
# Since GitHub's 2022 default change, that token cannot create or
|
||||
# approve PRs unless the repo opts in. The toggle is at:
|
||||
@ -56,6 +75,7 @@ on:
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
jobs:
|
||||
check-all-gates-green:
|
||||
@ -152,14 +172,14 @@ jobs:
|
||||
set -eu
|
||||
# Repo variable AUTO_PROMOTE_ENABLED=true flips this on. While
|
||||
# it's unset, the workflow dry-runs (logs what it would have
|
||||
# done) but doesn't actually push to main. Set the variable in
|
||||
# done) but doesn't open the promote PR. Set the variable in
|
||||
# Settings → Secrets and variables → Actions → Variables.
|
||||
if [ "${AUTO_PROMOTE_ENABLED:-}" != "true" ] && [ "${FORCE_INPUT:-false}" != "true" ]; then
|
||||
{
|
||||
echo "## ⏸ Auto-promote disabled"
|
||||
echo
|
||||
echo "Repo variable \`AUTO_PROMOTE_ENABLED\` is not set to \`true\`."
|
||||
echo "All gates are green on staging; would have promoted to \`main\`."
|
||||
echo "All gates are green on staging; would have opened a promote PR to \`main\`."
|
||||
echo
|
||||
echo "To enable: Settings → Secrets and variables → Actions → Variables → \`AUTO_PROMOTE_ENABLED=true\`."
|
||||
echo "To test once manually: workflow_dispatch with \`force=true\`."
|
||||
@ -168,50 +188,55 @@ jobs:
|
||||
exit 0
|
||||
fi
|
||||
|
||||
- name: Checkout main
|
||||
if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: main
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Fast-forward main → staging HEAD
|
||||
- name: Open (or reuse) staging → main promote PR + enable auto-merge
|
||||
if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
REPO: ${{ github.repository }}
|
||||
TARGET_SHA: ${{ needs.check-all-gates-green.outputs.head_sha }}
|
||||
run: |
|
||||
set -eu
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
set -euo pipefail
|
||||
|
||||
git fetch origin staging
|
||||
git fetch origin main
|
||||
# Look for an existing open promote PR (idempotent on re-run
|
||||
# of the workflow). The PR's head IS the staging branch — the
|
||||
# whole point is "advance main to staging's tip", so we don't
|
||||
# need a per-SHA branch like auto-sync-main-to-staging uses.
|
||||
PR_NUM=$(gh pr list --repo "$REPO" \
|
||||
--base main --head staging --state open \
|
||||
--json number --jq '.[0].number // ""')
|
||||
|
||||
# Refuse to advance main if it's diverged from staging history.
|
||||
# Someone landed a commit directly on main that's not on
|
||||
# staging → human needs to decide how to reconcile.
|
||||
if ! git merge-base --is-ancestor "$(git rev-parse origin/main)" "$TARGET_SHA"; then
|
||||
{
|
||||
echo "## ❌ Auto-promote refused — main has diverged"
|
||||
echo
|
||||
echo "\`main\` (\`$(git rev-parse --short origin/main)\`) is not an ancestor of staging (\`${TARGET_SHA:0:7}\`)."
|
||||
echo "Someone committed directly to main or the histories forked."
|
||||
echo
|
||||
echo "Resolve manually: merge main into staging, get CI green on the merged commit,"
|
||||
echo "then the auto-promote will succeed on the next run."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 1
|
||||
if [ -z "$PR_NUM" ]; then
|
||||
TITLE="staging → main: auto-promote ${TARGET_SHA:0:7}"
|
||||
BODY_FILE=$(mktemp)
|
||||
cat > "$BODY_FILE" <<EOFBODY
|
||||
Automated promotion of \`staging\` (\`${TARGET_SHA:0:8}\`) to \`main\`. All required staging gates green at this SHA: CI, E2E Staging Canvas, E2E API Smoke, CodeQL.
|
||||
|
||||
This PR is auto-generated by \`.github/workflows/auto-promote-staging.yml\` whenever every required gate completes green on the same staging SHA. It exists because main's branch protection requires status checks "set by the expected GitHub apps" — direct \`git push\` from a workflow can't satisfy that, only PR merges through the queue can.
|
||||
|
||||
Merge queue lands this; no human action needed unless gates fail. Reverse-direction sync (the merge commit on main → staging) is handled by \`auto-sync-main-to-staging.yml\`.
|
||||
EOFBODY
|
||||
PR_URL=$(gh pr create --repo "$REPO" \
|
||||
--base main --head staging \
|
||||
--title "$TITLE" \
|
||||
--body-file "$BODY_FILE")
|
||||
PR_NUM=$(echo "$PR_URL" | grep -oE '[0-9]+$' | tail -1)
|
||||
rm -f "$BODY_FILE"
|
||||
echo "::notice::Opened PR #${PR_NUM}"
|
||||
else
|
||||
echo "::notice::Re-using existing promote PR #${PR_NUM}"
|
||||
fi
|
||||
|
||||
# Fast-forward main to the target SHA.
|
||||
git checkout main
|
||||
git merge --ff-only "$TARGET_SHA"
|
||||
git push origin main
|
||||
# Enable auto-merge — the merge queue picks it up once
|
||||
# required gates are green on the merge_group ref.
|
||||
if ! gh pr merge "$PR_NUM" --repo "$REPO" --auto --merge 2>&1; then
|
||||
echo "::warning::Failed to enable auto-merge on PR #${PR_NUM} — operator may need to merge manually."
|
||||
fi
|
||||
|
||||
{
|
||||
echo "## ✅ Auto-promoted main → ${TARGET_SHA:0:7}"
|
||||
echo "## ✅ Auto-promote PR opened"
|
||||
echo
|
||||
echo "All gate workflows green on staging at this SHA."
|
||||
echo "\`main\` fast-forwarded to match."
|
||||
echo "- Source: staging at \`${TARGET_SHA:0:8}\`"
|
||||
echo "- PR: #${PR_NUM}"
|
||||
echo
|
||||
echo "Merge queue lands the PR once required gates are green; no human action needed unless gates fail."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
178
.github/workflows/auto-sync-main-to-staging.yml
vendored
178
.github/workflows/auto-sync-main-to-staging.yml
vendored
@ -17,35 +17,45 @@ name: Auto-sync main → staging
|
||||
# bridges). Each time the bridge needed update-branch + a re-CI
|
||||
# round before merging. Operationally annoying and avoidable.
|
||||
#
|
||||
# This workflow closes the gap automatically:
|
||||
# Architecture:
|
||||
#
|
||||
# 1. Push to main fires (regardless of source: auto-promote, UI
|
||||
# merge, API merge, direct push).
|
||||
# 2. Check whether main is already in staging's ancestry — if
|
||||
# yes, no-op (auto-promote-staging already kept them in sync
|
||||
# via fast-forward).
|
||||
# 3. If not, try fast-forward staging to main first (works when
|
||||
# staging hasn't diverged with its own commits).
|
||||
# 4. If ff fails (staging has commits main doesn't — feature work
|
||||
# in flight), do a real merge with a "chore: sync" commit so
|
||||
# staging absorbs main's tip while keeping its own history.
|
||||
# 5. Push staging.
|
||||
# This repo's `staging` branch is protected by a `merge_queue`
|
||||
# ruleset (id 15500102) that blocks ALL direct pushes — no bypass
|
||||
# even for org admins or the GitHub Actions integration. Direct
|
||||
# `git push origin staging` returns GH013. So instead of pushing
|
||||
# directly, this workflow:
|
||||
#
|
||||
# 1. Checks if main is already in staging's ancestry → no-op.
|
||||
# 2. Creates an `auto-sync/main-<sha>` branch from staging.
|
||||
# 3. Tries `git merge --ff-only origin/main` → if staging hasn't
|
||||
# diverged this is a clean ff.
|
||||
# 4. Otherwise `git merge --no-ff origin/main` to absorb main's
|
||||
# tip while keeping staging's history.
|
||||
# 5. Pushes the auto-sync branch.
|
||||
# 6. Opens a PR (base=staging, head=auto-sync/main-<sha>) and
|
||||
# enables auto-merge so the merge queue lands it.
|
||||
#
|
||||
# This mirrors the path human PRs take through staging — same
|
||||
# rules, same gates, no special-case bypass.
|
||||
#
|
||||
# Loop safety:
|
||||
#
|
||||
# `GITHUB_TOKEN`-authored pushes do NOT trigger downstream workflow
|
||||
# runs by default (GitHub Actions safety). So when this workflow
|
||||
# pushes the synced staging, `auto-promote-staging.yml` is NOT
|
||||
# triggered by that push. The next developer push to staging triggers
|
||||
# auto-promote normally. No loop is even theoretically possible.
|
||||
# `GITHUB_TOKEN`-authored merges (including the merge queue's land
|
||||
# of the auto-sync PR) do NOT trigger downstream workflow runs
|
||||
# (GitHub Actions safety). So when the auto-sync PR lands on
|
||||
# staging, `auto-promote-staging.yml` is NOT triggered by that
|
||||
# push. The next developer push to staging triggers auto-promote
|
||||
# normally. No loop possible.
|
||||
#
|
||||
# Concurrency:
|
||||
#
|
||||
# Two pushes to main in quick succession (e.g., manual UI merge
|
||||
# immediately followed by auto-promote-staging's ff-merge) would
|
||||
# otherwise race two auto-sync runs against the same staging branch
|
||||
# — second push fails non-fast-forward. The concurrency group
|
||||
# serializes them so the second run sees the first's result.
|
||||
# immediately followed by auto-promote-staging's ff-merge) could
|
||||
# otherwise open two overlapping auto-sync PRs. The concurrency
|
||||
# group serializes runs; the second waits for the first to exit.
|
||||
# (The first run exits after opening + auto-merge-queueing the PR,
|
||||
# not after the merge actually completes — so multiple PRs can be
|
||||
# open simultaneously, but the merge queue handles them serially.)
|
||||
|
||||
on:
|
||||
push:
|
||||
@ -53,6 +63,7 @@ on:
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
|
||||
concurrency:
|
||||
group: auto-sync-main-to-staging
|
||||
@ -60,10 +71,11 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
sync-staging:
|
||||
runs-on: ubuntu-latest
|
||||
# Self-hosted Mac mini matches the rest of this repo's workflows.
|
||||
runs-on: [self-hosted, macos, arm64]
|
||||
steps:
|
||||
- name: Checkout staging
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
ref: staging
|
||||
@ -85,65 +97,117 @@ jobs:
|
||||
echo "## ✅ No-op"
|
||||
echo
|
||||
echo "staging already contains \`origin/main\` ($(git rev-parse --short=8 origin/main))."
|
||||
echo "auto-promote-staging or a previous auto-sync run already kept them aligned."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
else
|
||||
echo "needs_sync=true" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::staging is missing main's tip — sync needed"
|
||||
MAIN_SHORT=$(git rev-parse --short=8 origin/main)
|
||||
echo "main_short=${MAIN_SHORT}" >> "$GITHUB_OUTPUT"
|
||||
echo "branch=auto-sync/main-${MAIN_SHORT}" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::staging is missing main's tip (${MAIN_SHORT}) — opening sync PR"
|
||||
fi
|
||||
|
||||
- name: Fast-forward staging to main
|
||||
- name: Create auto-sync branch + merge main
|
||||
if: steps.check.outputs.needs_sync == 'true'
|
||||
id: ff
|
||||
id: prep
|
||||
run: |
|
||||
set -euo pipefail
|
||||
BRANCH="${{ steps.check.outputs.branch }}"
|
||||
|
||||
# If a previous auto-sync run already opened a branch for the
|
||||
# same main sha, prefer reusing it (idempotent behavior on
|
||||
# workflow restart). Force-update from latest staging anyway
|
||||
# so it absorbs any staging-side commits that landed since.
|
||||
git checkout -B "$BRANCH"
|
||||
|
||||
if git merge --ff-only origin/main; then
|
||||
echo "did_ff=true" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::Fast-forwarded staging to origin/main"
|
||||
echo "::notice::Fast-forwarded ${BRANCH} to origin/main"
|
||||
else
|
||||
echo "did_ff=false" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::ff failed — staging has its own commits; will create merge"
|
||||
fi
|
||||
|
||||
- name: Merge main into staging (when ff fails)
|
||||
if: steps.check.outputs.needs_sync == 'true' && steps.ff.outputs.did_ff != 'true'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# ff failed because staging has commits main doesn't — typical
|
||||
# in-flight feature work. Create a merge commit so staging
|
||||
# absorbs main's tip while keeping its own history.
|
||||
if ! git merge --no-ff origin/main -m "chore: sync main → staging (auto)"; then
|
||||
# Hygiene: leave the work tree clean before failing. Doesn't
|
||||
# affect future runs (each gets a fresh checkout) but a
|
||||
# half-merged tree is an unpleasant artifact to debug if
|
||||
# anyone ever shells into the runner.
|
||||
# Hygiene: leave the work tree clean before failing.
|
||||
git merge --abort || true
|
||||
{
|
||||
echo "## ❌ Conflict"
|
||||
echo
|
||||
echo "Auto-merge \`main → staging\` failed with conflicts."
|
||||
echo "A human needs to resolve manually:"
|
||||
echo
|
||||
echo " git checkout staging"
|
||||
echo " git merge origin/main"
|
||||
echo " # resolve, commit, push"
|
||||
echo "A human needs to resolve manually."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Push staging
|
||||
- name: Push auto-sync branch
|
||||
if: steps.check.outputs.needs_sync == 'true'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
git push origin staging
|
||||
{
|
||||
if [ "${{ steps.ff.outputs.did_ff }}" = "true" ]; then
|
||||
echo "## ✅ staging fast-forwarded"
|
||||
echo
|
||||
echo "staging is now at \`$(git rev-parse --short=8 HEAD)\` (== origin/main)."
|
||||
# Force-with-lease so a concurrent auto-sync run can't
|
||||
# silently clobber an in-flight branch we just updated. If a
|
||||
# different writer touched the branch, we abort and the next
|
||||
# run picks up the latest state.
|
||||
git push --force-with-lease origin "${{ steps.check.outputs.branch }}"
|
||||
|
||||
- name: Open auto-sync PR + enable auto-merge
|
||||
if: steps.check.outputs.needs_sync == 'true'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
BRANCH: ${{ steps.check.outputs.branch }}
|
||||
MAIN_SHORT: ${{ steps.check.outputs.main_short }}
|
||||
DID_FF: ${{ steps.prep.outputs.did_ff }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Find existing PR for this branch (idempotent on workflow
|
||||
# restart) before creating a new one.
|
||||
PR_NUM=$(gh pr list --head "$BRANCH" --base staging --state open --json number --jq '.[0].number // ""')
|
||||
|
||||
if [ -z "$PR_NUM" ]; then
|
||||
# Body lives in a temp file to keep the multi-line content
|
||||
# out of the YAML block scalar (un-indented newlines inside
|
||||
# an inline shell string break YAML parsing).
|
||||
BODY_FILE=$(mktemp)
|
||||
if [ "$DID_FF" = "true" ]; then
|
||||
TITLE="chore: sync main → staging (auto, ff to ${MAIN_SHORT})"
|
||||
cat > "$BODY_FILE" <<EOFBODY
|
||||
Automated fast-forward of \`staging\` to \`origin/main\` (\`${MAIN_SHORT}\`). Staging has no in-flight commits that diverge from main. Merge queue lands this; no human action needed.
|
||||
|
||||
This PR is auto-generated by \`.github/workflows/auto-sync-main-to-staging.yml\` on every push to \`main\`. It exists because this repo's \`staging\` branch has a \`merge_queue\` ruleset that blocks direct pushes — even from the GitHub Actions integration.
|
||||
EOFBODY
|
||||
else
|
||||
echo "## ✅ staging absorbed main"
|
||||
echo
|
||||
echo "staging is now at \`$(git rev-parse --short=8 HEAD)\` with a merge commit absorbing main's tip."
|
||||
TITLE="chore: sync main → staging (auto, merge ${MAIN_SHORT})"
|
||||
cat > "$BODY_FILE" <<EOFBODY
|
||||
Automated merge of \`origin/main\` (\`${MAIN_SHORT}\`) into \`staging\`. Staging has commits main doesn't, so this is a non-ff merge that absorbs main's tip. Merge queue lands this.
|
||||
|
||||
This PR is auto-generated by \`.github/workflows/auto-sync-main-to-staging.yml\` on every push to \`main\`.
|
||||
EOFBODY
|
||||
fi
|
||||
|
||||
# gh pr create prints the URL on stdout; extract the PR number.
|
||||
PR_URL=$(gh pr create \
|
||||
--base staging \
|
||||
--head "$BRANCH" \
|
||||
--title "$TITLE" \
|
||||
--body-file "$BODY_FILE")
|
||||
PR_NUM=$(echo "$PR_URL" | grep -oE '[0-9]+$' | tail -1)
|
||||
rm -f "$BODY_FILE"
|
||||
echo "::notice::Opened PR #${PR_NUM}"
|
||||
else
|
||||
echo "::notice::Re-using existing PR #${PR_NUM} for ${BRANCH}"
|
||||
fi
|
||||
|
||||
# Enable auto-merge — the merge queue picks it up once
|
||||
# required gates are green. Use --merge for merge commits
|
||||
# (matches the rest of this repo's PR convention).
|
||||
if ! gh pr merge "$PR_NUM" --auto --merge 2>&1; then
|
||||
echo "::warning::Failed to enable auto-merge on PR #${PR_NUM} — operator may need to merge manually."
|
||||
fi
|
||||
|
||||
{
|
||||
echo "## ✅ Auto-sync PR opened"
|
||||
echo
|
||||
echo "- Branch: \`$BRANCH\`"
|
||||
echo "- PR: #$PR_NUM"
|
||||
echo "- Strategy: $([ "$DID_FF" = "true" ] && echo "ff" || echo "merge commit")"
|
||||
echo
|
||||
echo "Merge queue lands the PR once required gates are green; no human action needed unless gates fail."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
2
.github/workflows/auto-tag-runtime.yml
vendored
2
.github/workflows/auto-tag-runtime.yml
vendored
@ -38,7 +38,7 @@ jobs:
|
||||
tag:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
fetch-depth: 0 # need full tag history for `git describe` / sort
|
||||
|
||||
|
||||
2
.github/workflows/block-internal-paths.yml
vendored
2
.github/workflows/block-internal-paths.yml
vendored
@ -26,7 +26,7 @@ jobs:
|
||||
name: Block forbidden paths
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
fetch-depth: 2 # need previous commit to diff against on push events
|
||||
|
||||
|
||||
6
.github/workflows/canary-staging.yml
vendored
6
.github/workflows/canary-staging.yml
vendored
@ -66,7 +66,7 @@ jobs:
|
||||
E2E_RUN_ID: "canary-${{ github.run_id }}"
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Verify admin token present
|
||||
run: |
|
||||
@ -98,7 +98,7 @@ jobs:
|
||||
# next deploy window.
|
||||
- name: Open issue on failure
|
||||
if: failure()
|
||||
uses: actions/github-script@v7
|
||||
uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
|
||||
env:
|
||||
# Inject the workflow path explicitly — context.workflow is
|
||||
# the *name*, not the file path the actions API needs.
|
||||
@ -165,7 +165,7 @@ jobs:
|
||||
|
||||
- name: Auto-close canary issue on success
|
||||
if: success()
|
||||
uses: actions/github-script@v7
|
||||
uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
|
||||
with:
|
||||
script: |
|
||||
const title = '🔴 Canary failing: staging SaaS smoke';
|
||||
|
||||
4
.github/workflows/canary-verify.yml
vendored
4
.github/workflows/canary-verify.yml
vendored
@ -40,7 +40,7 @@ jobs:
|
||||
smoke_ran: ${{ steps.smoke.outputs.ran }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Compute sha
|
||||
id: compute
|
||||
@ -143,7 +143,7 @@ jobs:
|
||||
if: ${{ needs.canary-smoke.result == 'success' && needs.canary-smoke.outputs.smoke_ran == 'true' }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: imjasonh/setup-crane@v0.4
|
||||
- uses: imjasonh/setup-crane@31b88efe9de28ae0ffa220711af4b60be9435f6e # v0.4
|
||||
|
||||
- name: GHCR login
|
||||
run: |
|
||||
|
||||
@ -36,7 +36,7 @@ jobs:
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- name: Verify merge_group trigger on required-check workflows
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
16
.github/workflows/ci.yml
vendored
16
.github/workflows/ci.yml
vendored
@ -32,7 +32,7 @@ jobs:
|
||||
python: ${{ steps.check.outputs.python }}
|
||||
scripts: ${{ steps.check.outputs.scripts }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- id: check
|
||||
@ -72,8 +72,8 @@ jobs:
|
||||
run:
|
||||
working-directory: workspace-server
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
- run: go mod download
|
||||
@ -187,8 +187,8 @@ jobs:
|
||||
run:
|
||||
working-directory: canvas
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-node@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
with:
|
||||
node-version: '22'
|
||||
- run: rm -f package-lock.json && npm install
|
||||
@ -210,7 +210,7 @@ jobs:
|
||||
if: needs.changes.outputs.scripts == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- name: Run shellcheck on tests/e2e/*.sh and infra/scripts/*.sh
|
||||
# shellcheck is pre-installed on ubuntu-latest runners (via apt).
|
||||
# infra/scripts/ is included because setup.sh + nuke.sh gate the
|
||||
@ -276,8 +276,8 @@ jobs:
|
||||
run:
|
||||
working-directory: workspace
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: pip
|
||||
|
||||
12
.github/workflows/codeql.yml
vendored
12
.github/workflows/codeql.yml
vendored
@ -53,14 +53,14 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Checkout sibling plugin repo
|
||||
# Same reasoning as publish-workspace-server-image.yml — the Go
|
||||
# module's replace directive needs the plugin source so
|
||||
# CodeQL's "go build" phase can resolve.
|
||||
if: matrix.language == 'go'
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
repository: Molecule-AI/molecule-ai-plugin-github-app-auth
|
||||
path: molecule-ai-plugin-github-app-auth
|
||||
@ -69,7 +69,7 @@ jobs:
|
||||
# jq is pre-installed on ubuntu-latest — no setup step needed.
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# security-extended widens past the default to include the
|
||||
@ -77,11 +77,11 @@ jobs:
|
||||
queries: security-extended
|
||||
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v3
|
||||
uses: github/codeql-action/autobuild@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
id: analyze
|
||||
uses: github/codeql-action/analyze@v3
|
||||
uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
|
||||
with:
|
||||
category: "/language:${{ matrix.language }}"
|
||||
# upload: never — GHAS isn't enabled on this repo, so the
|
||||
@ -121,7 +121,7 @@ jobs:
|
||||
# 14-day retention — longer than default 3, short enough not
|
||||
# to bloat quota.
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: codeql-sarif-${{ matrix.language }}
|
||||
path: sarif-results/${{ matrix.language }}/
|
||||
|
||||
29
.github/workflows/e2e-api.yml
vendored
29
.github/workflows/e2e-api.yml
vendored
@ -27,7 +27,17 @@ on:
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: e2e-api-${{ github.ref }}
|
||||
# Per-SHA grouping (changed 2026-04-28 from per-ref). Per-ref had the
|
||||
# same auto-promote-staging brittleness as e2e-staging-canvas — back-
|
||||
# to-back staging pushes share refs/heads/staging, so the older push's
|
||||
# queued run gets cancelled when a newer push lands. Auto-promote-
|
||||
# staging then sees `completed/cancelled` for the older SHA and stays
|
||||
# put; the newer SHA's gates may eventually save the day, but if the
|
||||
# newer push gets cancelled too, we deadlock.
|
||||
#
|
||||
# See e2e-staging-canvas.yml's identical concurrency block for the full
|
||||
# rationale and the 2026-04-28 incident reference.
|
||||
group: e2e-api-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
@ -36,8 +46,8 @@ jobs:
|
||||
outputs:
|
||||
api: ${{ steps.decide.outputs.api }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dorny/paths-filter@v3
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
|
||||
id: filter
|
||||
with:
|
||||
filters: |
|
||||
@ -56,9 +66,18 @@ jobs:
|
||||
echo "api=${{ steps.filter.outputs.api }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Same `name:` as the real job below so the check-run produced by the
|
||||
# no-op path is indistinguishable from the real one for branch
|
||||
# protection purposes. Without this, the real job was always skipped on
|
||||
# paths-filtered commits → branch protection on `main` saw "E2E API
|
||||
# Smoke Test" as a missing required check → auto-promote-staging's
|
||||
# `git push origin main` got rejected with GH006. Observed 2026-04-28
|
||||
# 00:22 UTC blocking the staging→main promote despite all gates
|
||||
# actually passing at the workflow level.
|
||||
no-op:
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.api != 'true'
|
||||
name: E2E API Smoke Test
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: |
|
||||
@ -78,8 +97,8 @@ jobs:
|
||||
PG_CONTAINER: molecule-ci-postgres
|
||||
REDIS_CONTAINER: molecule-ci-redis
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-go@v5
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
cache: true
|
||||
|
||||
37
.github/workflows/e2e-staging-canvas.yml
vendored
37
.github/workflows/e2e-staging-canvas.yml
vendored
@ -37,7 +37,25 @@ on:
|
||||
- cron: '0 8 * * 0'
|
||||
|
||||
concurrency:
|
||||
group: e2e-staging-canvas
|
||||
# Per-SHA grouping (changed 2026-04-28 from a single global group). The
|
||||
# global group made auto-promote-staging brittle: when a staging push
|
||||
# queued behind an in-flight run and a third entrant (a PR run, a
|
||||
# follow-on push) entered the group, the staging push got cancelled —
|
||||
# leaving auto-promote-staging looking at `completed/cancelled` for a
|
||||
# required gate and refusing to advance main. Observed 2026-04-28
|
||||
# 23:51-23:53 on staging tip 3f99fede.
|
||||
#
|
||||
# The original intent of the global group was to throttle parallel
|
||||
# E2E provisions (each spins a fresh EC2). At our scale that throttle
|
||||
# isn't worth the correctness cost — fresh-org-per-run isolates the
|
||||
# state, and the cost of two parallel runs (~$0.001/min × 10min × 2)
|
||||
# is rounding error vs. the cost of a stuck pipeline.
|
||||
#
|
||||
# Per-SHA still dedupes accidental double-triggers for the SAME SHA.
|
||||
# It does NOT cancel obsolete-PR-version runs on force-push; that
|
||||
# wasted CI is acceptable given the alternative is losing staging-tip
|
||||
# data that auto-promote-staging needs.
|
||||
group: e2e-staging-canvas-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
@ -46,8 +64,8 @@ jobs:
|
||||
outputs:
|
||||
canvas: ${{ steps.decide.outputs.canvas }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dorny/paths-filter@v3
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4.0.1
|
||||
id: filter
|
||||
with:
|
||||
filters: |
|
||||
@ -64,9 +82,14 @@ jobs:
|
||||
echo "canvas=${{ steps.filter.outputs.canvas }}" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# Same `name:` as the playwright job below so the check-run is
|
||||
# indistinguishable from the real one for branch protection. Mirrors
|
||||
# the e2e-api.yml fix in the same PR — see that file for the
|
||||
# 2026-04-28 incident reference.
|
||||
no-op:
|
||||
needs: detect-changes
|
||||
if: needs.detect-changes.outputs.canvas != 'true'
|
||||
name: Canvas tabs E2E
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- run: |
|
||||
@ -90,7 +113,7 @@ jobs:
|
||||
working-directory: canvas
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Verify admin token present
|
||||
run: |
|
||||
@ -100,7 +123,7 @@ jobs:
|
||||
fi
|
||||
|
||||
- name: Set up Node
|
||||
uses: actions/setup-node@v4
|
||||
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
with:
|
||||
node-version: '20'
|
||||
cache: 'npm'
|
||||
@ -117,7 +140,7 @@ jobs:
|
||||
|
||||
- name: Upload Playwright report on failure
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: playwright-report-staging
|
||||
path: canvas/playwright-report-staging/
|
||||
@ -125,7 +148,7 @@ jobs:
|
||||
|
||||
- name: Upload screenshots on failure
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
|
||||
with:
|
||||
name: playwright-screenshots
|
||||
path: canvas/test-results/
|
||||
|
||||
2
.github/workflows/e2e-staging-saas.yml
vendored
2
.github/workflows/e2e-staging-saas.yml
vendored
@ -92,7 +92,7 @@ jobs:
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Verify admin token present
|
||||
run: |
|
||||
|
||||
4
.github/workflows/e2e-staging-sanity.yml
vendored
4
.github/workflows/e2e-staging-sanity.yml
vendored
@ -50,7 +50,7 @@ jobs:
|
||||
E2E_INTENTIONAL_FAILURE: "1"
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Verify admin token present
|
||||
run: |
|
||||
@ -89,7 +89,7 @@ jobs:
|
||||
|
||||
- name: Open issue if safety net is broken
|
||||
if: failure()
|
||||
uses: actions/github-script@v7
|
||||
uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7
|
||||
with:
|
||||
script: |
|
||||
const title = "🚨 E2E teardown safety net broken";
|
||||
|
||||
2
.github/workflows/promote-latest.yml
vendored
2
.github/workflows/promote-latest.yml
vendored
@ -34,7 +34,7 @@ jobs:
|
||||
promote:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: imjasonh/setup-crane@v0.4
|
||||
- uses: imjasonh/setup-crane@31b88efe9de28ae0ffa220711af4b60be9435f6e # v0.4
|
||||
|
||||
- name: GHCR login
|
||||
run: |
|
||||
|
||||
8
.github/workflows/publish-canvas-image.yml
vendored
8
.github/workflows/publish-canvas-image.yml
vendored
@ -42,17 +42,17 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Log in to GHCR
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Compute tags
|
||||
id: tags
|
||||
@ -85,7 +85,7 @@ jobs:
|
||||
echo "ws_url=${WS_URL}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Build & push canvas image to GHCR
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||
with:
|
||||
context: ./canvas
|
||||
file: ./canvas/Dockerfile
|
||||
|
||||
29
.github/workflows/publish-runtime.yml
vendored
29
.github/workflows/publish-runtime.yml
vendored
@ -81,9 +81,9 @@ jobs:
|
||||
version: ${{ steps.version.outputs.version }}
|
||||
wheel_sha256: ${{ steps.wheel_hash.outputs.wheel_sha256 }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
cache: pip
|
||||
@ -419,10 +419,33 @@ jobs:
|
||||
RUNTIME_VERSION: ${{ needs.publish.outputs.version }}
|
||||
run: |
|
||||
set +e # don't abort on a single repo failure — collect them all
|
||||
# Schedule-vs-dispatch behaviour split (hardened 2026-04-28
|
||||
# after the sweep-cf-orphans soft-skip incident — same class
|
||||
# of bug):
|
||||
#
|
||||
# The earlier "skipping cascade. templates will pick up the
|
||||
# new version on their own next rebuild" message was wrong —
|
||||
# templates only build on this dispatch trigger; without it
|
||||
# they stay pinned to whatever runtime version they last saw.
|
||||
# A silent skip here means "PyPI is current, templates are
|
||||
# not" and the gap is invisible until someone notices a
|
||||
# template still on the old version weeks later.
|
||||
#
|
||||
# - push → exit 1 (red CI surfaces the gap)
|
||||
# - workflow_dispatch → exit 0 with a warning (operator
|
||||
# ran this ad-hoc; let them rerun
|
||||
# after fixing the secret)
|
||||
if [ -z "$DISPATCH_TOKEN" ]; then
|
||||
echo "::warning::TEMPLATE_DISPATCH_TOKEN secret not set — skipping cascade. PyPI was published; templates will pick up the new version on their own next rebuild."
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "::warning::TEMPLATE_DISPATCH_TOKEN secret not set — skipping cascade."
|
||||
echo "::warning::set it at Settings → Secrets and Variables → Actions, then rerun. Templates will stay on the prior runtime version until either this token is set or each template is rebuilt manually."
|
||||
exit 0
|
||||
fi
|
||||
echo "::error::TEMPLATE_DISPATCH_TOKEN secret missing — cascade cannot fan out."
|
||||
echo "::error::PyPI was published, but the 8 template repos will NOT pick up the new version until this token is restored and a republish dispatches the cascade."
|
||||
echo "::error::set it at Settings → Secrets and Variables → Actions; then re-trigger publish-runtime via workflow_dispatch."
|
||||
exit 1
|
||||
fi
|
||||
VERSION="$RUNTIME_VERSION"
|
||||
if [ -z "$VERSION" ]; then
|
||||
echo "::error::publish job did not expose a version output — cascade cannot fan out"
|
||||
|
||||
@ -27,7 +27,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Checkout sibling plugin repo
|
||||
# workspace-server/Dockerfile expects
|
||||
@ -42,21 +42,21 @@ jobs:
|
||||
# The PAT needs Contents:Read on Molecule-AI/molecule-ai-plugin-
|
||||
# github-app-auth. Falls back to the default token for the (rare)
|
||||
# case where an operator made the plugin repo public.
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
repository: Molecule-AI/molecule-ai-plugin-github-app-auth
|
||||
path: molecule-ai-plugin-github-app-auth
|
||||
token: ${{ secrets.PLUGIN_REPO_PAT || secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Log in to GHCR
|
||||
uses: docker/login-action@v3
|
||||
uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
|
||||
- name: Compute tags
|
||||
id: tags
|
||||
@ -87,7 +87,7 @@ jobs:
|
||||
# applyRuntimeModelEnv and caused every E2E to route hermes+openai
|
||||
# through openrouter → 401). See issue filed with this PR.
|
||||
- name: Build & push platform image to GHCR (staging-<sha> + staging-latest)
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||
with:
|
||||
context: .
|
||||
file: ./workspace-server/Dockerfile
|
||||
@ -104,7 +104,7 @@ jobs:
|
||||
org.opencontainers.image.description=Molecule AI platform (Go API server) — pending canary verify
|
||||
|
||||
- name: Build & push tenant image to GHCR (staging-<sha> + staging-latest)
|
||||
uses: docker/build-push-action@v6
|
||||
uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6
|
||||
with:
|
||||
context: .
|
||||
file: ./workspace-server/Dockerfile.tenant
|
||||
|
||||
4
.github/workflows/runtime-pin-compat.yml
vendored
4
.github/workflows/runtime-pin-compat.yml
vendored
@ -60,8 +60,8 @@ jobs:
|
||||
name: PyPI-latest install + import smoke
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: pip
|
||||
|
||||
4
.github/workflows/runtime-prbuild-compat.yml
vendored
4
.github/workflows/runtime-prbuild-compat.yml
vendored
@ -61,8 +61,8 @@ jobs:
|
||||
name: PR-built wheel + import smoke
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
cache: pip
|
||||
|
||||
2
.github/workflows/secret-pattern-drift.yml
vendored
2
.github/workflows/secret-pattern-drift.yml
vendored
@ -49,7 +49,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
|
||||
2
.github/workflows/secret-scan.yml
vendored
2
.github/workflows/secret-scan.yml
vendored
@ -40,7 +40,7 @@ jobs:
|
||||
name: Scan diff for credential-shaped strings
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
fetch-depth: 2 # need previous commit to diff against on push events
|
||||
|
||||
|
||||
36
.github/workflows/sweep-cf-orphans.yml
vendored
36
.github/workflows/sweep-cf-orphans.yml
vendored
@ -78,15 +78,30 @@ jobs:
|
||||
MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '50' }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Verify required secrets present
|
||||
id: verify
|
||||
# Soft skip when secrets aren't configured. The 6 secrets have
|
||||
# to be set on the repo manually before this workflow can do
|
||||
# real work; until they are, the schedule is a no-op rather
|
||||
# than a recurring red CI run. workflow_dispatch surfaces a
|
||||
# warning so an operator running it ad-hoc sees the gap.
|
||||
# Schedule-vs-dispatch behaviour split (hardened 2026-04-28
|
||||
# after the silent-no-op incident below):
|
||||
#
|
||||
# The earlier soft-skip-on-schedule policy hid a real leak. All
|
||||
# six secrets were unset on this repo for an unknown duration;
|
||||
# every hourly run printed a yellow ::warning:: and exited 0,
|
||||
# so the workflow registered as "passing" while doing nothing.
|
||||
# CF orphans accumulated to 152/200 (~76% of the zone quota
|
||||
# gone) before a manual `dig`-driven audit caught it. Anything
|
||||
# that runs as a janitor and reports green while idle is
|
||||
# indistinguishable from "the janitor is healthy" — so we now
|
||||
# treat schedule (and any future workflow_run/push triggers)
|
||||
# as a hard-fail when secrets are missing.
|
||||
#
|
||||
# - schedule / workflow_run / push → exit 1 (red CI run
|
||||
# surfaces the misconfiguration the next tick)
|
||||
# - workflow_dispatch → exit 0 with a warning
|
||||
# (an operator ran this ad-hoc; they already accepted the
|
||||
# state of the repo and want the workflow to short-circuit
|
||||
# so they can rerun after fixing the secret)
|
||||
run: |
|
||||
missing=()
|
||||
for var in CF_API_TOKEN CF_ZONE_ID CP_PROD_ADMIN_TOKEN CP_STAGING_ADMIN_TOKEN AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
|
||||
@ -95,10 +110,17 @@ jobs:
|
||||
fi
|
||||
done
|
||||
if [ ${#missing[@]} -gt 0 ]; then
|
||||
echo "::warning::skipping sweep — secrets not yet configured: ${missing[*]}"
|
||||
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||
echo "::warning::skipping sweep — secrets not configured: ${missing[*]}"
|
||||
echo "::warning::set them at Settings → Secrets and Variables → Actions, then rerun."
|
||||
echo "skip=true" >> "$GITHUB_OUTPUT"
|
||||
exit 0
|
||||
fi
|
||||
echo "::error::sweep cannot run — required secrets missing: ${missing[*]}"
|
||||
echo "::error::set them at Settings → Secrets and Variables → Actions, or disable this workflow."
|
||||
echo "::error::a silent skip masked an active CF DNS leak (152/200 zone records) caught only by a manual audit on 2026-04-28; this gate exists to make the gap visible."
|
||||
exit 1
|
||||
fi
|
||||
echo "All required secrets present ✓"
|
||||
echo "skip=false" >> "$GITHUB_OUTPUT"
|
||||
|
||||
|
||||
4
.github/workflows/test-ops-scripts.yml
vendored
4
.github/workflows/test-ops-scripts.yml
vendored
@ -27,8 +27,8 @@ jobs:
|
||||
name: Ops scripts (unittest)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v5
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
- name: Run unittest
|
||||
|
||||
775
canvas/package-lock.json
generated
775
canvas/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -32,13 +32,13 @@
|
||||
"@playwright/test": "^1.59.1",
|
||||
"@testing-library/jest-dom": "^6.6.0",
|
||||
"@testing-library/react": "^16.1.0",
|
||||
"@types/node": "^22.0.0",
|
||||
"@types/node": "^25.6.0",
|
||||
"@types/react": "^19.0.0",
|
||||
"@types/react-dom": "^19.0.0",
|
||||
"@vitejs/plugin-react": "^6.0.1",
|
||||
"@vitest/coverage-v8": "^4.1.5",
|
||||
"autoprefixer": "^10.4.0",
|
||||
"jsdom": "^25.0.0",
|
||||
"jsdom": "^29.1.0",
|
||||
"postcss": "^8.5.12",
|
||||
"tailwindcss": "^3.4.0",
|
||||
"typescript": "^5.7.0",
|
||||
|
||||
@ -83,6 +83,7 @@ SUBPACKAGES = {
|
||||
"adapters",
|
||||
"builtin_tools",
|
||||
"lib",
|
||||
"platform_tools",
|
||||
"plugins_registry",
|
||||
"policies",
|
||||
"skill_loader",
|
||||
|
||||
199
scripts/measure-coordinator-task-bounds.sh
Executable file
199
scripts/measure-coordinator-task-bounds.sh
Executable file
@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Measure platform-side bounds (or absence thereof) on a coordinator's
|
||||
# task execution. Reproduction harness for Issue 4 of the 2026-04-28
|
||||
# CP review, surfaced in the RFC at molecule-core#2251.
|
||||
#
|
||||
# What Issue 4 hypothesized
|
||||
# -------------------------
|
||||
# A coordinator workspace receives an A2A kickoff, delegates to children,
|
||||
# then enters a synthesis phase whose duration the platform does not
|
||||
# bound. `DELEGATION_TIMEOUT` (300s, in workspace/builtin_tools/
|
||||
# delegation.py) governs the parent→child HTTP request, NOT the
|
||||
# coordinator's own task-execution budget. So a coordinator that's
|
||||
# spent 10min synthesizing past delegation will keep going until the
|
||||
# LLM returns or its host runtime crashes — never bounded by a platform
|
||||
# ceiling.
|
||||
#
|
||||
# Issue 4 explicitly hedged ("This isn't necessarily a platform bug —
|
||||
# could be that the Design Director's system prompt told it to do
|
||||
# complex synthesis work that exceeded the A2A response window"). This
|
||||
# script is the empirical test of which side that ambiguity lands on.
|
||||
#
|
||||
# What this script does NOT do
|
||||
# ----------------------------
|
||||
# - It does NOT assert pass/fail. The "bug" is absence-of-bound, which
|
||||
# is hard to assert in a single run. The script outputs measurement
|
||||
# data; the team interprets.
|
||||
# - It does NOT simulate a coordinator hang via runtime modification.
|
||||
# Instead, it drives a real coordinator with a synthesis-heavy task
|
||||
# and observes the duration the platform tolerates.
|
||||
# - It does NOT clean up on failure. Use scripts/cleanup-rogue-workspaces.sh.
|
||||
#
|
||||
# What "bug confirmed" looks like (per Issue 4)
|
||||
# ---------------------------------------------
|
||||
# coordinator_response_secs > 300 AND no platform_intervention=true
|
||||
# in the heartbeat trace → coordinator ran past DELEGATION_TIMEOUT
|
||||
# (HTTP-level) without any platform ceiling kicking in. The RFC's
|
||||
# V1.0 operator ceiling would convert this into an explicit
|
||||
# `terminated` response at MAX_TASK_EXECUTION_SECS.
|
||||
#
|
||||
# What "bug refuted" looks like
|
||||
# -----------------------------
|
||||
# coordinator_response_secs cleanly bounded by either the LLM API
|
||||
# timeout or some other platform mechanism → Issue 4's premise that
|
||||
# "no platform-enforced timeout" is wrong, V1.0 of the RFC needs
|
||||
# re-justification.
|
||||
#
|
||||
# Usage
|
||||
# -----
|
||||
# PLATFORM=http://localhost:8080 OPENROUTER_API_KEY=... \
|
||||
# bash scripts/measure-coordinator-task-bounds.sh
|
||||
#
|
||||
# Or against staging-api (requires a tenant admin token):
|
||||
#
|
||||
# PLATFORM=https://your-staging-tenant.example \
|
||||
# OPENROUTER_API_KEY=... \
|
||||
# bash scripts/measure-coordinator-task-bounds.sh
|
||||
#
|
||||
set -euo pipefail
|
||||
|
||||
PLATFORM="${PLATFORM:-http://localhost:8080}"
|
||||
OR_KEY="${OPENROUTER_API_KEY:-${OPENAI_API_KEY:?Set OPENROUTER_API_KEY (or OPENAI_API_KEY)}}"
|
||||
# Synthesis prompt knob — choose the size of the post-delegation work
|
||||
# the coordinator is asked to do. Default exercises 3 delegation rounds
|
||||
# with non-trivial aggregation.
|
||||
SYNTHESIS_DEPTH="${SYNTHESIS_DEPTH:-3}"
|
||||
# Max time we'll wait on the coordinator's A2A response before giving
|
||||
# up on this measurement. Set generously (10min) so we don't truncate
|
||||
# a slow-but-eventually-completing case.
|
||||
A2A_TIMEOUT="${A2A_TIMEOUT:-600}"
|
||||
|
||||
ts() { date -u +%Y-%m-%dT%H:%M:%S.%3NZ 2>/dev/null || date -u +%Y-%m-%dT%H:%M:%SZ; }
|
||||
|
||||
emit() {
|
||||
# One JSON line per event so the output is machine-readable.
|
||||
printf '{"ts":"%s","event":"%s","data":%s}\n' "$(ts)" "$1" "${2:-null}"
|
||||
}
|
||||
|
||||
emit "run_started" "{\"platform\":\"$PLATFORM\",\"synthesis_depth\":$SYNTHESIS_DEPTH,\"a2a_timeout_secs\":$A2A_TIMEOUT}"
|
||||
|
||||
# ---- Setup: coordinator + 1 child ----
|
||||
emit "provisioning_pm" null
|
||||
R=$(curl -s -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \
|
||||
-d '{"name":"PM","role":"Coordinator — delegates and synthesizes","tier":2,"template":"claude-code-default"}')
|
||||
PM_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))")
|
||||
[ -n "$PM_ID" ] || { echo "ERROR: PM create failed: $R" >&2; exit 1; }
|
||||
emit "pm_provisioned" "{\"workspace_id\":\"$PM_ID\"}"
|
||||
|
||||
emit "provisioning_child" null
|
||||
R=$(curl -s -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \
|
||||
-d '{"name":"Researcher","role":"Returns short research findings","tier":2,"template":"langgraph"}')
|
||||
CHILD_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))")
|
||||
[ -n "$CHILD_ID" ] || { echo "ERROR: child create failed: $R" >&2; exit 1; }
|
||||
emit "child_provisioned" "{\"workspace_id\":\"$CHILD_ID\"}"
|
||||
|
||||
curl -s -X PATCH "$PLATFORM/workspaces/$CHILD_ID" -H 'Content-Type: application/json' \
|
||||
-d "{\"parent_id\":\"$PM_ID\"}" > /dev/null
|
||||
curl -s -X POST "$PLATFORM/workspaces/$CHILD_ID/secrets" -H 'Content-Type: application/json' \
|
||||
-d "{\"key\":\"OPENROUTER_API_KEY\",\"value\":\"$OR_KEY\"}" > /dev/null
|
||||
|
||||
# ---- Wait for both online ----
|
||||
wait_online() {
|
||||
local id="$1"; local label="$2"
|
||||
for i in $(seq 1 30); do
|
||||
s=$(curl -s "$PLATFORM/workspaces/$id" | python3 -c "import sys,json; print(json.load(sys.stdin).get('status',''))" 2>/dev/null)
|
||||
[ "$s" = "online" ] && { emit "online" "{\"workspace\":\"$label\",\"after_polls\":$i}"; return 0; }
|
||||
sleep 3
|
||||
done
|
||||
emit "online_timeout" "{\"workspace\":\"$label\"}"
|
||||
return 1
|
||||
}
|
||||
wait_online "$PM_ID" "PM" || exit 2
|
||||
wait_online "$CHILD_ID" "child" || exit 2
|
||||
|
||||
# ---- Build a synthesis-heavy kickoff task ----
|
||||
# The task asks the coordinator to delegate N times, each time with a
|
||||
# different sub-question, then aggregate findings into a single report.
|
||||
# The synthesis phase happens entirely inside the coordinator's A2A
|
||||
# handler post-delegation, which is the exact code path Issue 4 named.
|
||||
TASK="You are coordinating a research analysis. Delegate $SYNTHESIS_DEPTH separate sub-questions to the Researcher (one at a time, sequentially — wait for each response before sending the next), then synthesize all findings into a single coherent report. Sub-questions: (a) historical context of distributed consensus, (b) modern Byzantine-fault-tolerant protocols, (c) practical trade-offs between Raft and Paxos. After all delegations complete, write a 600-word synthesis comparing the three responses and drawing one cross-cutting insight. Do not respond until the synthesis is complete."
|
||||
|
||||
# ---- Time the A2A kickoff round-trip ----
|
||||
emit "a2a_kickoff_sent" "{\"to\":\"$PM_ID\",\"task_chars\":${#TASK}}"
|
||||
START_NS=$(python3 -c 'import time; print(int(time.time_ns()))')
|
||||
|
||||
# Use --max-time to bound this measurement (else the script could itself
|
||||
# hang past sensible limits). The bound is a measurement-side timeout,
|
||||
# NOT a platform-side timeout — the latter is what we're trying to
|
||||
# detect.
|
||||
RESP=$(curl -s --max-time "$A2A_TIMEOUT" -X POST "$PLATFORM/workspaces/$PM_ID/a2a" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$(python3 -c "
|
||||
import json,sys
|
||||
print(json.dumps({
|
||||
'method':'message/send',
|
||||
'params':{
|
||||
'message':{
|
||||
'role':'user',
|
||||
'parts':[{'type':'text','text':sys.argv[1]}]
|
||||
}
|
||||
}
|
||||
}))
|
||||
" "$TASK")" || RESP="<curl_failed_or_timed_out>")
|
||||
|
||||
END_NS=$(python3 -c 'import time; print(int(time.time_ns()))')
|
||||
ELAPSED_SECS=$(python3 -c "print(round(($END_NS - $START_NS) / 1e9, 2))")
|
||||
|
||||
emit "a2a_response_observed" "{\"elapsed_secs\":$ELAPSED_SECS,\"response_chars\":${#RESP},\"response_head\":$(python3 -c "import json,sys; print(json.dumps(sys.argv[1][:200]))" "$RESP")}"
|
||||
|
||||
# ---- Pull heartbeat trace from the platform ----
|
||||
# The heartbeat endpoint records workspace liveness pings. If the
|
||||
# platform implements per-task bounds, the trace will show a status
|
||||
# transition (e.g. terminated) within the run window. Absence of any
|
||||
# such transition over a 10min synthesis is the empirical evidence
|
||||
# that no platform ceiling fired.
|
||||
emit "fetching_heartbeat_trace" null
|
||||
HB=$(curl -s "$PLATFORM/workspaces/$PM_ID/heartbeat-history?since_secs=$A2A_TIMEOUT" 2>&1 || echo "<endpoint_unavailable>")
|
||||
emit "heartbeat_trace" "{\"raw\":$(python3 -c "import json,sys; print(json.dumps(sys.argv[1]))" "$HB")}"
|
||||
|
||||
# ---- Summary ----
|
||||
emit "run_completed" "{\"elapsed_secs\":$ELAPSED_SECS,\"pm_id\":\"$PM_ID\",\"child_id\":\"$CHILD_ID\"}"
|
||||
|
||||
cat <<EOF >&2
|
||||
|
||||
=========================================
|
||||
Measurement complete.
|
||||
Coordinator response time: ${ELAPSED_SECS}s
|
||||
PM workspace: $PM_ID
|
||||
Child workspace: $CHILD_ID
|
||||
=========================================
|
||||
|
||||
Interpretation guide:
|
||||
|
||||
ELAPSED_SECS < 60 → Synthesis completed quickly; not informative
|
||||
about platform bounds (LLM was just fast).
|
||||
Re-run with SYNTHESIS_DEPTH=8 to force longer
|
||||
synthesis.
|
||||
|
||||
60 <= ELAPSED < 300 → Within DELEGATION_TIMEOUT. Doesn't prove or
|
||||
refute Issue 4 — the HTTP-level timeout would
|
||||
be sufficient if synthesis happened to fall
|
||||
under it.
|
||||
|
||||
ELAPSED >= 300 → BUG CONFIRMED IF heartbeat_trace shows no
|
||||
platform-side transition. Coordinator ran past
|
||||
DELEGATION_TIMEOUT without any platform ceiling
|
||||
kicking in — exactly the gap the RFC V1.0 plans
|
||||
to close with MAX_TASK_EXECUTION_SECS.
|
||||
|
||||
curl_failed_or_timed_out → \$A2A_TIMEOUT exceeded. Either the
|
||||
coordinator is genuinely hung (likely) or
|
||||
synthesis is just very slow. Pull workspace
|
||||
status separately to disambiguate.
|
||||
|
||||
Cleanup:
|
||||
curl -X DELETE $PLATFORM/workspaces/$PM_ID
|
||||
curl -X DELETE $PLATFORM/workspaces/$CHILD_ID
|
||||
|
||||
EOF
|
||||
@ -9,45 +9,45 @@ require (
|
||||
github.com/alicebob/miniredis/v2 v2.37.0
|
||||
github.com/creack/pty v1.1.18
|
||||
github.com/docker/docker v28.5.2+incompatible
|
||||
github.com/docker/go-connections v0.6.0
|
||||
github.com/gin-contrib/cors v1.7.2
|
||||
github.com/gin-gonic/gin v1.10.0
|
||||
github.com/docker/go-connections v0.7.0
|
||||
github.com/gin-contrib/cors v1.7.7
|
||||
github.com/gin-gonic/gin v1.12.0
|
||||
github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1
|
||||
github.com/golang-jwt/jwt/v5 v5.2.2
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/gorilla/websocket v1.5.3
|
||||
github.com/lib/pq v1.10.9
|
||||
github.com/opencontainers/image-spec v1.1.1
|
||||
github.com/redis/go-redis/v9 v9.7.3
|
||||
github.com/redis/go-redis/v9 v9.19.0
|
||||
github.com/robfig/cron/v3 v3.0.1
|
||||
golang.org/x/crypto v0.49.0
|
||||
golang.org/x/crypto v0.50.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/Microsoft/go-winio v0.4.21 // indirect
|
||||
github.com/bytedance/sonic v1.11.6 // indirect
|
||||
github.com/bytedance/sonic/loader v0.1.1 // indirect
|
||||
github.com/Microsoft/go-winio v0.6.2 // indirect
|
||||
github.com/bytedance/gopkg v0.1.3 // indirect
|
||||
github.com/bytedance/sonic v1.15.0 // indirect
|
||||
github.com/bytedance/sonic/loader v0.5.0 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/cloudwego/base64x v0.1.4 // indirect
|
||||
github.com/cloudwego/iasm v0.2.0 // indirect
|
||||
github.com/cloudwego/base64x v0.1.6 // indirect
|
||||
github.com/containerd/errdefs v1.0.0 // indirect
|
||||
github.com/containerd/errdefs/pkg v0.3.0 // indirect
|
||||
github.com/containerd/log v0.1.0 // indirect
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
|
||||
github.com/distribution/reference v0.6.0 // indirect
|
||||
github.com/docker/go-units v0.5.0 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
|
||||
github.com/gin-contrib/sse v0.1.0 // indirect
|
||||
github.com/gabriel-vasile/mimetype v1.4.12 // indirect
|
||||
github.com/gin-contrib/sse v1.1.0 // indirect
|
||||
github.com/go-logr/logr v1.4.3 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/go-playground/locales v0.14.1 // indirect
|
||||
github.com/go-playground/universal-translator v0.18.1 // indirect
|
||||
github.com/go-playground/validator/v10 v10.20.0 // indirect
|
||||
github.com/goccy/go-json v0.10.2 // indirect
|
||||
github.com/go-playground/validator/v10 v10.30.1 // indirect
|
||||
github.com/goccy/go-json v0.10.5 // indirect
|
||||
github.com/goccy/go-yaml v1.19.2 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
|
||||
github.com/leodido/go-urn v1.4.0 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/moby/docker-image-spec v1.3.1 // indirect
|
||||
@ -57,11 +57,14 @@ require (
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/morikuni/aec v1.1.0 // indirect
|
||||
github.com/opencontainers/go-digest v1.0.0 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.2.4 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/quic-go/qpack v0.6.0 // indirect
|
||||
github.com/quic-go/quic-go v0.59.0 // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
github.com/ugorji/go/codec v1.2.12 // indirect
|
||||
github.com/ugorji/go/codec v1.3.1 // indirect
|
||||
github.com/yuin/gopher-lua v1.1.1 // indirect
|
||||
go.mongodb.org/mongo-driver/v2 v2.5.0 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 // indirect
|
||||
go.opentelemetry.io/otel v1.43.0 // indirect
|
||||
@ -70,10 +73,11 @@ require (
|
||||
go.opentelemetry.io/otel/sdk v1.43.0 // indirect
|
||||
go.opentelemetry.io/otel/sdk/metric v1.43.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.43.0 // indirect
|
||||
golang.org/x/arch v0.8.0 // indirect
|
||||
go.uber.org/atomic v1.11.0 // indirect
|
||||
golang.org/x/arch v0.23.0 // indirect
|
||||
golang.org/x/net v0.52.0 // indirect
|
||||
golang.org/x/sys v0.42.0 // indirect
|
||||
golang.org/x/text v0.35.0 // indirect
|
||||
golang.org/x/sys v0.43.0 // indirect
|
||||
golang.org/x/text v0.36.0 // indirect
|
||||
golang.org/x/time v0.15.0 // indirect
|
||||
google.golang.org/protobuf v1.36.11 // indirect
|
||||
gotest.tools/v3 v3.5.2 // indirect
|
||||
|
||||
@ -2,8 +2,8 @@ github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEK
|
||||
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
|
||||
github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
|
||||
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
|
||||
github.com/Microsoft/go-winio v0.4.21 h1:+6mVbXh4wPzUrl1COX9A+ZCvEpYsOBZ6/+kwDnvLyro=
|
||||
github.com/Microsoft/go-winio v0.4.21/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
|
||||
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
|
||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||
github.com/Molecule-AI/molecule-ai-plugin-gh-identity v0.0.0-20260424033845-4fd5ac7be30f h1:YkLRhUg+9qr9OV9N8dG1Hj0Ml7TThHlRwh5F//oUJVs=
|
||||
github.com/Molecule-AI/molecule-ai-plugin-gh-identity v0.0.0-20260424033845-4fd5ac7be30f/go.mod h1:NqdtlWZDJvpXNJRHnMkPhTKHdA1LZTNH+63TB66JSOU=
|
||||
github.com/Molecule-AI/molecule-ai-plugin-github-app-auth v0.0.0-20260421064811-7d98ae51e31d h1:GpYhP6FxaJZc1Ljy5/YJ9ZIVGvfOqZBmDolNr2S5x2g=
|
||||
@ -14,18 +14,18 @@ github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
|
||||
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
|
||||
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
|
||||
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
|
||||
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
|
||||
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
|
||||
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
|
||||
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
|
||||
github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M=
|
||||
github.com/bytedance/gopkg v0.1.3/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM=
|
||||
github.com/bytedance/sonic v1.15.0 h1:/PXeWFaR5ElNcVE84U0dOHjiMHQOwNIx3K4ymzh/uSE=
|
||||
github.com/bytedance/sonic v1.15.0/go.mod h1:tFkWrPz0/CUCLEF4ri4UkHekCIcdnkqXw9VduqpJh0k=
|
||||
github.com/bytedance/sonic/loader v0.5.0 h1:gXH3KVnatgY7loH5/TkeVyXPfESoqSBSBEiDd5VjlgE=
|
||||
github.com/bytedance/sonic/loader v0.5.0/go.mod h1:AR4NYCk5DdzZizZ5djGqQ92eEhCCcdf5x77udYiSJRo=
|
||||
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
|
||||
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
|
||||
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
|
||||
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
|
||||
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
|
||||
github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
|
||||
github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
|
||||
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
|
||||
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
|
||||
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
|
||||
@ -37,26 +37,24 @@ github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
|
||||
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
|
||||
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
|
||||
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
|
||||
github.com/docker/docker v28.5.2+incompatible h1:DBX0Y0zAjZbSrm1uzOkdr1onVghKaftjlSWt4AFexzM=
|
||||
github.com/docker/docker v28.5.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
|
||||
github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94=
|
||||
github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE=
|
||||
github.com/docker/go-connections v0.7.0 h1:6SsRfJddP22WMrCkj19x9WKjEDTB+ahsdiGYf0mN39c=
|
||||
github.com/docker/go-connections v0.7.0/go.mod h1:no1qkHdjq7kLMGUXYAduOhYPSJxxvgWBh7ogVvptn3Q=
|
||||
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
|
||||
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
|
||||
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
|
||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
|
||||
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
|
||||
github.com/gin-contrib/cors v1.7.2 h1:oLDHxdg8W/XDoN/8zamqk/Drgt4oVZDvaV0YmvVICQw=
|
||||
github.com/gin-contrib/cors v1.7.2/go.mod h1:SUJVARKgQ40dmrzgXEVxj2m7Ig1v1qIboQkPDTQ9t2E=
|
||||
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
|
||||
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
|
||||
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
|
||||
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
|
||||
github.com/gabriel-vasile/mimetype v1.4.12 h1:e9hWvmLYvtp846tLHam2o++qitpguFiYCKbn0w9jyqw=
|
||||
github.com/gabriel-vasile/mimetype v1.4.12/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s=
|
||||
github.com/gin-contrib/cors v1.7.7 h1:Oh9joP463x7Mw72vhvJ61YQm8ODh9b04YR7vsOErD0Q=
|
||||
github.com/gin-contrib/cors v1.7.7/go.mod h1:K5tW0RkzJtWSiOdikXloy8VEZlgdVNpHNw8FpjUPNrE=
|
||||
github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w=
|
||||
github.com/gin-contrib/sse v1.1.0/go.mod h1:hxRZ5gVpWMT7Z0B0gSNYqqsSCNIJMjzvm6fqCz9vjwM=
|
||||
github.com/gin-gonic/gin v1.12.0 h1:b3YAbrZtnf8N//yjKeU2+MQsh2mY5htkZidOM7O0wG8=
|
||||
github.com/gin-gonic/gin v1.12.0/go.mod h1:VxccKfsSllpKshkBWgVgRniFFAzFb9csfngsqANjnLc=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
|
||||
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
@ -68,14 +66,16 @@ github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/o
|
||||
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
|
||||
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
|
||||
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
|
||||
github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
|
||||
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
|
||||
github.com/go-playground/validator/v10 v10.30.1 h1:f3zDSN/zOma+w6+1Wswgd9fLkdwy06ntQJp0BBvFG0w=
|
||||
github.com/go-playground/validator/v10 v10.30.1/go.mod h1:oSuBIQzuJxL//3MelwSLD5hc2Tu889bF0Idm9Dg26cM=
|
||||
github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1 h1:wG8n/XJQ07TmjbITcGiUaOtXxdrINDz1b0J1w0SzqDc=
|
||||
github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1/go.mod h1:A2S0CWkNylc2phvKXWBBdD3K0iGnDBGbzRpISP2zBl8=
|
||||
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
||||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
github.com/golang-jwt/jwt/v5 v5.2.2 h1:Rl4B7itRWVtYIHFrSNd7vhTiz9UpLdi6gZhZ3wEeDy8=
|
||||
github.com/golang-jwt/jwt/v5 v5.2.2/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
|
||||
github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
|
||||
github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
|
||||
github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM=
|
||||
github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY=
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
@ -88,10 +88,8 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l8
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/kisielk/sqlstruct v0.0.0-20201105191214-5f3e10d3ab46/go.mod h1:yyMNCyc/Ib3bDTKd379tNMpB/7/H5TjM2Y9QJ5THLbE=
|
||||
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
|
||||
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
|
||||
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
|
||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||
github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
|
||||
github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
@ -121,41 +119,45 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
|
||||
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
|
||||
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
|
||||
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
|
||||
github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
|
||||
github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
|
||||
github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
|
||||
github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/redis/go-redis/v9 v9.7.3 h1:YpPyAayJV+XErNsatSElgRZZVCwXX9QzkKYNvO7x0wM=
|
||||
github.com/redis/go-redis/v9 v9.7.3/go.mod h1:bGUrSggJ9X9GUmZpZNEOQKaANxSGgOEBRltRTZHSvrA=
|
||||
github.com/quic-go/qpack v0.6.0 h1:g7W+BMYynC1LbYLSqRt8PBg5Tgwxn214ZZR34VIOjz8=
|
||||
github.com/quic-go/qpack v0.6.0/go.mod h1:lUpLKChi8njB4ty2bFLX2x4gzDqXwUpaO1DP9qMDZII=
|
||||
github.com/quic-go/quic-go v0.59.0 h1:OLJkp1Mlm/aS7dpKgTc6cnpynnD2Xg7C1pwL6vy/SAw=
|
||||
github.com/quic-go/quic-go v0.59.0/go.mod h1:upnsH4Ju1YkqpLXC305eW3yDZ4NfnNbmQRCMWS58IKU=
|
||||
github.com/redis/go-redis/v9 v9.19.0 h1:XPVaaPSnG6RhYf7p+rmSa9zZfeVAnWsH5h3lxthOm/k=
|
||||
github.com/redis/go-redis/v9 v9.19.0/go.mod h1:v/M13XI1PVCDcm01VtPFOADfZtHf8YW3baQf57KlIkA=
|
||||
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
|
||||
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
|
||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
|
||||
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
|
||||
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
|
||||
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
|
||||
github.com/ugorji/go/codec v1.3.1 h1:waO7eEiFDwidsBN6agj1vJQ4AG7lh2yqXyOXqhgQuyY=
|
||||
github.com/ugorji/go/codec v1.3.1/go.mod h1:pRBVtBSKl77K30Bv8R2P+cLSGaTtex6fsA2Wjqmfxj4=
|
||||
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
|
||||
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
|
||||
github.com/zeebo/xxh3 v1.1.0 h1:s7DLGDK45Dyfg7++yxI0khrfwq9661w9EN78eP/UZVs=
|
||||
github.com/zeebo/xxh3 v1.1.0/go.mod h1:IisAie1LELR4xhVinxWS5+zf1lA4p0MW4T+w+W07F5s=
|
||||
go.mongodb.org/mongo-driver/v2 v2.5.0 h1:yXUhImUjjAInNcpTcAlPHiT7bIXhshCTL3jVBkF3xaE=
|
||||
go.mongodb.org/mongo-driver/v2 v2.5.0/go.mod h1:yOI9kBsufol30iFsl1slpdq1I0eHPzybRWdyYUs8K/0=
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
|
||||
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 h1:OyrsyzuttWTSur2qN/Lm0m2a8yqyIjUVBZcxFPuXq2o=
|
||||
@ -176,21 +178,21 @@ go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09
|
||||
go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0=
|
||||
go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g=
|
||||
go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk=
|
||||
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
|
||||
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
|
||||
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
|
||||
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
|
||||
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
|
||||
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
|
||||
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
|
||||
go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y=
|
||||
go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU=
|
||||
golang.org/x/arch v0.23.0 h1:lKF64A2jF6Zd8L0knGltUnegD62JMFBiCPBmQpToHhg=
|
||||
golang.org/x/arch v0.23.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A=
|
||||
golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI=
|
||||
golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q=
|
||||
golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
|
||||
golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
|
||||
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
|
||||
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
|
||||
golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI=
|
||||
golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||
golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg=
|
||||
golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164=
|
||||
golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U=
|
||||
golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA=
|
||||
@ -209,5 +211,3 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q=
|
||||
gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA=
|
||||
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
|
||||
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
|
||||
|
||||
@ -27,6 +27,7 @@ from a2a_tools import (
|
||||
tool_recall_memory,
|
||||
tool_send_message_to_user,
|
||||
)
|
||||
from platform_tools.registry import TOOLS as _PLATFORM_TOOL_SPECS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -45,158 +46,27 @@ from a2a_client import ( # noqa: F401, E402
|
||||
from a2a_tools import report_activity # noqa: F401, E402
|
||||
|
||||
# --- Tool definitions (schemas) ---
|
||||
#
|
||||
# Built once at import time from the platform_tools registry. The MCP
|
||||
# `description` field is the spec's `short` line — that's the unified
|
||||
# tool description used by both the MCP tool listing AND the bullet
|
||||
# rendering in the agent-facing system-prompt section. The deeper
|
||||
# `when_to_use` guidance is appended to the system prompt only (it's
|
||||
# too long to live in MCP `description` without bloating every
|
||||
# tool-list response the model sees).
|
||||
|
||||
TOOLS = [
|
||||
{
|
||||
"name": "delegate_task",
|
||||
"description": "Delegate a task to another workspace via A2A protocol and WAIT for the response. Use for quick tasks. The target must be a peer (sibling or parent/child). Use list_peers to find available targets.",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"workspace_id": {
|
||||
"type": "string",
|
||||
"description": "Target workspace ID (from list_peers)",
|
||||
},
|
||||
"task": {
|
||||
"type": "string",
|
||||
"description": "The task description to send to the target workspace",
|
||||
},
|
||||
},
|
||||
"required": ["workspace_id", "task"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "delegate_task_async",
|
||||
"description": "Send a task to another workspace with a short timeout (fire-and-forget). Returns immediately — the target continues processing. Best when you don't need the result right away. Note: check_task_status may not work with all workspace implementations.",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"workspace_id": {
|
||||
"type": "string",
|
||||
"description": "Target workspace ID (from list_peers)",
|
||||
},
|
||||
"task": {
|
||||
"type": "string",
|
||||
"description": "The task description to send to the target workspace",
|
||||
},
|
||||
},
|
||||
"required": ["workspace_id", "task"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "check_task_status",
|
||||
"description": "Check the status of a previously submitted async task via tasks/get. Note: only works if the target workspace's A2A implementation supports task persistence. May return 'not found' for completed tasks.",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"workspace_id": {
|
||||
"type": "string",
|
||||
"description": "The workspace ID the task was sent to",
|
||||
},
|
||||
"task_id": {
|
||||
"type": "string",
|
||||
"description": "The task_id returned by delegate_task_async",
|
||||
},
|
||||
},
|
||||
"required": ["workspace_id", "task_id"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "list_peers",
|
||||
"description": "List all workspaces this agent can communicate with (siblings and parent/children). Returns name, ID, status, and role for each peer.",
|
||||
"inputSchema": {"type": "object", "properties": {}},
|
||||
},
|
||||
{
|
||||
"name": "get_workspace_info",
|
||||
"description": "Get this workspace's own info — ID, name, role, tier, parent, status.",
|
||||
"inputSchema": {"type": "object", "properties": {}},
|
||||
},
|
||||
{
|
||||
"name": "send_message_to_user",
|
||||
"description": "Send a message directly to the user's canvas chat — pushed instantly via WebSocket. Use this to: (1) acknowledge a task immediately ('Got it, I'll start working on this'), (2) send interim progress updates while doing long work, (3) deliver follow-up results after delegation completes, (4) attach files (zip, pdf, csv, image) for the user to download via the `attachments` field (NEVER paste file URLs in `message`). The message appears in the user's chat as if you're proactively reaching out.",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"message": {
|
||||
"type": "string",
|
||||
# The "no URLs in message text" rule is the single biggest
|
||||
# cause of bad chat UX: agents drop catbox.moe / file://
|
||||
# / temporary upload-host links into the prose, the
|
||||
# canvas renders them as plain markdown links the user
|
||||
# can't preview, and SaaS deployments often can't even
|
||||
# reach those external hosts. Every download MUST go
|
||||
# through the structured `attachments` field below.
|
||||
"description": (
|
||||
"Caption text for the chat bubble. Required even when sending "
|
||||
"attachments — set to a short label like 'Here's the build:' "
|
||||
"or 'Done — see attached.'\n\n"
|
||||
"DO NOT paste file URLs, download links, or container paths in "
|
||||
"this string. Files MUST go through the `attachments` field, "
|
||||
"which renders as a clickable download chip and works on SaaS "
|
||||
"deployments where external file-host URLs (catbox.moe, file://, "
|
||||
"etc.) are unreachable from the user's browser."
|
||||
),
|
||||
},
|
||||
"attachments": {
|
||||
"type": "array",
|
||||
"description": (
|
||||
"REQUIRED for any file delivery. Pass absolute file paths inside "
|
||||
"THIS container (e.g. ['/tmp/build.zip', '/workspace/report.pdf']) "
|
||||
"— the platform uploads each file and returns a download chip "
|
||||
"with the file's icon + name + size in the user's chat. The chip "
|
||||
"works in SaaS deployments because the URL is platform-served, "
|
||||
"not an external host.\n\n"
|
||||
"USE THIS instead of: pasting URLs in `message`, base64-encoding "
|
||||
"in the body, or telling the user to look at a path on disk. "
|
||||
"If the file isn't already on disk, write it first (Bash, Write "
|
||||
"tool, etc.) then pass its path here. 25 MB per file cap."
|
||||
),
|
||||
"items": {"type": "string"},
|
||||
},
|
||||
},
|
||||
"required": ["message"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "commit_memory",
|
||||
"description": "Append a new memory row to persistent storage. Each call CREATES a row — does not overwrite existing memories with the same content. Use to remember decisions, task results, and context that should survive a restart. Scope: LOCAL (this workspace only), TEAM (parent + siblings), GLOBAL (entire org). GLOBAL writes require tier-0 (root) workspace; lower-tier callers get an RBAC error.",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "The information to remember — be detailed and specific",
|
||||
},
|
||||
"scope": {
|
||||
"type": "string",
|
||||
"enum": ["LOCAL", "TEAM", "GLOBAL"],
|
||||
"description": "Memory scope (default: LOCAL)",
|
||||
},
|
||||
},
|
||||
"required": ["content"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "recall_memory",
|
||||
"description": "Substring-search persistent memory and return ALL matching rows (no pagination). Empty query returns every memory accessible at the given scope. Server-side filter is case-insensitive substring match on `content`. Use at the start of conversations to recall prior context — calling once with empty query is cheap and avoids missing relevant memories that don't match a narrow keyword.",
|
||||
"inputSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Search query (empty returns all memories)",
|
||||
},
|
||||
"scope": {
|
||||
"type": "string",
|
||||
"enum": ["LOCAL", "TEAM", "GLOBAL", ""],
|
||||
"description": "Filter by scope (empty returns all accessible)",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"name": _spec.name,
|
||||
"description": _spec.short,
|
||||
"inputSchema": _spec.input_schema,
|
||||
}
|
||||
for _spec in _PLATFORM_TOOL_SPECS
|
||||
]
|
||||
|
||||
|
||||
|
||||
|
||||
# --- Tool dispatch ---
|
||||
|
||||
async def handle_tool_call(name: str, arguments: dict) -> str:
|
||||
|
||||
@ -421,8 +421,8 @@ class BaseAdapter(ABC):
|
||||
from coordinator import get_children, get_parent_context, build_children_description
|
||||
from prompt import build_system_prompt, get_peer_capabilities, get_platform_instructions
|
||||
from builtin_tools.approval import request_approval
|
||||
from builtin_tools.delegation import delegate_to_workspace, check_delegation_status
|
||||
from builtin_tools.memory import commit_memory, search_memory
|
||||
from builtin_tools.delegation import delegate_task, delegate_task_async, check_task_status
|
||||
from builtin_tools.memory import commit_memory, recall_memory
|
||||
from builtin_tools.sandbox import run_code
|
||||
|
||||
platform_url = os.environ.get("PLATFORM_URL", "http://host.docker.internal:8080")
|
||||
@ -455,8 +455,14 @@ class BaseAdapter(ABC):
|
||||
seen_skill_ids.add(skill.metadata.id)
|
||||
logger.info(f"Loaded {len(loaded_skills)} skills: {[s.metadata.id for s in loaded_skills]}")
|
||||
|
||||
# Assemble tools: 6 core + skill tools
|
||||
all_tools = [delegate_to_workspace, check_delegation_status, request_approval, commit_memory, search_memory, run_code]
|
||||
# Core platform tools — names mirror the platform_tools registry,
|
||||
# so the names referenced in get_a2a_instructions/get_hma_instructions
|
||||
# are guaranteed to exist as @tool symbols here. The structural
|
||||
# alignment test in tests/test_platform_tools.py pins this.
|
||||
all_tools = [
|
||||
delegate_task, delegate_task_async, check_task_status,
|
||||
request_approval, commit_memory, recall_memory, run_code,
|
||||
]
|
||||
for skill in loaded_skills:
|
||||
all_tools.extend(skill.tools)
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
Delegations are non-blocking: the tool fires the A2A request in the background
|
||||
and returns immediately with a task_id. The agent can check status anytime via
|
||||
check_delegation_status, or just continue working and check later.
|
||||
check_task_status, or just continue working and check later.
|
||||
|
||||
When the delegate responds, the result is stored and the agent is notified
|
||||
via a status update.
|
||||
@ -44,7 +44,7 @@ class DelegationStatus(str, Enum):
|
||||
# The reply will arrive via the platform's stitch path when the
|
||||
# peer finishes its current work. The LLM should WAIT, not retry,
|
||||
# and definitely not fall back to doing the work itself — see the
|
||||
# check_delegation_status docstring for the prompt-side guidance.
|
||||
# check_task_status docstring for the prompt-side guidance.
|
||||
QUEUED = "queued"
|
||||
COMPLETED = "completed"
|
||||
FAILED = "failed"
|
||||
@ -110,7 +110,7 @@ async def _record_delegation_on_platform(task_id: str, target_workspace_id: str,
|
||||
Best-effort POST to /workspaces/<self>/delegations/record. The agent still
|
||||
fires A2A directly for speed + OTEL propagation, but the platform's
|
||||
GET /delegations endpoint now mirrors the same set an agent's local
|
||||
check_delegation_status sees.
|
||||
check_task_status sees.
|
||||
"""
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10) as client:
|
||||
@ -129,11 +129,11 @@ async def _record_delegation_on_platform(task_id: str, target_workspace_id: str,
|
||||
async def _refresh_queued_from_platform(task_id: str) -> bool:
|
||||
"""Lazy-refresh a QUEUED delegation's local state from the platform.
|
||||
|
||||
Called by check_delegation_status when local status is QUEUED. The
|
||||
Called by check_task_status when local status is QUEUED. The
|
||||
platform's drain stitch (a2a_queue.go) updates the delegate_result
|
||||
activity_logs row when a queued delegation eventually completes,
|
||||
but it has no callback to this runtime — without this lazy refresh,
|
||||
the LLM polling check_delegation_status would see "queued" forever
|
||||
the LLM polling check_task_status would see "queued" forever
|
||||
even after the platform has the result.
|
||||
|
||||
Returns True if the local delegation was updated to a terminal state
|
||||
@ -215,7 +215,7 @@ async def _execute_delegation(task_id: str, workspace_id: str, task: str):
|
||||
delegation.status = DelegationStatus.IN_PROGRESS
|
||||
|
||||
# #64: register on the platform so GET /workspaces/<self>/delegations
|
||||
# sees the same set as check_delegation_status. Best-effort — platform
|
||||
# sees the same set as check_task_status. Best-effort — platform
|
||||
# unreachability must not block the actual A2A delegation.
|
||||
await _record_delegation_on_platform(task_id, workspace_id, task)
|
||||
|
||||
@ -286,7 +286,7 @@ async def _execute_delegation(task_id: str, workspace_id: str, task: str):
|
||||
# accepted the request but the peer's runtime is
|
||||
# mid-task. Platform-side drain will deliver the
|
||||
# reply asynchronously. Mark QUEUED locally so
|
||||
# check_delegation_status can surface that state
|
||||
# check_task_status can surface that state
|
||||
# to the LLM with explicit "wait, don't bypass"
|
||||
# guidance. Do NOT mark FAILED — the request is
|
||||
# alive in the platform's queue, not lost.
|
||||
@ -371,14 +371,36 @@ async def _execute_delegation(task_id: str, workspace_id: str, task: str):
|
||||
|
||||
|
||||
@tool
|
||||
async def delegate_to_workspace(
|
||||
async def delegate_task(
|
||||
workspace_id: str,
|
||||
task: str,
|
||||
) -> str:
|
||||
"""Delegate a task to a peer workspace via A2A and WAIT for the response.
|
||||
|
||||
Synchronous variant — blocks until the peer replies (or the platform's
|
||||
A2A round-trip times out). Use this for QUICK questions and small
|
||||
sub-tasks where you can afford to wait inline.
|
||||
|
||||
For longer-running work (research, multi-minute jobs) use
|
||||
delegate_task_async + check_task_status instead so you don't hold
|
||||
this workspace busy waiting.
|
||||
|
||||
Tool name + description are sourced from the platform_tools registry —
|
||||
a single ToolSpec drives MCP, LangChain, and system-prompt docs.
|
||||
"""
|
||||
from a2a_tools import tool_delegate_task
|
||||
return await tool_delegate_task(workspace_id, task)
|
||||
|
||||
|
||||
@tool
|
||||
async def delegate_task_async(
|
||||
workspace_id: str,
|
||||
task: str,
|
||||
) -> dict:
|
||||
"""Delegate a task to a peer workspace via A2A protocol (non-blocking).
|
||||
|
||||
Sends the task in the background and returns immediately with a task_id.
|
||||
Use check_delegation_status to poll for the result, or continue working
|
||||
Use check_task_status to poll for the result, or continue working
|
||||
and check later. The delegate works independently.
|
||||
|
||||
Args:
|
||||
@ -386,7 +408,7 @@ async def delegate_to_workspace(
|
||||
task: The task description to send to the peer.
|
||||
|
||||
Returns:
|
||||
A dict with task_id and status="delegated". Use check_delegation_status(task_id) to get results.
|
||||
A dict with task_id and status="delegated". Use check_task_status(task_id) to get results.
|
||||
"""
|
||||
task_id = str(uuid.uuid4())
|
||||
|
||||
@ -417,12 +439,12 @@ async def delegate_to_workspace(
|
||||
"success": True,
|
||||
"task_id": task_id,
|
||||
"status": "delegated",
|
||||
"message": f"Task delegated to {workspace_id}. Use check_delegation_status('{task_id}') to get the result when ready.",
|
||||
"message": f"Task delegated to {workspace_id}. Use check_task_status('{task_id}') to get the result when ready.",
|
||||
}
|
||||
|
||||
|
||||
@tool
|
||||
async def check_delegation_status(
|
||||
async def check_task_status(
|
||||
task_id: str = "",
|
||||
) -> dict:
|
||||
"""Check the status of a delegated task, or list all active delegations.
|
||||
@ -434,7 +456,7 @@ async def check_delegation_status(
|
||||
processing a prior task. The reply WILL arrive — the platform's
|
||||
drain re-dispatches when the peer is free. This tool transparently
|
||||
polls the platform for the eventual outcome on each call, so
|
||||
keep polling check_delegation_status periodically and you'll see
|
||||
keep polling check_task_status periodically and you'll see
|
||||
the status flip to "completed" / "failed" automatically.
|
||||
Do NOT retry the delegation. Do NOT do the work yourself.
|
||||
Acknowledge to the user that the peer is busy and will reply,
|
||||
@ -445,7 +467,7 @@ async def check_delegation_status(
|
||||
yourself if status is "failed", never if status is "queued".
|
||||
|
||||
Args:
|
||||
task_id: The task_id returned by delegate_to_workspace. If empty, lists all delegations.
|
||||
task_id: The task_id returned by delegate_task_async. If empty, lists all delegations.
|
||||
|
||||
Returns:
|
||||
Status and result (if completed) of the delegation.
|
||||
@ -493,4 +515,14 @@ async def check_delegation_status(
|
||||
elif delegation.status == DelegationStatus.FAILED:
|
||||
result["error"] = delegation.error
|
||||
|
||||
# RFC #2251 V1.0 reproduction-harness instrumentation. Every poll of
|
||||
# check_task_status emits a phase=check_status line so the harness
|
||||
# operator can tell whether a coordinator stuck for 8 minutes was
|
||||
# polling-children-the-whole-time vs synthesizing-after-children-done.
|
||||
# `grep rfc2251_phase=check_status` in the workspace's container log
|
||||
# gives the polling pattern. Strip when V1.0 ships.
|
||||
logger.info(
|
||||
"rfc2251_phase=check_status task_id=%s peer=%s status=%s",
|
||||
task_id, delegation.workspace_id, delegation.status.value,
|
||||
)
|
||||
return result
|
||||
|
||||
@ -8,7 +8,7 @@ Hierarchical Memory Architecture:
|
||||
RBAC enforcement
|
||||
----------------
|
||||
``commit_memory`` requires the ``"memory.write"`` action.
|
||||
``search_memory`` requires the ``"memory.read"`` action.
|
||||
``recall_memory`` requires the ``"memory.read"`` action.
|
||||
Roles are read from ``config.yaml`` under ``rbac.roles`` (default: operator).
|
||||
|
||||
Audit trail
|
||||
@ -188,7 +188,7 @@ async def commit_memory(content: str, scope: str = "LOCAL") -> dict:
|
||||
|
||||
|
||||
@tool
|
||||
async def search_memory(query: str = "", scope: str = "") -> dict:
|
||||
async def recall_memory(query: str = "", scope: str = "") -> dict:
|
||||
"""Search stored memories.
|
||||
|
||||
Args:
|
||||
|
||||
@ -81,7 +81,7 @@ def build_children_description(children: list[dict]) -> str:
|
||||
children,
|
||||
heading="## Your Team (sub-workspaces you coordinate)",
|
||||
instruction=(
|
||||
"Use the `delegate_to_workspace` tool to send tasks to the chosen member. "
|
||||
"Use the `delegate_task_async` tool to send tasks to the chosen member. "
|
||||
"Only delegate to members listed above."
|
||||
),
|
||||
)
|
||||
@ -92,7 +92,7 @@ def build_children_description(children: list[dict]) -> str:
|
||||
"",
|
||||
"### Coordination Rules — MANDATORY",
|
||||
"1. You are a COORDINATOR. Your ONLY job is to delegate and synthesize. NEVER do the work yourself.",
|
||||
"2. For EVERY task, use `delegate_to_workspace` to send it to the appropriate team member(s). "
|
||||
"2. For EVERY task, use `delegate_task_async` to send it to the appropriate team member(s). "
|
||||
"Do this BEFORE writing any analysis, code, or research yourself.",
|
||||
"3. If a task spans multiple members, delegate to ALL of them in parallel and aggregate results.",
|
||||
"4. If ALL members are offline/paused, tell the caller which members are unavailable. "
|
||||
@ -120,23 +120,56 @@ async def route_task_to_team(
|
||||
task: The task description to route.
|
||||
preferred_member_id: Optional — directly delegate to this member.
|
||||
"""
|
||||
from builtin_tools.delegation import delegate_to_workspace as delegate
|
||||
import time
|
||||
from builtin_tools.delegation import delegate_task_async as delegate
|
||||
|
||||
# RFC #2251 V1.0 reproduction-harness instrumentation. Phase-tagged log
|
||||
# lines correlate with scripts/measure-coordinator-task-bounds.sh's
|
||||
# external timing trace, so an operator running the harness against
|
||||
# staging can answer "what phase was the coordinator in at minute 7?".
|
||||
# `grep rfc2251_phase` on the workspace's container logs is the query.
|
||||
# Strip when V1.0 ships and the phase data lands in the structured
|
||||
# heartbeat payload instead.
|
||||
_phase_t0 = time.monotonic()
|
||||
logger.info(
|
||||
"rfc2251_phase=route_start task_chars=%d preferred_member_id=%s",
|
||||
len(task), preferred_member_id or "none",
|
||||
)
|
||||
|
||||
children = await get_children()
|
||||
logger.info(
|
||||
"rfc2251_phase=children_fetched count=%d elapsed_ms=%d",
|
||||
len(children), int((time.monotonic() - _phase_t0) * 1000),
|
||||
)
|
||||
|
||||
decision = build_team_routing_payload(
|
||||
children,
|
||||
task=task,
|
||||
preferred_member_id=preferred_member_id,
|
||||
)
|
||||
logger.info(
|
||||
"rfc2251_phase=routing_decided action=%s elapsed_ms=%d",
|
||||
decision.get("action", "unknown"), int((time.monotonic() - _phase_t0) * 1000),
|
||||
)
|
||||
|
||||
if decision.get("action") == "delegate_to_preferred_member":
|
||||
# Async delegation — returns immediately with task_id
|
||||
target = decision["preferred_member_id"]
|
||||
logger.info(
|
||||
"rfc2251_phase=delegate_invoked target=%s elapsed_ms=%d",
|
||||
target, int((time.monotonic() - _phase_t0) * 1000),
|
||||
)
|
||||
result = await delegate.ainvoke(
|
||||
{
|
||||
"workspace_id": decision["preferred_member_id"],
|
||||
"task": task,
|
||||
}
|
||||
{"workspace_id": target, "task": task}
|
||||
)
|
||||
logger.info(
|
||||
"rfc2251_phase=delegate_returned target=%s task_id=%s elapsed_ms=%d",
|
||||
target, result.get("task_id", "n/a"), int((time.monotonic() - _phase_t0) * 1000),
|
||||
)
|
||||
return result
|
||||
|
||||
logger.info(
|
||||
"rfc2251_phase=route_returning_decision_only elapsed_ms=%d",
|
||||
int((time.monotonic() - _phase_t0) * 1000),
|
||||
)
|
||||
return decision
|
||||
|
||||
@ -273,29 +273,19 @@ def get_system_prompt(config_path: str, fallback: str | None = None) -> str | No
|
||||
return fallback
|
||||
|
||||
|
||||
_A2A_INSTRUCTIONS_MCP = """## Inter-Agent Communication
|
||||
You have MCP tools for communicating with other workspaces:
|
||||
- list_peers: discover available peer workspaces (name, ID, status, role)
|
||||
- delegate_task: send a task and WAIT for the response (for quick tasks)
|
||||
- delegate_task_async: send a task and return immediately with a task_id (for long tasks)
|
||||
- check_task_status: poll an async task's status and get results when done
|
||||
- get_workspace_info: get your own workspace info
|
||||
|
||||
For quick questions, use delegate_task (synchronous).
|
||||
For long-running work (building pages, running audits), use delegate_task_async + check_task_status.
|
||||
Always use list_peers first to discover available workspace IDs.
|
||||
Access control is enforced — you can only reach siblings and parent/children.
|
||||
|
||||
PROACTIVE MESSAGING: Use send_message_to_user to push messages to the user's chat at ANY time:
|
||||
- Acknowledge tasks immediately: "Got it, delegating to the team now..."
|
||||
- Send progress updates during long work: "Research Lead finished, waiting on Dev Lead..."
|
||||
- Deliver follow-up results: "All teams reported back. Here's the synthesis: ..."
|
||||
This lets you respond quickly ("I'll work on this") and come back later with results.
|
||||
|
||||
If delegate_task returns a DELEGATION FAILED message, do NOT forward the raw error to the user.
|
||||
Instead: (1) try delegating to a different peer, (2) handle the task yourself, or
|
||||
(3) tell the user which peer is unavailable and provide your own best answer."""
|
||||
# Tool-usage instructions for system-prompt injection. Generated from
|
||||
# the platform_tools registry — every tool name, description, and usage
|
||||
# guidance comes from the canonical ToolSpec. Adding/renaming a tool in
|
||||
# registry.py automatically flows through here.
|
||||
|
||||
_A2A_FOOTER = (
|
||||
"Always use list_peers first to discover available workspace IDs. "
|
||||
"Access control is enforced — you can only reach siblings and parent/children. "
|
||||
"If a delegation returns a DELEGATION FAILED message, do NOT forward "
|
||||
"the raw error to the user. Instead: (1) try a different peer, "
|
||||
"(2) handle the task yourself, or (3) tell the user which peer is "
|
||||
"unavailable and provide your own best answer."
|
||||
)
|
||||
|
||||
_A2A_INSTRUCTIONS_CLI = """## Inter-Agent Communication
|
||||
You can delegate tasks to other workspaces using the a2a command:
|
||||
@ -309,39 +299,55 @@ For quick questions, use sync delegate. For long tasks, use --async + status.
|
||||
Only delegate to peers listed by the peers command (access control enforced)."""
|
||||
|
||||
|
||||
def _render_section(heading: str, specs, footer: str = "") -> str:
|
||||
"""Render a section: heading, per-tool bullet, per-tool when_to_use, footer."""
|
||||
parts = [heading, ""]
|
||||
for spec in specs:
|
||||
parts.append(f"- **{spec.name}**: {spec.short}")
|
||||
parts.append("")
|
||||
for spec in specs:
|
||||
parts.append(f"### {spec.name}")
|
||||
parts.append(spec.when_to_use)
|
||||
parts.append("")
|
||||
if footer:
|
||||
parts.append(footer)
|
||||
return "\n".join(parts).rstrip() + "\n"
|
||||
|
||||
|
||||
def get_a2a_instructions(mcp: bool = True) -> str:
|
||||
"""Return inter-agent communication instructions for system-prompt injection.
|
||||
|
||||
Pass `mcp=True` (default) for MCP-capable runtimes (Claude Code via SDK,
|
||||
Codex). Pass `mcp=False` for CLI-only runtimes (Ollama, custom) that have
|
||||
to call a2a_cli.py as a subprocess.
|
||||
Generated from the platform_tools registry. Pass `mcp=True` (default)
|
||||
for MCP-capable runtimes (claude-code, hermes, langchain, crewai).
|
||||
Pass `mcp=False` for CLI-only runtimes (ollama, custom subprocess
|
||||
runtimes that don't speak MCP) — those get a static block describing
|
||||
the molecule_runtime.a2a_cli subprocess interface instead.
|
||||
"""
|
||||
return _A2A_INSTRUCTIONS_MCP if mcp else _A2A_INSTRUCTIONS_CLI
|
||||
|
||||
|
||||
_HMA_INSTRUCTIONS = """## Hierarchical Memory (HMA)
|
||||
You have persistent memory tools that survive across sessions and restarts:
|
||||
|
||||
- **commit_memory(content, scope)**: Save important information.
|
||||
- LOCAL: private to you only (default)
|
||||
- TEAM: shared with your parent workspace and siblings (same team)
|
||||
- GLOBAL: shared with the entire org (only root workspaces can write)
|
||||
|
||||
- **recall_memory(query)**: Search your accessible memories. Returns LOCAL + TEAM + GLOBAL matches.
|
||||
|
||||
**When to use memory:**
|
||||
- After making a decision or learning something non-obvious → commit_memory("decision X because Y", scope="TEAM")
|
||||
- Before starting work → recall_memory("what did the team decide about X")
|
||||
- When you discover org-wide knowledge (repo locations, API patterns, conventions) → commit_memory(fact, scope="GLOBAL") if you are a root workspace, or scope="TEAM" to share with your team
|
||||
- After completing a task → commit_memory("completed task X, PR #N opened", scope="TEAM") so your lead and teammates know
|
||||
|
||||
**Memory is automatically recalled** at the start of each new session. Use it proactively during work to share context.
|
||||
"""
|
||||
if not mcp:
|
||||
return _A2A_INSTRUCTIONS_CLI
|
||||
from platform_tools.registry import a2a_tools
|
||||
return _render_section(
|
||||
"## Inter-Agent Communication",
|
||||
a2a_tools(),
|
||||
footer=_A2A_FOOTER,
|
||||
)
|
||||
|
||||
|
||||
def get_hma_instructions() -> str:
|
||||
"""Return HMA memory instructions for system-prompt injection."""
|
||||
return _HMA_INSTRUCTIONS
|
||||
"""Return HMA persistent-memory instructions for system-prompt injection.
|
||||
|
||||
Generated from the platform_tools registry.
|
||||
"""
|
||||
from platform_tools.registry import memory_tools
|
||||
return _render_section(
|
||||
"## Hierarchical Memory (HMA)",
|
||||
memory_tools(),
|
||||
footer=(
|
||||
"Memory is automatically recalled at the start of each new "
|
||||
"session. Use commit_memory proactively during work so future "
|
||||
"sessions and teammates can recall what you learned."
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# ========================================================================
|
||||
|
||||
@ -337,11 +337,16 @@ async def main(): # pragma: no cover
|
||||
# Rebuild the agent's tool list from updated skills
|
||||
if hasattr(adapter, "all_tools") and hasattr(adapter, "system_prompt"):
|
||||
from builtin_tools.approval import request_approval
|
||||
from builtin_tools.delegation import delegate_to_workspace
|
||||
from builtin_tools.memory import commit_memory, search_memory
|
||||
from builtin_tools.delegation import delegate_task, delegate_task_async, check_task_status
|
||||
from builtin_tools.memory import commit_memory, recall_memory
|
||||
from builtin_tools.sandbox import run_code
|
||||
base_tools = [delegate_to_workspace, request_approval,
|
||||
commit_memory, search_memory, run_code]
|
||||
# Core platform tools mirror adapter_base.all_tools — must
|
||||
# match the platform_tools registry names so docs and tools
|
||||
# never drift.
|
||||
base_tools = [
|
||||
delegate_task, delegate_task_async, check_task_status,
|
||||
request_approval, commit_memory, recall_memory, run_code,
|
||||
]
|
||||
skill_tools = []
|
||||
for sk in adapter.loaded_skills:
|
||||
skill_tools.extend(sk.tools)
|
||||
|
||||
13
workspace/platform_tools/__init__.py
Normal file
13
workspace/platform_tools/__init__.py
Normal file
@ -0,0 +1,13 @@
|
||||
"""Platform tools — single source of truth for tool naming and docs.
|
||||
|
||||
The platform owns A2A and persistent-memory tooling (cross-cutting
|
||||
runtime concerns per project memory project_runtime_native_pluggable.md).
|
||||
Tools are defined ONCE in `registry.py`. Every adapter — MCP server,
|
||||
LangChain wrapper, any future SDK integration — consumes the specs to
|
||||
register the tool in its native format. Doc generators (system-prompt
|
||||
injection, canvas help, future doc sites) read from the same place.
|
||||
|
||||
Adding a tool: append a ToolSpec to TOOLS in registry.py. Every
|
||||
adapter picks it up automatically; structural tests fail if any side
|
||||
drifts from the registry.
|
||||
"""
|
||||
388
workspace/platform_tools/registry.py
Normal file
388
workspace/platform_tools/registry.py
Normal file
@ -0,0 +1,388 @@
|
||||
"""Canonical registry of platform tool specs.
|
||||
|
||||
Every tool the platform offers to agents (A2A delegation, persistent
|
||||
memory, broadcast, introspection) is defined ONCE in TOOLS below.
|
||||
Adapters consume these specs to register the tool in their native
|
||||
runtime format:
|
||||
|
||||
- a2a_mcp_server.py iterates `TOOLS` to build the MCP TOOLS list +
|
||||
dispatches calls to spec.impl. No tool name or description is
|
||||
hardcoded there.
|
||||
|
||||
- builtin_tools/{delegation,memory}.py define LangChain `@tool`
|
||||
wrappers using `name=` from the spec; the wrapper body just
|
||||
calls spec.impl.
|
||||
|
||||
- executor_helpers.get_a2a_instructions() / get_hma_instructions()
|
||||
GENERATE the system-prompt doc string from `TOOLS` — no
|
||||
hand-maintained instruction text.
|
||||
|
||||
Adding a new tool: append a ToolSpec to `TOOLS` below. Every adapter
|
||||
picks it up. Structural alignment tests (workspace/tests/test_platform_tools.py)
|
||||
fail if any side drifts from the registry.
|
||||
|
||||
Renaming a tool: change `name` here. Search workspace/ for the old
|
||||
literal in case any non-adapter consumer (tests, plugin code) hard-coded
|
||||
it; update those manually. The grep is the audit, the test is the gate.
|
||||
|
||||
Removing a tool: delete the entry. Adapters stop registering it
|
||||
automatically; doc generators stop mentioning it.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Awaitable, Callable
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Literal
|
||||
|
||||
from a2a_tools import (
|
||||
tool_check_task_status,
|
||||
tool_commit_memory,
|
||||
tool_delegate_task,
|
||||
tool_delegate_task_async,
|
||||
tool_get_workspace_info,
|
||||
tool_list_peers,
|
||||
tool_recall_memory,
|
||||
tool_send_message_to_user,
|
||||
)
|
||||
|
||||
# Section name maps to the heading in the agent-facing system prompt.
|
||||
# Adding a new section: add a constant + create a corresponding
|
||||
# generator in executor_helpers (or generalize get_*_instructions).
|
||||
A2A_SECTION = "a2a"
|
||||
MEMORY_SECTION = "memory"
|
||||
|
||||
Section = Literal["a2a", "memory"]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ToolSpec:
|
||||
"""Runtime-agnostic definition of one platform tool.
|
||||
|
||||
Each adapter (MCP, LangChain, future SDK) consumes the same spec.
|
||||
Doc generators consume the same spec. There is no other source
|
||||
of truth for tool naming or description.
|
||||
"""
|
||||
|
||||
name: str
|
||||
"""The exact name agents see. MUST match every adapter's
|
||||
registered name and the literal that appears in agent-facing
|
||||
instruction docs. Structural test enforces this."""
|
||||
|
||||
short: str
|
||||
"""One-line description. Used as the MCP `description` field
|
||||
AND as the bullet line in agent-facing instruction docs."""
|
||||
|
||||
when_to_use: str
|
||||
"""Two-to-three-sentence agent-facing usage guidance — when
|
||||
to call this tool, what it returns, what NOT to confuse it
|
||||
with. Concatenated into the system prompt below the tool list."""
|
||||
|
||||
input_schema: dict[str, Any]
|
||||
"""JSON Schema for the tool's input parameters. Consumed
|
||||
directly by the MCP server. LangChain derives its schema from
|
||||
Python type annotations on the @tool function — alignment is
|
||||
pinned by the structural test."""
|
||||
|
||||
impl: Callable[..., Awaitable[str]]
|
||||
"""The actual coroutine. Both adapters call this; only the
|
||||
wrapping differs."""
|
||||
|
||||
section: Section
|
||||
"""Which agent-prompt section this tool belongs to (controls
|
||||
which instruction generator emits it)."""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# A2A — inter-agent communication & broadcast
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_DELEGATE_TASK = ToolSpec(
|
||||
name="delegate_task",
|
||||
short=(
|
||||
"Delegate a task to a peer workspace via A2A and WAIT for the "
|
||||
"response (synchronous)."
|
||||
),
|
||||
when_to_use=(
|
||||
"Use for QUICK questions and small sub-tasks where you can "
|
||||
"afford to wait inline. Returns the peer's response text "
|
||||
"directly. For longer-running work (research, multi-minute "
|
||||
"jobs) use delegate_task_async + check_task_status instead "
|
||||
"so you don't hold this workspace busy waiting."
|
||||
),
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"workspace_id": {
|
||||
"type": "string",
|
||||
"description": "Target workspace ID (from list_peers).",
|
||||
},
|
||||
"task": {
|
||||
"type": "string",
|
||||
"description": "Task description to send to the peer.",
|
||||
},
|
||||
},
|
||||
"required": ["workspace_id", "task"],
|
||||
},
|
||||
impl=tool_delegate_task,
|
||||
section=A2A_SECTION,
|
||||
)
|
||||
|
||||
_DELEGATE_TASK_ASYNC = ToolSpec(
|
||||
name="delegate_task_async",
|
||||
short=(
|
||||
"Send a task to a peer and return immediately with a task_id "
|
||||
"(non-blocking)."
|
||||
),
|
||||
when_to_use=(
|
||||
"Use for long-running work where you want to keep doing other "
|
||||
"things while the peer processes. Poll with check_task_status "
|
||||
"to retrieve the result. The platform's A2A queue handles "
|
||||
"delivery + retries; the peer works independently."
|
||||
),
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"workspace_id": {
|
||||
"type": "string",
|
||||
"description": "Target workspace ID (from list_peers).",
|
||||
},
|
||||
"task": {
|
||||
"type": "string",
|
||||
"description": "Task description to send to the peer.",
|
||||
},
|
||||
},
|
||||
"required": ["workspace_id", "task"],
|
||||
},
|
||||
impl=tool_delegate_task_async,
|
||||
section=A2A_SECTION,
|
||||
)
|
||||
|
||||
_CHECK_TASK_STATUS = ToolSpec(
|
||||
name="check_task_status",
|
||||
short=(
|
||||
"Poll the status of a task started with delegate_task_async; "
|
||||
"returns result when done."
|
||||
),
|
||||
when_to_use=(
|
||||
"Statuses: pending/in_progress (peer still working — wait), "
|
||||
"queued (peer is busy with a prior task — DO NOT retry, the "
|
||||
"platform stitches the response when it finishes), completed "
|
||||
"(result available), failed (real error — fall back to a "
|
||||
"different peer or handle it yourself)."
|
||||
),
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"workspace_id": {
|
||||
"type": "string",
|
||||
"description": "Workspace ID the task was sent to.",
|
||||
},
|
||||
"task_id": {
|
||||
"type": "string",
|
||||
"description": "task_id returned by delegate_task_async.",
|
||||
},
|
||||
},
|
||||
"required": ["workspace_id", "task_id"],
|
||||
},
|
||||
impl=tool_check_task_status,
|
||||
section=A2A_SECTION,
|
||||
)
|
||||
|
||||
_LIST_PEERS = ToolSpec(
|
||||
name="list_peers",
|
||||
short=(
|
||||
"List the workspaces this agent can communicate with — name, "
|
||||
"ID, status, role for each."
|
||||
),
|
||||
when_to_use=(
|
||||
"Call this first when you need to delegate but don't know the "
|
||||
"target's ID. Access control is enforced — you only see "
|
||||
"siblings, parent, and direct children."
|
||||
),
|
||||
input_schema={"type": "object", "properties": {}},
|
||||
impl=tool_list_peers,
|
||||
section=A2A_SECTION,
|
||||
)
|
||||
|
||||
_GET_WORKSPACE_INFO = ToolSpec(
|
||||
name="get_workspace_info",
|
||||
short="Get this workspace's own info — ID, name, role, tier, parent, status.",
|
||||
when_to_use=(
|
||||
"Use to introspect your own identity (e.g. before reporting "
|
||||
"back to the user, or to determine whether you're a tier-0 "
|
||||
"root that can write GLOBAL memory)."
|
||||
),
|
||||
input_schema={"type": "object", "properties": {}},
|
||||
impl=tool_get_workspace_info,
|
||||
section=A2A_SECTION,
|
||||
)
|
||||
|
||||
_SEND_MESSAGE_TO_USER = ToolSpec(
|
||||
name="send_message_to_user",
|
||||
short=(
|
||||
"Send a message directly to the user's canvas chat — pushed instantly "
|
||||
"via WebSocket. Use this to: (1) acknowledge a task immediately ('Got "
|
||||
"it, I'll start working on this'), (2) send interim progress updates "
|
||||
"while doing long work, (3) deliver follow-up results after delegation "
|
||||
"completes, (4) attach files (zip, pdf, csv, image) for the user to "
|
||||
"download via the `attachments` field (NEVER paste file URLs in "
|
||||
"`message`). The message appears in the user's chat as if you're "
|
||||
"proactively reaching out."
|
||||
),
|
||||
when_to_use=(
|
||||
"Use proactively across the lifecycle of a task — early to "
|
||||
"acknowledge, mid-flight to update, late to deliver. Never paste "
|
||||
"file URLs in the message body — always pass absolute paths in "
|
||||
"`attachments` so the platform serves them as download chips "
|
||||
"(works on SaaS where external file hosts are unreachable)."
|
||||
),
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"message": {
|
||||
"type": "string",
|
||||
# The "no URLs in message text" rule is the single biggest
|
||||
# cause of bad chat UX: agents drop catbox.moe / file://
|
||||
# / temporary upload-host links into the prose, the
|
||||
# canvas renders them as plain markdown links the user
|
||||
# can't preview, and SaaS deployments often can't even
|
||||
# reach those external hosts. Every download MUST go
|
||||
# through the structured `attachments` field below.
|
||||
"description": (
|
||||
"Caption text for the chat bubble. Required even when sending "
|
||||
"attachments — set to a short label like 'Here's the build:' "
|
||||
"or 'Done — see attached.'\n\n"
|
||||
"DO NOT paste file URLs, download links, or container paths in "
|
||||
"this string. Files MUST go through the `attachments` field, "
|
||||
"which renders as a clickable download chip and works on SaaS "
|
||||
"deployments where external file-host URLs (catbox.moe, file://, "
|
||||
"etc.) are unreachable from the user's browser."
|
||||
),
|
||||
},
|
||||
"attachments": {
|
||||
"type": "array",
|
||||
"description": (
|
||||
"REQUIRED for any file delivery. Pass absolute file paths inside "
|
||||
"THIS container (e.g. ['/tmp/build.zip', '/workspace/report.pdf']) "
|
||||
"— the platform uploads each file and returns a download chip "
|
||||
"with the file's icon + name + size in the user's chat. The chip "
|
||||
"works in SaaS deployments because the URL is platform-served, "
|
||||
"not an external host.\n\n"
|
||||
"USE THIS instead of: pasting URLs in `message`, base64-encoding "
|
||||
"in the body, or telling the user to look at a path on disk. "
|
||||
"If the file isn't already on disk, write it first (Bash, Write "
|
||||
"tool, etc.) then pass its path here. 25 MB per file cap."
|
||||
),
|
||||
"items": {"type": "string"},
|
||||
},
|
||||
},
|
||||
"required": ["message"],
|
||||
},
|
||||
impl=tool_send_message_to_user,
|
||||
section=A2A_SECTION,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HMA — hierarchical persistent memory
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_COMMIT_MEMORY = ToolSpec(
|
||||
name="commit_memory",
|
||||
short="Save a fact to persistent memory; survives across sessions and restarts.",
|
||||
when_to_use=(
|
||||
"Scopes: LOCAL (private to you, default), TEAM (shared with "
|
||||
"parent + siblings), GLOBAL (entire org — only tier-0 root "
|
||||
"workspaces can write). Commit decisions, learned facts, and "
|
||||
"completed-task summaries so future sessions and teammates "
|
||||
"can recall them."
|
||||
),
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "What to remember — be specific.",
|
||||
},
|
||||
"scope": {
|
||||
"type": "string",
|
||||
"enum": ["LOCAL", "TEAM", "GLOBAL"],
|
||||
"description": "Memory scope (default LOCAL).",
|
||||
},
|
||||
},
|
||||
"required": ["content"],
|
||||
},
|
||||
impl=tool_commit_memory,
|
||||
section=MEMORY_SECTION,
|
||||
)
|
||||
|
||||
_RECALL_MEMORY = ToolSpec(
|
||||
name="recall_memory",
|
||||
short="Search persistent memory; returns matching LOCAL + TEAM + GLOBAL rows.",
|
||||
when_to_use=(
|
||||
"Call at the start of new work and when picking up something "
|
||||
"you may have done before. Empty query returns ALL accessible "
|
||||
"memories — cheap and avoids missing rows that don't match a "
|
||||
"narrow keyword. Memory is automatically recalled at session "
|
||||
"start; use this to refresh mid-session."
|
||||
),
|
||||
input_schema={
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Search query (empty returns all).",
|
||||
},
|
||||
"scope": {
|
||||
"type": "string",
|
||||
"enum": ["LOCAL", "TEAM", "GLOBAL", ""],
|
||||
"description": "Filter by scope (empty = all accessible).",
|
||||
},
|
||||
},
|
||||
},
|
||||
impl=tool_recall_memory,
|
||||
section=MEMORY_SECTION,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public registry. Keep alphabetically grouped by section for stable
|
||||
# adapter listings + diff-friendly review.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
TOOLS: list[ToolSpec] = [
|
||||
# A2A
|
||||
_DELEGATE_TASK,
|
||||
_DELEGATE_TASK_ASYNC,
|
||||
_CHECK_TASK_STATUS,
|
||||
_LIST_PEERS,
|
||||
_GET_WORKSPACE_INFO,
|
||||
_SEND_MESSAGE_TO_USER,
|
||||
# HMA
|
||||
_COMMIT_MEMORY,
|
||||
_RECALL_MEMORY,
|
||||
]
|
||||
|
||||
|
||||
def a2a_tools() -> list[ToolSpec]:
|
||||
"""All A2A-section tools, in registration order."""
|
||||
return [t for t in TOOLS if t.section == A2A_SECTION]
|
||||
|
||||
|
||||
def memory_tools() -> list[ToolSpec]:
|
||||
"""All memory-section tools, in registration order."""
|
||||
return [t for t in TOOLS if t.section == MEMORY_SECTION]
|
||||
|
||||
|
||||
def by_name(name: str) -> ToolSpec:
|
||||
"""Look up a spec by its canonical name. Raises KeyError if absent."""
|
||||
for t in TOOLS:
|
||||
if t.name == name:
|
||||
return t
|
||||
raise KeyError(f"no platform tool named {name!r}")
|
||||
|
||||
|
||||
def tool_names() -> list[str]:
|
||||
"""Canonical names in registration order."""
|
||||
return [t.name for t in TOOLS]
|
||||
@ -64,7 +64,7 @@ def build_team_routing_payload(
|
||||
"action": "choose_member",
|
||||
"message": (
|
||||
f"You have {len(members)} team members. "
|
||||
"Choose the best one for this task and call delegate_to_workspace with their ID."
|
||||
"Choose the best one for this task and call delegate_task_async with their ID."
|
||||
),
|
||||
"task": task,
|
||||
"members": members,
|
||||
|
||||
@ -4,6 +4,7 @@ import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from executor_helpers import get_a2a_instructions, get_hma_instructions
|
||||
from skill_loader.loader import LoadedSkill
|
||||
from shared_runtime import build_peer_section
|
||||
|
||||
@ -68,6 +69,7 @@ def build_system_prompt(
|
||||
plugin_prompts: list[str] | None = None,
|
||||
parent_context: list[dict] | None = None,
|
||||
platform_instructions: str = "",
|
||||
a2a_mcp: bool = True,
|
||||
) -> str:
|
||||
"""Build the complete system prompt.
|
||||
|
||||
@ -154,6 +156,20 @@ def build_system_prompt(
|
||||
parts.append(skill.instructions)
|
||||
parts.append("")
|
||||
|
||||
# Platform tool instructions: A2A (inter-agent communication) and HMA
|
||||
# (persistent memory). These document how to call delegate_task,
|
||||
# commit_memory, etc — without them, agents see the tools registered
|
||||
# but have no instructions on when/how to use them. Placed between
|
||||
# Skills and Peers so the A2A docs precede the peer list (which is
|
||||
# the data shape the A2A tools operate over).
|
||||
#
|
||||
# a2a_mcp=True: MCP tool variant (claude-code, hermes, langchain,
|
||||
# crewai). a2a_mcp=False: CLI subprocess variant (ollama, custom
|
||||
# runtimes that don't speak MCP). Default True matches the
|
||||
# MCP-capable majority; CLI-only adapters override at the call site.
|
||||
parts.append(get_a2a_instructions(mcp=a2a_mcp))
|
||||
parts.append(get_hma_instructions())
|
||||
|
||||
# Add peer capabilities with a single shared renderer.
|
||||
peer_section = build_peer_section(peers)
|
||||
if peer_section:
|
||||
|
||||
@ -9,10 +9,10 @@
|
||||
a2a-sdk[http-server]>=1.0.0,<2.0
|
||||
|
||||
# HTTP / server
|
||||
httpx>=0.27.0
|
||||
uvicorn>=0.30.0
|
||||
httpx>=0.28.1
|
||||
uvicorn>=0.46.0
|
||||
starlette>=0.38.0
|
||||
websockets>=12.0
|
||||
websockets>=16.0
|
||||
|
||||
# Config parsing
|
||||
pyyaml>=6.0
|
||||
@ -24,7 +24,7 @@ langchain-core>=0.3.0
|
||||
# tools/telemetry.py gracefully degrades (noop) when these are absent,
|
||||
# but they are required for actual trace export.
|
||||
opentelemetry-api>=1.24.0
|
||||
opentelemetry-sdk>=1.24.0
|
||||
opentelemetry-sdk>=1.41.1
|
||||
# OTLP/HTTP exporter: sends spans to any OTEL collector and to Langfuse ≥4
|
||||
opentelemetry-exporter-otlp-proto-http>=1.24.0
|
||||
|
||||
@ -36,4 +36,4 @@ sqlalchemy>=2.0.0
|
||||
# tasks survive crashes and can resume. The module and TemporalWorkflowWrapper
|
||||
# load cleanly without this package — all paths fall back to direct execution.
|
||||
# Requires a running Temporal server; set TEMPORAL_HOST=<host>:7233 to enable.
|
||||
temporalio>=1.7.0
|
||||
temporalio>=1.26.0
|
||||
|
||||
@ -140,7 +140,7 @@ def build_peer_section(
|
||||
*,
|
||||
heading: str = "## Your Peers (workspaces you can delegate to)",
|
||||
instruction: str = (
|
||||
"Use the `delegate_to_workspace` tool to send tasks to peers. "
|
||||
"Use the `delegate_task_async` tool to send tasks to peers. "
|
||||
"Only delegate to peers listed above."
|
||||
),
|
||||
) -> str:
|
||||
|
||||
@ -113,10 +113,12 @@ def _make_tools_mocks():
|
||||
tools_mod.__path__ = [] # Make it a proper package
|
||||
|
||||
tools_delegation_mod = ModuleType("builtin_tools.delegation")
|
||||
tools_delegation_mod.delegate_to_workspace = MagicMock()
|
||||
tools_delegation_mod.delegate_to_workspace.name = "delegate_to_workspace"
|
||||
tools_delegation_mod.check_delegation_status = MagicMock()
|
||||
tools_delegation_mod.check_delegation_status.name = "check_delegation_status"
|
||||
tools_delegation_mod.delegate_task = MagicMock()
|
||||
tools_delegation_mod.delegate_task.name = "delegate_task"
|
||||
tools_delegation_mod.delegate_task_async = MagicMock()
|
||||
tools_delegation_mod.delegate_task_async.name = "delegate_task_async"
|
||||
tools_delegation_mod.check_task_status = MagicMock()
|
||||
tools_delegation_mod.check_task_status.name = "check_task_status"
|
||||
|
||||
tools_approval_mod = ModuleType("builtin_tools.approval")
|
||||
tools_approval_mod.request_approval = MagicMock()
|
||||
@ -125,8 +127,8 @@ def _make_tools_mocks():
|
||||
tools_memory_mod = ModuleType("builtin_tools.memory")
|
||||
tools_memory_mod.commit_memory = MagicMock()
|
||||
tools_memory_mod.commit_memory.name = "commit_memory"
|
||||
tools_memory_mod.search_memory = MagicMock()
|
||||
tools_memory_mod.search_memory.name = "search_memory"
|
||||
tools_memory_mod.recall_memory = MagicMock()
|
||||
tools_memory_mod.recall_memory.name = "recall_memory"
|
||||
|
||||
tools_sandbox_mod = ModuleType("builtin_tools.sandbox")
|
||||
tools_sandbox_mod.run_code = MagicMock()
|
||||
|
||||
@ -28,7 +28,7 @@ async def test_route_task_to_team_delegates_preferred_member(monkeypatch):
|
||||
|
||||
delegate = MagicMock()
|
||||
delegate.ainvoke = AsyncMock(return_value={"ok": True})
|
||||
monkeypatch.setattr(sys.modules["builtin_tools.delegation"], "delegate_to_workspace", delegate)
|
||||
monkeypatch.setattr(sys.modules["builtin_tools.delegation"], "delegate_task_async", delegate)
|
||||
|
||||
result = await coordinator.route_task_to_team(
|
||||
"Do the thing",
|
||||
@ -58,4 +58,4 @@ def test_build_children_description_reuses_shared_renderer():
|
||||
assert "## Your Team (sub-workspaces you coordinate)" in description
|
||||
assert "**Alpha** (id: `child-1`, status: online)" in description
|
||||
assert "Skills: research" in description
|
||||
assert "delegate_to_workspace" in description
|
||||
assert "delegate_task_async" in description
|
||||
|
||||
@ -4,7 +4,7 @@ The delegation tool now returns immediately with a task_id and runs the
|
||||
A2A request in the background. Tests verify:
|
||||
1. Immediate return with task_id
|
||||
2. Background task completion
|
||||
3. check_delegation_status retrieval
|
||||
3. check_task_status retrieval
|
||||
4. Error handling (RBAC, discovery, network)
|
||||
"""
|
||||
|
||||
@ -109,22 +109,22 @@ def delegation_mocks(monkeypatch):
|
||||
|
||||
|
||||
async def _invoke(mod, workspace_id="target", task="do stuff"):
|
||||
"""Call delegate_to_workspace and return the immediate result."""
|
||||
fn = mod.delegate_to_workspace
|
||||
"""Call delegate_task_async and return the immediate result."""
|
||||
fn = mod.delegate_task_async
|
||||
if hasattr(fn, "ainvoke"):
|
||||
return await fn.ainvoke({"workspace_id": workspace_id, "task": task})
|
||||
return await fn(workspace_id=workspace_id, task=task)
|
||||
|
||||
|
||||
async def _invoke_and_wait(mod, workspace_id="target", task="do stuff"):
|
||||
"""Call delegate_to_workspace, wait for background task, return status."""
|
||||
"""Call delegate_task_async, wait for background task, return status."""
|
||||
result = await _invoke(mod, workspace_id, task)
|
||||
# Wait for all background tasks to complete
|
||||
if mod._background_tasks:
|
||||
await asyncio.gather(*mod._background_tasks, return_exceptions=True)
|
||||
# Get final status
|
||||
if "task_id" in result:
|
||||
fn = mod.check_delegation_status
|
||||
fn = mod.check_task_status
|
||||
if hasattr(fn, "ainvoke"):
|
||||
return await fn.ainvoke({"task_id": result["task_id"]})
|
||||
return await fn(task_id=result["task_id"])
|
||||
@ -182,7 +182,7 @@ class TestAsyncDelegation:
|
||||
await _invoke(mod, workspace_id="ws-a", task="task A")
|
||||
await _invoke(mod, workspace_id="ws-b", task="task B")
|
||||
|
||||
fn = mod.check_delegation_status
|
||||
fn = mod.check_task_status
|
||||
if hasattr(fn, "ainvoke"):
|
||||
result = await fn.ainvoke({"task_id": ""})
|
||||
else:
|
||||
@ -194,7 +194,7 @@ class TestAsyncDelegation:
|
||||
async def test_check_delegation_not_found(self, delegation_mocks):
|
||||
mod, *_ = delegation_mocks
|
||||
|
||||
fn = mod.check_delegation_status
|
||||
fn = mod.check_task_status
|
||||
if hasattr(fn, "ainvoke"):
|
||||
result = await fn.ainvoke({"task_id": "nonexistent"})
|
||||
else:
|
||||
@ -354,7 +354,7 @@ class TestA2AQueued:
|
||||
|
||||
|
||||
class TestQueuedLazyRefresh:
|
||||
"""When a delegation is QUEUED, check_delegation_status must lazily
|
||||
"""When a delegation is QUEUED, check_task_status must lazily
|
||||
refresh from the platform's GET /delegations to pick up drain-stitch
|
||||
completions. Without this refresh, the LLM sees "queued" forever
|
||||
because the platform never pushes back to the runtime.
|
||||
@ -401,7 +401,7 @@ class TestQueuedLazyRefresh:
|
||||
refresh_cls.return_value.__aexit__ = AsyncMock(return_value=False)
|
||||
|
||||
with patch("httpx.AsyncClient", refresh_cls):
|
||||
fn = mod.check_delegation_status
|
||||
fn = mod.check_task_status
|
||||
if hasattr(fn, "ainvoke"):
|
||||
refreshed = await fn.ainvoke({"task_id": task_id})
|
||||
else:
|
||||
@ -443,7 +443,7 @@ class TestQueuedLazyRefresh:
|
||||
refresh_cls.return_value.__aexit__ = AsyncMock(return_value=False)
|
||||
|
||||
with patch("httpx.AsyncClient", refresh_cls):
|
||||
fn = mod.check_delegation_status
|
||||
fn = mod.check_task_status
|
||||
if hasattr(fn, "ainvoke"):
|
||||
refreshed = await fn.ainvoke({"task_id": task_id})
|
||||
else:
|
||||
@ -486,7 +486,7 @@ class TestQueuedLazyRefresh:
|
||||
refresh_cls.return_value.__aexit__ = AsyncMock(return_value=False)
|
||||
|
||||
with patch("httpx.AsyncClient", refresh_cls):
|
||||
fn = mod.check_delegation_status
|
||||
fn = mod.check_task_status
|
||||
if hasattr(fn, "ainvoke"):
|
||||
refreshed = await fn.ainvoke({"task_id": task_id})
|
||||
else:
|
||||
@ -515,7 +515,7 @@ class TestQueuedLazyRefresh:
|
||||
refresh_cls.return_value.__aexit__ = AsyncMock(return_value=False)
|
||||
|
||||
with patch("httpx.AsyncClient", refresh_cls):
|
||||
fn = mod.check_delegation_status
|
||||
fn = mod.check_task_status
|
||||
if hasattr(fn, "ainvoke"):
|
||||
refreshed = await fn.ainvoke({"task_id": task_id})
|
||||
else:
|
||||
|
||||
@ -438,9 +438,12 @@ def test_get_system_prompt_handles_non_utf8(tmp_path):
|
||||
|
||||
def test_get_a2a_instructions_mcp_default():
|
||||
out = get_a2a_instructions()
|
||||
assert "MCP tools" in out
|
||||
# Section heading is the canonical agent-facing label.
|
||||
assert "## Inter-Agent Communication" in out
|
||||
# Every A2A tool from the registry must appear by name.
|
||||
assert "list_peers" in out
|
||||
assert "send_message_to_user" in out
|
||||
assert "delegate_task" in out
|
||||
|
||||
|
||||
def test_get_a2a_instructions_cli_variant():
|
||||
@ -468,32 +471,27 @@ def test_a2a_cli_instructions_use_module_invocation_not_legacy_app_path():
|
||||
|
||||
|
||||
def test_a2a_mcp_instructions_reference_existing_tools():
|
||||
"""The MCP instructions text must only reference tools that are actually
|
||||
registered in a2a_mcp_server.py. If someone renames a server tool, the
|
||||
prompt text must be updated in lockstep — this test catches the drift.
|
||||
"""Pin the registry-driven alignment: every tool name appearing in the
|
||||
agent-facing A2A instructions must be a tool the MCP server actually
|
||||
registers. Both sides now derive from platform_tools.registry, so the
|
||||
real test is that the registry's a2a_tools() set drives both surfaces
|
||||
consistently.
|
||||
"""
|
||||
import re
|
||||
import pathlib
|
||||
mcp_server = pathlib.Path(__file__).parent.parent / "a2a_mcp_server.py"
|
||||
registered = set(re.findall(r'"name":\s*"([a-z_]+)"', mcp_server.read_text()))
|
||||
# The server advertises itself by name; strip that false positive.
|
||||
registered.discard("a2a-delegation")
|
||||
from a2a_mcp_server import TOOLS as MCP_TOOLS
|
||||
from platform_tools.registry import a2a_tools
|
||||
|
||||
registered = {t["name"] for t in MCP_TOOLS}
|
||||
instructions = get_a2a_instructions(mcp=True)
|
||||
|
||||
# Every tool called out by name in the instructions must exist on the
|
||||
# server. (We allow the server to have extras the prompt doesn't mention.)
|
||||
referenced = {
|
||||
"list_peers",
|
||||
"delegate_task",
|
||||
"delegate_task_async",
|
||||
"check_task_status",
|
||||
"get_workspace_info",
|
||||
"send_message_to_user",
|
||||
}
|
||||
for name in referenced:
|
||||
assert name in instructions, f"prompt missing {name}"
|
||||
assert name in registered, f"MCP server no longer registers {name}"
|
||||
for spec in a2a_tools():
|
||||
assert spec.name in instructions, (
|
||||
f"A2A instructions are missing the tool {spec.name!r} that "
|
||||
f"the registry declares — the doc generator drifted."
|
||||
)
|
||||
assert spec.name in registered, (
|
||||
f"MCP server no longer registers {spec.name!r} that the registry "
|
||||
f"declares — the MCP TOOLS list drifted from the registry."
|
||||
)
|
||||
|
||||
|
||||
# ======================================================================
|
||||
|
||||
@ -98,7 +98,7 @@ def test_commit_memory_uses_awareness_client_when_configured(monkeypatch, memory
|
||||
assert captured["json"] == {"content": "remember this", "scope": "TEAM"}
|
||||
|
||||
|
||||
def test_search_memory_uses_platform_fallback_without_awareness(monkeypatch, memory_modules):
|
||||
def test_recall_memory_uses_platform_fallback_without_awareness(monkeypatch, memory_modules):
|
||||
memory, _awareness_client = memory_modules
|
||||
captured = {}
|
||||
|
||||
@ -119,7 +119,7 @@ def test_search_memory_uses_platform_fallback_without_awareness(monkeypatch, mem
|
||||
|
||||
monkeypatch.setattr(memory.httpx, "AsyncClient", FakeAsyncClient)
|
||||
|
||||
result = asyncio.run(memory.search_memory("status", "local"))
|
||||
result = asyncio.run(memory.recall_memory("status", "local"))
|
||||
|
||||
assert result == {
|
||||
"success": True,
|
||||
@ -236,10 +236,10 @@ def test_commit_memory_promoted_packet_logs_skill_promotion(monkeypatch, tmp_pat
|
||||
assert not (tmp_path / "skills").exists()
|
||||
|
||||
|
||||
def test_search_memory_rejects_invalid_scope(memory_modules):
|
||||
def test_recall_memory_rejects_invalid_scope(memory_modules):
|
||||
memory, _awareness_client = memory_modules
|
||||
|
||||
result = asyncio.run(memory.search_memory("status", "bad"))
|
||||
result = asyncio.run(memory.recall_memory("status", "bad"))
|
||||
|
||||
assert result == {"error": "scope must be LOCAL, TEAM, GLOBAL, or empty"}
|
||||
|
||||
@ -457,15 +457,15 @@ def test_commit_memory_result_failure(memory_modules_with_mocks):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# search_memory — RBAC deny
|
||||
# recall_memory — RBAC deny
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_search_memory_rbac_deny(memory_modules_with_mocks):
|
||||
def test_recall_memory_rbac_deny(memory_modules_with_mocks):
|
||||
memory, mock_audit, _ = memory_modules_with_mocks
|
||||
mock_audit.check_permission.return_value = False
|
||||
mock_audit.get_workspace_roles.return_value = (["read-only-special"], {})
|
||||
|
||||
result = asyncio.run(memory.search_memory("find something", "local"))
|
||||
result = asyncio.run(memory.recall_memory("find something", "local"))
|
||||
|
||||
assert result["success"] is False
|
||||
assert "RBAC" in result["error"]
|
||||
@ -473,22 +473,22 @@ def test_search_memory_rbac_deny(memory_modules_with_mocks):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# search_memory — invalid scope
|
||||
# recall_memory — invalid scope
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_search_memory_invalid_scope(memory_modules_with_mocks):
|
||||
def test_recall_memory_invalid_scope(memory_modules_with_mocks):
|
||||
memory, _mock_audit, _ = memory_modules_with_mocks
|
||||
|
||||
result = asyncio.run(memory.search_memory("q", "BAD"))
|
||||
result = asyncio.run(memory.recall_memory("q", "BAD"))
|
||||
|
||||
assert result == {"error": "scope must be LOCAL, TEAM, GLOBAL, or empty"}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# search_memory — awareness_client success
|
||||
# recall_memory — awareness_client success
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_search_memory_awareness_client_success(memory_modules_with_mocks):
|
||||
def test_recall_memory_awareness_client_success(memory_modules_with_mocks):
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
memory, mock_audit, mock_awareness_mod = memory_modules_with_mocks
|
||||
|
||||
@ -501,7 +501,7 @@ def test_search_memory_awareness_client_success(memory_modules_with_mocks):
|
||||
# Patch directly on the loaded module since it imported the name at load time
|
||||
memory.build_awareness_client = MagicMock(return_value=mock_ac)
|
||||
|
||||
result = asyncio.run(memory.search_memory("find", "team"))
|
||||
result = asyncio.run(memory.recall_memory("find", "team"))
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["count"] == 2
|
||||
@ -509,10 +509,10 @@ def test_search_memory_awareness_client_success(memory_modules_with_mocks):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# search_memory — awareness_client raises
|
||||
# recall_memory — awareness_client raises
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_search_memory_awareness_client_exception(memory_modules_with_mocks):
|
||||
def test_recall_memory_awareness_client_exception(memory_modules_with_mocks):
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
memory, mock_audit, mock_awareness_mod = memory_modules_with_mocks
|
||||
|
||||
@ -521,7 +521,7 @@ def test_search_memory_awareness_client_exception(memory_modules_with_mocks):
|
||||
# Patch directly on the loaded module since it imported the name at load time
|
||||
memory.build_awareness_client = MagicMock(return_value=mock_ac)
|
||||
|
||||
result = asyncio.run(memory.search_memory("query", "local"))
|
||||
result = asyncio.run(memory.recall_memory("query", "local"))
|
||||
|
||||
assert result["success"] is False
|
||||
assert "awareness search failed" in result["error"]
|
||||
@ -530,10 +530,10 @@ def test_search_memory_awareness_client_exception(memory_modules_with_mocks):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# search_memory — httpx 200 success (no awareness_client)
|
||||
# recall_memory — httpx 200 success (no awareness_client)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_search_memory_httpx_200_success(memory_modules_with_mocks):
|
||||
def test_recall_memory_httpx_200_success(memory_modules_with_mocks):
|
||||
memory, _mock_audit, _ = memory_modules_with_mocks
|
||||
|
||||
class FakeAsyncClient:
|
||||
@ -545,7 +545,7 @@ def test_search_memory_httpx_200_success(memory_modules_with_mocks):
|
||||
|
||||
memory.httpx.AsyncClient = FakeAsyncClient
|
||||
|
||||
result = asyncio.run(memory.search_memory("find", "global"))
|
||||
result = asyncio.run(memory.recall_memory("find", "global"))
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["count"] == 2
|
||||
@ -553,10 +553,10 @@ def test_search_memory_httpx_200_success(memory_modules_with_mocks):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# search_memory — httpx non-200
|
||||
# recall_memory — httpx non-200
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_search_memory_httpx_non_200(memory_modules_with_mocks):
|
||||
def test_recall_memory_httpx_non_200(memory_modules_with_mocks):
|
||||
memory, mock_audit, _ = memory_modules_with_mocks
|
||||
|
||||
class FakeAsyncClient:
|
||||
@ -568,17 +568,17 @@ def test_search_memory_httpx_non_200(memory_modules_with_mocks):
|
||||
|
||||
memory.httpx.AsyncClient = FakeAsyncClient
|
||||
|
||||
result = asyncio.run(memory.search_memory("q", ""))
|
||||
result = asyncio.run(memory.recall_memory("q", ""))
|
||||
|
||||
assert result["success"] is False
|
||||
assert "server error" in result["error"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# search_memory — httpx raises
|
||||
# recall_memory — httpx raises
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_search_memory_httpx_exception(memory_modules_with_mocks):
|
||||
def test_recall_memory_httpx_exception(memory_modules_with_mocks):
|
||||
memory, mock_audit, _ = memory_modules_with_mocks
|
||||
|
||||
class FakeAsyncClient:
|
||||
@ -590,7 +590,7 @@ def test_search_memory_httpx_exception(memory_modules_with_mocks):
|
||||
|
||||
memory.httpx.AsyncClient = FakeAsyncClient
|
||||
|
||||
result = asyncio.run(memory.search_memory("query", "local"))
|
||||
result = asyncio.run(memory.recall_memory("query", "local"))
|
||||
|
||||
assert result["success"] is False
|
||||
assert "request timed out" in result["error"]
|
||||
@ -672,7 +672,7 @@ def test_commit_memory_awareness_exception_span_record_fails(memory_modules_with
|
||||
assert result["success"] is False # error propagated despite span failure
|
||||
|
||||
|
||||
def test_search_memory_awareness_exception_span_record_fails(memory_modules_with_mocks):
|
||||
def test_recall_memory_awareness_exception_span_record_fails(memory_modules_with_mocks):
|
||||
"""awareness_client.search raises + span.record_exception also raises: error still returned."""
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
memory, mock_audit, mock_awareness_mod = memory_modules_with_mocks
|
||||
@ -685,7 +685,7 @@ def test_search_memory_awareness_exception_span_record_fails(memory_modules_with
|
||||
mock_ac.search = AsyncMock(side_effect=RuntimeError("awareness down"))
|
||||
memory.build_awareness_client = MagicMock(return_value=mock_ac)
|
||||
|
||||
result = asyncio.run(memory.search_memory("test", "local"))
|
||||
result = asyncio.run(memory.recall_memory("test", "local"))
|
||||
assert result["success"] is False
|
||||
|
||||
|
||||
@ -711,8 +711,8 @@ def test_commit_memory_httpx_exception_span_record_fails(memory_modules_with_moc
|
||||
assert result["success"] is False
|
||||
|
||||
|
||||
def test_search_memory_httpx_exception_span_record_fails(memory_modules_with_mocks):
|
||||
"""httpx raises in search_memory + span.record_exception also raises: error still returned."""
|
||||
def test_recall_memory_httpx_exception_span_record_fails(memory_modules_with_mocks):
|
||||
"""httpx raises in recall_memory + span.record_exception also raises: error still returned."""
|
||||
from unittest.mock import MagicMock
|
||||
memory, mock_audit, mock_awareness_mod = memory_modules_with_mocks
|
||||
|
||||
@ -729,7 +729,7 @@ def test_search_memory_httpx_exception_span_record_fails(memory_modules_with_moc
|
||||
|
||||
memory.httpx.AsyncClient = FakeAsyncClient
|
||||
|
||||
result = asyncio.run(memory.search_memory("query", "local"))
|
||||
result = asyncio.run(memory.recall_memory("query", "local"))
|
||||
assert result["success"] is False
|
||||
|
||||
|
||||
|
||||
123
workspace/tests/test_platform_tools.py
Normal file
123
workspace/tests/test_platform_tools.py
Normal file
@ -0,0 +1,123 @@
|
||||
"""Structural alignment tests — every adapter must agree with the registry.
|
||||
|
||||
The registry in workspace/platform_tools/registry.py is the single source
|
||||
of truth for tool naming + docs. These tests fail if any consumer
|
||||
(MCP server, LangChain @tool wrappers, doc generators) drifts.
|
||||
|
||||
If you add a tool: append a ToolSpec to registry.TOOLS, then add the
|
||||
matching @tool wrapper in builtin_tools/. These tests catch the case
|
||||
where the registry has a name that has no LangChain @tool counterpart
|
||||
(or vice versa).
|
||||
|
||||
If you rename a tool: edit registry.TOOLS only. These tests fail loudly
|
||||
if the LangChain @tool name or MCP TOOLS["name"] still has the old name.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from platform_tools.registry import TOOLS, a2a_tools, by_name, memory_tools, tool_names
|
||||
|
||||
|
||||
def test_registry_names_are_unique():
|
||||
"""Every ToolSpec must have a distinct name — duplicate is a typo."""
|
||||
names = tool_names()
|
||||
assert len(names) == len(set(names)), f"duplicate tool names: {names}"
|
||||
|
||||
|
||||
def test_registry_a2a_and_memory_partition_is_complete():
|
||||
"""Every tool belongs to exactly one section. No orphans."""
|
||||
a2a = {t.name for t in a2a_tools()}
|
||||
mem = {t.name for t in memory_tools()}
|
||||
all_names = set(tool_names())
|
||||
assert a2a | mem == all_names
|
||||
assert not (a2a & mem), f"tool in both sections: {a2a & mem}"
|
||||
|
||||
|
||||
def test_by_name_lookup_works():
|
||||
spec = by_name("delegate_task")
|
||||
assert spec.name == "delegate_task"
|
||||
assert spec.section == "a2a"
|
||||
with pytest.raises(KeyError):
|
||||
by_name("nonexistent_tool")
|
||||
|
||||
|
||||
def test_mcp_server_registers_every_registry_tool():
|
||||
"""The MCP server's TOOLS list is built from the registry. Every
|
||||
spec must produce a corresponding entry — if not, the import-time
|
||||
list comprehension is broken or the registry has an entry the
|
||||
server isn't picking up.
|
||||
"""
|
||||
from a2a_mcp_server import TOOLS as MCP_TOOLS
|
||||
|
||||
mcp_names = {t["name"] for t in MCP_TOOLS}
|
||||
registry_names = set(tool_names())
|
||||
assert mcp_names == registry_names, (
|
||||
f"MCP and registry diverged. MCP-only: {mcp_names - registry_names}; "
|
||||
f"registry-only: {registry_names - mcp_names}"
|
||||
)
|
||||
|
||||
|
||||
def test_mcp_tool_descriptions_match_registry_short():
|
||||
"""Each MCP tool's description IS the registry's `short` field —
|
||||
the bullet-line description shown to the model. The deeper
|
||||
when_to_use guidance lives only in the system prompt.
|
||||
"""
|
||||
from a2a_mcp_server import TOOLS as MCP_TOOLS
|
||||
|
||||
by_mcp_name = {t["name"]: t for t in MCP_TOOLS}
|
||||
for spec in TOOLS:
|
||||
assert by_mcp_name[spec.name]["description"] == spec.short, (
|
||||
f"MCP description for {spec.name!r} drifted from registry.short. "
|
||||
f"Edit registry.py, not the MCP server's TOOLS list."
|
||||
)
|
||||
|
||||
|
||||
def test_mcp_tool_input_schemas_match_registry():
|
||||
"""Schemas must come from the registry, never duplicated in the server."""
|
||||
from a2a_mcp_server import TOOLS as MCP_TOOLS
|
||||
|
||||
by_mcp_name = {t["name"]: t for t in MCP_TOOLS}
|
||||
for spec in TOOLS:
|
||||
assert by_mcp_name[spec.name]["inputSchema"] == spec.input_schema, (
|
||||
f"MCP inputSchema for {spec.name!r} drifted from registry."
|
||||
)
|
||||
|
||||
|
||||
def test_a2a_instructions_text_includes_every_a2a_tool():
|
||||
"""get_a2a_instructions must mention every a2a-section tool by name."""
|
||||
from executor_helpers import get_a2a_instructions
|
||||
|
||||
instructions = get_a2a_instructions(mcp=True)
|
||||
for spec in a2a_tools():
|
||||
assert spec.name in instructions, (
|
||||
f"agent-facing A2A docs missing tool {spec.name!r} from registry"
|
||||
)
|
||||
|
||||
|
||||
def test_hma_instructions_text_includes_every_memory_tool():
|
||||
"""get_hma_instructions must mention every memory-section tool by name."""
|
||||
from executor_helpers import get_hma_instructions
|
||||
|
||||
instructions = get_hma_instructions()
|
||||
for spec in memory_tools():
|
||||
assert spec.name in instructions, (
|
||||
f"agent-facing HMA docs missing tool {spec.name!r} from registry"
|
||||
)
|
||||
|
||||
|
||||
def test_old_pre_rename_names_not_present_in_docs():
|
||||
"""Pre-rename names (delegate_to_workspace, search_memory,
|
||||
check_delegation_status) must not leak back into the agent-facing
|
||||
docs. They're not in the registry; their absence is the canonical
|
||||
state.
|
||||
"""
|
||||
from executor_helpers import get_a2a_instructions, get_hma_instructions
|
||||
|
||||
blob = get_a2a_instructions(mcp=True) + get_hma_instructions()
|
||||
for stale in ("delegate_to_workspace", "search_memory", "check_delegation_status"):
|
||||
assert stale not in blob, (
|
||||
f"pre-rename name {stale!r} leaked into docs — registry "
|
||||
f"is the source of truth, not the doc generator."
|
||||
)
|
||||
@ -202,7 +202,7 @@ def test_peer_capabilities_format(tmp_path):
|
||||
assert "## Your Peers" in result
|
||||
assert "**Echo Agent** (id: `peer-1`, status: online)" in result
|
||||
assert "Skills: echo, repeat" in result
|
||||
assert "delegate_to_workspace" in result
|
||||
assert "delegate_task_async" in result
|
||||
# peer-2 has no agent_card but DOES have a DB name + status — must
|
||||
# still render so coordinators can delegate to freshly-created peers
|
||||
# whose A2A discovery hasn't populated a card yet (regression of the
|
||||
@ -395,3 +395,77 @@ async def test_get_peer_capabilities_exception():
|
||||
result = await get_peer_capabilities("http://platform:8080", "ws-abc")
|
||||
|
||||
assert result == []
|
||||
|
||||
|
||||
# Regression tests for the A2A + HMA tool-instruction injection. Pre-fix,
|
||||
# get_a2a_instructions() and get_hma_instructions() were defined in
|
||||
# executor_helpers.py but never called from build_system_prompt — workers
|
||||
# saw the platform's delegate_task / commit_memory tools registered but
|
||||
# had no documentation telling them how to use them.
|
||||
|
||||
def test_a2a_instructions_injected_default_mcp(tmp_path):
|
||||
"""build_system_prompt embeds A2A MCP-variant instructions by default."""
|
||||
(tmp_path / "system-prompt.md").write_text("Base.")
|
||||
|
||||
result = build_system_prompt(
|
||||
config_path=str(tmp_path),
|
||||
workspace_id="ws-1",
|
||||
loaded_skills=[],
|
||||
peers=[],
|
||||
)
|
||||
|
||||
assert "## Inter-Agent Communication" in result
|
||||
assert "delegate_task" in result
|
||||
assert "list_peers" in result
|
||||
assert "send_message_to_user" in result
|
||||
|
||||
|
||||
def test_a2a_instructions_cli_variant_when_disabled(tmp_path):
|
||||
"""a2a_mcp=False emits the CLI subprocess variant for non-MCP runtimes."""
|
||||
(tmp_path / "system-prompt.md").write_text("Base.")
|
||||
|
||||
result = build_system_prompt(
|
||||
config_path=str(tmp_path),
|
||||
workspace_id="ws-1",
|
||||
loaded_skills=[],
|
||||
peers=[],
|
||||
a2a_mcp=False,
|
||||
)
|
||||
|
||||
assert "## Inter-Agent Communication" in result
|
||||
assert "molecule_runtime.a2a_cli" in result
|
||||
# MCP-only details must NOT leak into the CLI variant.
|
||||
assert "send_message_to_user" not in result
|
||||
|
||||
|
||||
def test_hma_instructions_injected(tmp_path):
|
||||
"""build_system_prompt embeds HMA persistent-memory instructions."""
|
||||
(tmp_path / "system-prompt.md").write_text("Base.")
|
||||
|
||||
result = build_system_prompt(
|
||||
config_path=str(tmp_path),
|
||||
workspace_id="ws-1",
|
||||
loaded_skills=[],
|
||||
peers=[],
|
||||
)
|
||||
|
||||
assert "## Hierarchical Memory (HMA)" in result
|
||||
assert "commit_memory" in result
|
||||
assert "recall_memory" in result
|
||||
|
||||
|
||||
def test_tool_instructions_precede_peer_section(tmp_path):
|
||||
"""A2A docs must precede the peer list — peer IDs are operands of A2A tools."""
|
||||
(tmp_path / "system-prompt.md").write_text("Base.")
|
||||
|
||||
peers = [{"id": "p1", "name": "Worker", "status": "active", "agent_card": None}]
|
||||
result = build_system_prompt(
|
||||
config_path=str(tmp_path),
|
||||
workspace_id="ws-1",
|
||||
loaded_skills=[],
|
||||
peers=peers,
|
||||
)
|
||||
|
||||
a2a_idx = result.index("## Inter-Agent Communication")
|
||||
peers_idx = result.index("## Your Peers")
|
||||
assert a2a_idx < peers_idx, "A2A instructions must come before the peer list"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user