Compare commits
No commits in common. "main" and "fix/auto-sync-use-devops-token" have entirely different histories.
main
...
fix/auto-s
@ -1,118 +0,0 @@
|
|||||||
#!/usr/bin/env bash
# audit-force-merge — detect a §SOP-6 force-merge after PR close, emit
# `incident.force_merge` to stdout as structured JSON.
#
# Vector's docker_logs source picks up runner stdout; the JSON gets
# shipped to Loki on molecule-canonical-obs, indexable by event_type.
# Query example:
#
#   {host="operator"} |= "event_type" |= "incident.force_merge" | json
#
# A force-merge is detected when a PR closed-with-merged=true had at
# least one of the repo's required-status-check contexts in a state
# other than "success" at the merge commit's SHA. That's exactly what
# the Gitea force_merge:true API call lets through, so it's a faithful
# detector of the override path.
#
# Triggers on `pull_request_target: closed` (loaded from base branch
# per §SOP-6 security model). No-op when merged=false.
#
# Required env (set by the workflow):
#   GITEA_TOKEN, GITEA_HOST, REPO, PR_NUMBER, REQUIRED_CHECKS
#
# REQUIRED_CHECKS is a newline-separated list of status-check context
# names that branch protection requires. Declared in the workflow YAML
# rather than fetched from /branch_protections (which needs admin
# scope — sop-tier-bot has read-only). Trade dynamism for simplicity:
# when the required-check set changes, update both branch protection
# AND this env. Keeping them in sync is less complexity than granting
# the audit bot admin perms on every repo.

set -euo pipefail

: "${GITEA_TOKEN:?required}"
: "${GITEA_HOST:?required}"
: "${REPO:?required}"
: "${PR_NUMBER:?required}"
: "${REQUIRED_CHECKS:?required (newline-separated context names)}"

OWNER="${REPO%%/*}"
NAME="${REPO##*/}"
API="https://${GITEA_HOST}/api/v1"
AUTH="Authorization: token ${GITEA_TOKEN}"

# 1. Fetch the PR. If not merged, no-op.
PR=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
MERGED=$(echo "$PR" | jq -r '.merged // false')
if [ "$MERGED" != "true" ]; then
  echo "::notice::PR #${PR_NUMBER} closed without merge — no audit emission."
  exit 0
fi

MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha // empty')
MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login // "unknown"')
TITLE=$(echo "$PR" | jq -r '.title // ""')
BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref // "main"')
HEAD_SHA=$(echo "$PR" | jq -r '.head.sha // empty')

if [ -z "$MERGE_SHA" ]; then
  echo "::warning::PR #${PR_NUMBER} merged=true but no merge_commit_sha — cannot evaluate force-merge."
  exit 0
fi

# FIX: HEAD_SHA was extracted with `// empty` but never guarded. An
# empty HEAD_SHA would make step 3 request `…/commits//status`, leave
# CHECK_STATE empty, and mark every required check "missing" — i.e. a
# false-positive incident.force_merge for a PR the API simply returned
# incomplete data for. Bail out with a warning, mirroring the
# MERGE_SHA guard above.
if [ -z "$HEAD_SHA" ]; then
  echo "::warning::PR #${PR_NUMBER} merged=true but no head.sha — cannot evaluate force-merge."
  exit 0
fi

# 2. Required status checks declared in the workflow env.
REQUIRED="$REQUIRED_CHECKS"
if [ -z "${REQUIRED//[[:space:]]/}" ]; then
  echo "::notice::REQUIRED_CHECKS empty — force-merge not applicable."
  exit 0
fi

# 3. Status-check state at the PR HEAD (where checks ran). The merge
#    commit doesn't get its own checks; we evaluate the PR's last
#    commit, which is what branch protection compared against.
#    NOTE(review): the jq path reads `.status` per entry — that matches
#    Gitea's CommitStatus schema (GitHub's equivalent field is
#    `.state`); confirm against the target Gitea version.
STATUS=$(curl -sS -H "$AUTH" \
  "${API}/repos/${OWNER}/${NAME}/commits/${HEAD_SHA}/status")
declare -A CHECK_STATE
while IFS=$'\t' read -r ctx state; do
  [ -n "$ctx" ] && CHECK_STATE[$ctx]="$state"
done < <(echo "$STATUS" | jq -r '.statuses // [] | .[] | "\(.context)\t\(.status)"')

# 4. For each required check, was it green at merge? YAML block scalars
#    (`|`) leave a trailing newline; skip blank/whitespace-only lines.
FAILED_CHECKS=()
while IFS= read -r req; do
  trimmed="${req#"${req%%[![:space:]]*}"}"          # ltrim
  trimmed="${trimmed%"${trimmed##*[![:space:]]}"}"  # rtrim
  [ -z "$trimmed" ] && continue
  state="${CHECK_STATE[$trimmed]:-missing}"
  if [ "$state" != "success" ]; then
    FAILED_CHECKS+=("${trimmed}=${state}")
  fi
done <<< "$REQUIRED"

if [ "${#FAILED_CHECKS[@]}" -eq 0 ]; then
  echo "::notice::PR #${PR_NUMBER} merged with all required checks green — not a force-merge."
  exit 0
fi

# 5. Emit structured audit event.
NOW=$(date -u +%Y-%m-%dT%H:%M:%SZ)
FAILED_JSON=$(printf '%s\n' "${FAILED_CHECKS[@]}" | jq -R . | jq -s .)

# Print as a single-line JSON so Vector's parse_json transform can pick
# it up cleanly from docker_logs.
jq -nc \
  --arg event_type "incident.force_merge" \
  --arg ts "$NOW" \
  --arg repo "$REPO" \
  --argjson pr "$PR_NUMBER" \
  --arg title "$TITLE" \
  --arg base "$BASE_BRANCH" \
  --arg merged_by "$MERGED_BY" \
  --arg merge_sha "$MERGE_SHA" \
  --argjson failed_checks "$FAILED_JSON" \
  '{event_type: $event_type, ts: $ts, repo: $repo, pr: $pr, title: $title,
    base_branch: $base, merged_by: $merged_by, merge_sha: $merge_sha,
    failed_checks: $failed_checks}'

echo "::warning::FORCE-MERGE detected on PR #${PR_NUMBER} by ${MERGED_BY}: ${#FAILED_CHECKS[@]} required check(s) not green at merge time."
|
|
||||||
@ -1,149 +0,0 @@
|
|||||||
#!/usr/bin/env bash
# sop-tier-check — verify a Gitea PR satisfies the §SOP-6 approval gate.
#
# Reads the PR's tier label, walks approving reviewers, and checks each
# approver's Gitea team membership against the tier's eligible-team set.
# Marks pass only when at least one non-author approver is in an eligible
# team.
#
# Invoked from `.gitea/workflows/sop-tier-check.yml`. The workflow sets
# the env vars below; this script does no IO outside of stdout/stderr +
# the Gitea API.
#
# Required env:
#   GITEA_TOKEN — bot PAT with read:organization,read:user,
#                 read:issue,read:repository scopes
#   GITEA_HOST  — e.g. git.moleculesai.app
#   REPO        — owner/name (from github.repository)
#   PR_NUMBER   — int (from github.event.pull_request.number)
#   PR_AUTHOR   — login (from github.event.pull_request.user.login)
#
# Optional:
#   SOP_DEBUG=1 — print per-API-call diagnostic lines (HTTP codes,
#                 raw response bodies). Default: off.
#
# Exit status: 0 = gate satisfied; 1 = gate failed or a precondition
# (label, token scope, team resolution) could not be met. Any curl/jq
# failure also aborts via `set -euo pipefail`.
#
# Stale-status caveat: Gitea Actions does not always re-fire workflows
# on `labeled` / `pull_request_review:submitted` events. If the
# sop-tier-check status is stale (e.g. red after labels/approvals were
# added), push an empty commit to the PR branch to force a synchronize
# event, OR re-request reviews. Tracked: internal#46.

set -euo pipefail

# debug — emit a diagnostic line to stderr, but only when SOP_DEBUG=1.
debug() {
  if [ "${SOP_DEBUG:-}" = "1" ]; then
    echo "  [debug] $*" >&2
  fi
}

# Validate env — `:?` aborts with the message if the variable is unset/empty.
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
: "${GITEA_HOST:?GITEA_HOST required}"
: "${REPO:?REPO required (owner/name)}"
: "${PR_NUMBER:?PR_NUMBER required}"
: "${PR_AUTHOR:?PR_AUTHOR required}"

OWNER="${REPO%%/*}"
NAME="${REPO##*/}"
API="https://${GITEA_HOST}/api/v1"
AUTH="Authorization: token ${GITEA_TOKEN}"
echo "::notice::tier-check start: repo=$OWNER/$NAME pr=$PR_NUMBER author=$PR_AUTHOR"

# Sanity: token resolves to a user. Catches a missing/expired secret
# early with a targeted error instead of a confusing 401 later.
WHOAMI=$(curl -sS -H "$AUTH" "${API}/user" | jq -r '.login // ""')
if [ -z "$WHOAMI" ]; then
  echo "::error::GITEA_TOKEN cannot resolve a user via /api/v1/user — check the token scope and that the secret is wired correctly."
  exit 1
fi
echo "::notice::token resolves to user: $WHOAMI"

# 1. Read tier label. Exactly one of tier:low|medium|high must be set.
#    (Unquoted $LABELS word-splits per label; safe here because only
#    the space-free tier:* names are matched by the case patterns.)
LABELS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/labels" | jq -r '.[].name')
TIER=""
for L in $LABELS; do
  case "$L" in
    tier:low|tier:medium|tier:high)
      if [ -n "$TIER" ]; then
        echo "::error::Multiple tier labels: $TIER + $L. Apply exactly one."
        exit 1
      fi
      TIER="$L"
      ;;
  esac
done
if [ -z "$TIER" ]; then
  echo "::error::PR has no tier:low|tier:medium|tier:high label. Apply one before merge."
  exit 1
fi
debug "tier=$TIER"

# 2. Tier → eligible teams (mirror of dev-sop §SOP-6). Higher tiers
#    shrink the eligible set; tier:high is CEO-only.
case "$TIER" in
  tier:low) ELIGIBLE="engineers managers ceo" ;;
  tier:medium) ELIGIBLE="managers ceo" ;;
  tier:high) ELIGIBLE="ceo" ;;
esac
debug "eligible_teams=$ELIGIBLE"

# Resolve team-name → team-id once. /orgs/{org}/teams/{slug}/... endpoints
# don't exist on Gitea 1.22; we have to use /teams/{id}.
# NOTE(review): /orgs/{org}/teams is paginated — if the org ever grows
# past the default page size, teams beyond page 1 won't resolve; confirm
# team count stays small or add ?limit=.
ORG_TEAMS_FILE=$(mktemp)
trap 'rm -f "$ORG_TEAMS_FILE"' EXIT
HTTP_CODE=$(curl -sS -o "$ORG_TEAMS_FILE" -w '%{http_code}' -H "$AUTH" \
  "${API}/orgs/${OWNER}/teams")
debug "teams-list HTTP=$HTTP_CODE size=$(wc -c <"$ORG_TEAMS_FILE")"
if [ "${SOP_DEBUG:-}" = "1" ]; then
  echo "  [debug] teams-list body (first 300 chars):" >&2
  head -c 300 "$ORG_TEAMS_FILE" >&2; echo >&2
fi
if [ "$HTTP_CODE" != "200" ]; then
  echo "::error::GET /orgs/${OWNER}/teams returned HTTP $HTTP_CODE — token likely lacks read:org scope. Add a SOP_TIER_CHECK_TOKEN secret with read:organization scope at the org level."
  exit 1
fi
declare -A TEAM_ID
for T in $ELIGIBLE; do
  # head -1 guards against duplicate team names returning multiple ids.
  ID=$(jq -r --arg t "$T" '.[] | select(.name==$t) | .id' <"$ORG_TEAMS_FILE" | head -1)
  if [ -z "$ID" ] || [ "$ID" = "null" ]; then
    VISIBLE=$(jq -r '.[]?.name? // empty' <"$ORG_TEAMS_FILE" 2>/dev/null | tr '\n' ' ')
    echo "::error::Team \"$T\" not found in org $OWNER. Teams visible: $VISIBLE"
    exit 1
  fi
  TEAM_ID[$T]="$ID"
  debug "team-id: $T → $ID"
done

# 3. Read approving reviewers (deduplicated; only state==APPROVED counts).
REVIEWS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews")
APPROVERS=$(echo "$REVIEWS" | jq -r '[.[] | select(.state=="APPROVED") | .user.login] | unique | .[]')
if [ -z "$APPROVERS" ]; then
  echo "::error::No approving reviews. Tier $TIER requires approval from {$ELIGIBLE} (non-author)."
  exit 1
fi
debug "approvers: $(echo "$APPROVERS" | tr '\n' ' ')"

# 4. For each approver: check non-author + team membership (by id).
#    Gitea answers the membership probe with 200/204 for members; any
#    other code (typically 404) means not a member. First eligible
#    non-author approver satisfies the gate.
OK=""
for U in $APPROVERS; do
  if [ "$U" = "$PR_AUTHOR" ]; then
    debug "skip self-review by $U"
    continue
  fi
  for T in $ELIGIBLE; do
    ID="${TEAM_ID[$T]}"
    CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
      "${API}/teams/${ID}/members/${U}")
    debug "probe: $U in team $T (id=$ID) → HTTP $CODE"
    if [ "$CODE" = "200" ] || [ "$CODE" = "204" ]; then
      echo "::notice::approver $U is in team $T (eligible for $TIER)"
      OK="yes"
      break
    fi
  done
  [ -n "$OK" ] && break
done

if [ -z "$OK" ]; then
  echo "::error::Tier $TIER requires approval from a non-author member of {$ELIGIBLE}. Got approvers: $APPROVERS — none of them satisfied team membership. Set SOP_DEBUG=1 to see per-probe HTTP codes."
  exit 1
fi
echo "::notice::sop-tier-check passed: $TIER, approver in {$ELIGIBLE}"
|
|
||||||
@ -1,58 +0,0 @@
|
|||||||
# audit-force-merge — emit `incident.force_merge` to runner stdout when
# a PR is merged with required-status-checks not green. Vector picks
# the JSON line off docker_logs and ships to Loki on
# molecule-canonical-obs (per `reference_obs_stack_phase1`); query as:
#
#   {host="operator"} |= "event_type" |= "incident.force_merge" | json
#
# Closes the §SOP-6 audit gap (the doc says force-merges write to
# `structure_events`, but that table lives in the platform DB, not
# Gitea-side; Loki is the practical equivalent for Gitea Actions
# events). When the credential / observability stack converges later,
# this can sync into structure_events from Loki via a backfill job —
# the structured JSON shape is forward-compatible.
#
# Logic in `.gitea/scripts/audit-force-merge.sh` per the same script-
# extract pattern as sop-tier-check.

name: audit-force-merge

# pull_request_target loads from the base branch — same security model
# as sop-tier-check. Without this, an attacker could rewrite the
# workflow on a PR and skip the audit emission for their own
# force-merge. See `.gitea/workflows/sop-tier-check.yml` for the full
# rationale.
on:
  pull_request_target:
    types: [closed]

jobs:
  audit:
    runs-on: ubuntu-latest
    # Read-only token surface — the audit path never writes via the API.
    permissions:
      contents: read
      pull-requests: read
    # Skip when PR is closed without merge — saves a runner.
    # (The script double-checks `merged` via the API regardless.)
    if: github.event.pull_request.merged == true
    steps:
      - name: Check out base branch (for the script)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          # base.sha, never head.sha — keeps the pull_request_target
          # trust boundary (no untrusted PR code enters the runner).
          ref: ${{ github.event.pull_request.base.sha }}
      - name: Detect force-merge + emit audit event
        env:
          # Same org-level secret the sop-tier-check workflow uses.
          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
          GITEA_HOST: git.moleculesai.app
          REPO: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          # Required-status-check contexts to evaluate at merge time.
          # Newline-separated. Mirror this against branch protection
          # (settings → branches → protected branch → required checks).
          # Declared here rather than fetched from /branch_protections
          # because that endpoint requires admin write — sop-tier-bot is
          # read-only by design (least-privilege).
          REQUIRED_CHECKS: |
            sop-tier-check / tier-check (pull_request)
            Secret scan / Scan diff for credential-shaped strings (pull_request)
        run: bash .gitea/scripts/audit-force-merge.sh
|
|
||||||
@ -1,191 +0,0 @@
|
|||||||
name: Secret scan

# Hard CI gate. Refuses any PR / push whose diff additions contain a
# recognisable credential. Defense-in-depth for the #2090-class incident
# (2026-04-24): GitHub's hosted Copilot Coding Agent leaked a ghs_*
# installation token into tenant-proxy/package.json via `npm init`
# slurping the URL from a token-embedded origin remote. We can't fix
# upstream's clone hygiene, so we gate here.
#
# Same regex set as the runtime's bundled pre-commit hook
# (molecule-ai-workspace-runtime: molecule_runtime/scripts/pre-commit-checks.sh).
# Keep the two sides aligned when adding patterns.
#
# Ported from .github/workflows/secret-scan.yml so the gate actually
# fires on Gitea Actions. Differences from the GitHub version:
#   - drops `merge_group` event (Gitea has no merge queue)
#   - drops `workflow_call` (no cross-repo reusable invocation on Gitea)
#   - SELF path updated to .gitea/workflows/secret-scan.yml
# The job name + step name are identical to the GitHub workflow so the
# status-check context (`Secret scan / Scan diff for credential-shaped
# strings (pull_request)`) matches branch protection on molecule-core/main.

on:
  pull_request:
    types: [opened, synchronize, reopened]
  push:
    branches: [main, staging]

jobs:
  scan:
    name: Scan diff for credential-shaped strings
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 2 # need previous commit to diff against on push events

      # For pull_request events the diff base may be many commits behind
      # HEAD and absent from the shallow clone. Fetch it explicitly.
      - name: Fetch PR base SHA (pull_request events only)
        if: github.event_name == 'pull_request'
        run: git fetch --depth=1 origin ${{ github.event.pull_request.base.sha }}

      - name: Refuse if credential-shaped strings appear in diff additions
        env:
          # Plumb event-specific SHAs through env so the script doesn't
          # need conditional `${{ ... }}` interpolation per event type.
          # github.event.before/after only exist on push events;
          # pull_request has pull_request.base.sha / pull_request.head.sha.
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
          PUSH_BEFORE: ${{ github.event.before }}
          PUSH_AFTER: ${{ github.event.after }}
        run: |
          # Pattern set covers GitHub family (the actual #2090 vector),
          # Anthropic / OpenAI / Slack / AWS. Anchored on prefixes with low
          # false-positive rates against agent-generated content. Mirror of
          # molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh
          # — keep aligned.
          SECRET_PATTERNS=(
            'ghp_[A-Za-z0-9]{36,}'           # GitHub PAT (classic)
            'ghs_[A-Za-z0-9]{36,}'           # GitHub App installation token
            'gho_[A-Za-z0-9]{36,}'           # GitHub OAuth user-to-server
            'ghu_[A-Za-z0-9]{36,}'           # GitHub OAuth user
            'ghr_[A-Za-z0-9]{36,}'           # GitHub OAuth refresh
            'github_pat_[A-Za-z0-9_]{82,}'   # GitHub fine-grained PAT
            'sk-ant-[A-Za-z0-9_-]{40,}'      # Anthropic API key
            'sk-proj-[A-Za-z0-9_-]{40,}'     # OpenAI project key
            'sk-svcacct-[A-Za-z0-9_-]{40,}'  # OpenAI service-account key
            'sk-cp-[A-Za-z0-9_-]{60,}'       # MiniMax API key (F1088 vector — caught only after the fact)
            'xox[baprs]-[A-Za-z0-9-]{20,}'   # Slack tokens
            'AKIA[0-9A-Z]{16}'               # AWS access key ID
            'ASIA[0-9A-Z]{16}'               # AWS STS temp access key ID
          )

          # Determine the diff base. Each event type stores its SHAs in
          # a different place — see the env block above.
          case "${{ github.event_name }}" in
            pull_request)
              BASE="$PR_BASE_SHA"
              HEAD="$PR_HEAD_SHA"
              ;;
            *)
              BASE="$PUSH_BEFORE"
              HEAD="$PUSH_AFTER"
              ;;
          esac

          # On push events with shallow clones, BASE may be present in
          # the event payload but absent from the local object DB
          # (fetch-depth=2 doesn't always reach the previous commit
          # across true merges). Try fetching it on demand. If the
          # fetch fails — e.g. the SHA was force-overwritten — we fall
          # through to the empty-BASE branch below, which scans the
          # entire tree as if every file were new. Correct, just slow.
          # (An all-zero BASE is git's "no previous ref" sentinel on a
          # branch's first push.)
          if [ -n "$BASE" ] && ! echo "$BASE" | grep -qE '^0+$'; then
            if ! git cat-file -e "$BASE" 2>/dev/null; then
              git fetch --depth=1 origin "$BASE" 2>/dev/null || true
            fi
          fi

          # Files added or modified in this change.
          if [ -z "$BASE" ] || echo "$BASE" | grep -qE '^0+$' || ! git cat-file -e "$BASE" 2>/dev/null; then
            # New branch / no previous SHA / BASE unreachable — check the
            # entire tree as added content. Slower, but correct on first
            # push.
            CHANGED=$(git ls-tree -r --name-only HEAD)
            DIFF_RANGE=""
          else
            CHANGED=$(git diff --name-only --diff-filter=AM "$BASE" "$HEAD")
            DIFF_RANGE="$BASE $HEAD"
          fi

          if [ -z "$CHANGED" ]; then
            echo "No changed files to inspect."
            exit 0
          fi

          # Self-exclude: this workflow file legitimately contains the
          # pattern strings as regex literals. Without an exclude it would
          # block its own merge. Both the .github/ original and this
          # .gitea/ port are excluded so a sync between them stays clean.
          SELF_GITHUB=".github/workflows/secret-scan.yml"
          SELF_GITEA=".gitea/workflows/secret-scan.yml"

          OFFENDING=""
          # `while IFS= read -r` (not `for f in $CHANGED`) so filenames
          # containing whitespace don't word-split silently — a path
          # with a space would otherwise produce two iterations on
          # tokens that aren't real filenames, breaking the
          # self-exclude + diff lookup.
          while IFS= read -r f; do
            [ -z "$f" ] && continue
            [ "$f" = "$SELF_GITHUB" ] && continue
            [ "$f" = "$SELF_GITEA" ] && continue
            if [ -n "$DIFF_RANGE" ]; then
              # Added lines only: unified=0 keeps context out; the grep
              # keeps `+...` content lines and drops the `+++` header.
              # NOTE(review): `^\+[^+]` also skips added lines whose
              # first content character is itself `+` — none of the
              # patterns above can start with `+`, so no gap today.
              ADDED=$(git diff --no-color --unified=0 "$BASE" "$HEAD" -- "$f" 2>/dev/null | grep -E '^\+[^+]' || true)
            else
              # No diff range (new branch first push) — scan the full file
              # contents as if every line were new.
              ADDED=$(cat "$f" 2>/dev/null || true)
            fi
            [ -z "$ADDED" ] && continue
            for pattern in "${SECRET_PATTERNS[@]}"; do
              if echo "$ADDED" | grep -qE "$pattern"; then
                OFFENDING="${OFFENDING}${f} (matched: ${pattern})\n"
                break
              fi
            done
          done <<< "$CHANGED"

          if [ -n "$OFFENDING" ]; then
            echo "::error::Credential-shaped strings detected in diff additions:"
            # `printf '%b' "$OFFENDING"` interprets backslash escapes
            # (the literal `\n` we appended above becomes a newline)
            # WITHOUT treating OFFENDING as a format string. Plain
            # `printf "$OFFENDING"` is a format-string sink: a filename
            # containing `%` would be interpreted as a conversion
            # specifier, corrupting the error message (or printing
            # `%(missing)` artifacts).
            printf '%b' "$OFFENDING"
            echo ""
            echo "The actual matched values are NOT echoed here, deliberately —"
            echo "round-tripping a leaked credential into CI logs widens the blast"
            echo "radius (logs are searchable + retained)."
            echo ""
            echo "Recovery:"
            echo "  1. Remove the secret from the file. Replace with an env var"
            echo "     reference (e.g. \${{ secrets.GITHUB_TOKEN }} in workflows,"
            echo "     process.env.X in code)."
            echo "  2. If the credential was already pushed (this PR's commit"
            echo "     history reaches a public ref), treat it as compromised —"
            echo "     ROTATE it immediately, do not just remove it. The token"
            echo "     remains valid in git history forever and may be in any"
            echo "     log/cache that consumed this branch."
            echo "  3. Force-push the cleaned commit (or stack a revert) and"
            echo "     re-run CI."
            echo ""
            echo "If the match is a false positive (test fixture, docs example,"
            echo "or this workflow's own regex literals): use a clearly-fake"
            echo "placeholder like ghs_EXAMPLE_DO_NOT_USE that doesn't satisfy"
            echo "the length suffix, OR add the file path to the SELF exclude"
            echo "list in this workflow with a short reason."
            echo ""
            echo "Mirror of the regex set lives in the runtime's bundled"
            echo "pre-commit hook (molecule-ai-workspace-runtime:"
            echo "molecule_runtime/scripts/pre-commit-checks.sh) — keep aligned."
            exit 1
          fi

          echo "✓ No credential-shaped strings in this change."
|
|
||||||
@ -1,81 +0,0 @@
|
|||||||
# sop-tier-check — canonical Gitea Actions workflow for §SOP-6 enforcement.
#
# Logic lives in `.gitea/scripts/sop-tier-check.sh` (extracted 2026-05-09
# from the previous inline-bash version). The script is the single source
# of truth; this workflow file just sets env + invokes it.
#
# Copy BOTH files (`.gitea/workflows/sop-tier-check.yml` +
# `.gitea/scripts/sop-tier-check.sh`) into any repo that wants the
# §SOP-6 PR gate enforced. Pair with branch protection on the protected
# branch:
#   required_status_checks: ["sop-tier-check / tier-check (pull_request)"]
#   required_approving_reviews: 1
#   approving_review_teams: ["ceo", "managers", "engineers"]
#
# Tier → eligible-team mapping (mirror of dev-sop §SOP-6):
#   tier:low    → engineers, managers, ceo
#   tier:medium → managers, ceo
#   tier:high   → ceo
#
# Force-merge: Owners-team override remains available out-of-band via
# the Gitea merge API; force-merge writes `incident.force_merge` to
# `structure_events` per §Persistent structured logging gate (Phase 3).
# NOTE(review): the audit-force-merge workflow in this repo documents
# that `structure_events` is NOT reachable from Gitea-side and emits to
# Loki instead — this paragraph looks stale; confirm against
# `.gitea/workflows/audit-force-merge.yml`.
#
# Set `SOP_DEBUG: '1'` in the env block to enable per-API-call diagnostic
# lines — useful when diagnosing token-scope or team-id-resolution
# issues. Default off.

name: sop-tier-check

# SECURITY: triggers MUST use `pull_request_target`, not `pull_request`.
# `pull_request_target` loads the workflow definition from the BASE
# branch (i.e. `main`), not the PR's HEAD. With `pull_request`, anyone
# with write access to a feature branch could rewrite this file in
# their PR to dump SOP_TIER_CHECK_TOKEN (org-read scope) to logs and
# exfiltrate it. Verified 2026-05-09 against Gitea 1.22.6 —
# `pull_request_target` (added in Gitea 1.21 via go-gitea/gitea#25229)
# is the documented mitigation.
#
# This workflow does NOT call `actions/checkout` of PR HEAD code, so no
# untrusted code is ever executed in the runner — we only HTTP-call the
# Gitea API. If a future change adds a checkout step, it MUST pin to
# `${{ github.event.pull_request.base.sha }}` (NOT `head.sha`) to keep
# the trust boundary.
on:
  pull_request_target:
    types: [opened, edited, synchronize, reopened, labeled, unlabeled]
  pull_request_review:
    types: [submitted, dismissed, edited]

jobs:
  tier-check:
    runs-on: ubuntu-latest
    # Read-only token surface — the gate never writes via the API.
    permissions:
      contents: read
      pull-requests: read
    steps:
      - name: Check out base branch (for the script)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          # Pin to base.sha — pull_request_target's protection only
          # works if we never check out PR HEAD. Same SHA the workflow
          # itself was loaded from.
          ref: ${{ github.event.pull_request.base.sha }}
      - name: Verify tier label + reviewer team membership
        env:
          # SOP_TIER_CHECK_TOKEN is the org-level secret for the
          # sop-tier-bot PAT (read:organization,read:user,read:issue,
          # read:repository). Stored at the org level
          # (/api/v1/orgs/molecule-ai/actions/secrets) so per-repo
          # configuration is unnecessary — every repo in the org
          # picks it up automatically.
          # Falls back to GITHUB_TOKEN with a clear error if missing.
          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
          GITEA_HOST: git.moleculesai.app
          REPO: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          PR_AUTHOR: ${{ github.event.pull_request.user.login }}
          # Set to '1' for diagnostic per-API-call output. Off by default
          # so production logs aren't noisy.
          SOP_DEBUG: '0'
        run: bash .gitea/scripts/sop-tier-check.sh
|
|
||||||
429
.github/workflows/auto-promote-on-e2e.yml
vendored
Normal file
429
.github/workflows/auto-promote-on-e2e.yml
vendored
Normal file
@ -0,0 +1,429 @@
|
|||||||
|
name: Auto-promote :latest after main image build
|
||||||
|
|
||||||
|
# Retags `ghcr.io/molecule-ai/{platform,platform-tenant}:staging-<sha>`
|
||||||
|
# → `:latest` after either the image build or E2E completes on a `main`
|
||||||
|
# push, gated on E2E Staging SaaS not being red for that SHA.
|
||||||
|
#
|
||||||
|
# Why two triggers:
|
||||||
|
#
|
||||||
|
# `publish-workspace-server-image` and `e2e-staging-saas` are both
|
||||||
|
# paths-filtered, but with DIFFERENT path sets:
|
||||||
|
#
|
||||||
|
# publish-workspace-server-image:
|
||||||
|
# workspace-server/**, canvas/**, manifest.json
|
||||||
|
#
|
||||||
|
# e2e-staging-saas (full lifecycle):
|
||||||
|
# workspace-server/internal/handlers/{registry,workspace_provision,
|
||||||
|
# a2a_proxy}.go, workspace-server/internal/middleware/**,
|
||||||
|
# workspace-server/internal/provisioner/**, tests/e2e/test_staging_full_saas.sh
|
||||||
|
#
|
||||||
|
# The E2E set is a strict SUBSET of the publish set. So:
|
||||||
|
# - canvas/** changes → publish fires, E2E does not
|
||||||
|
# - workspace-server/cmd/** changes → publish fires, E2E does not
|
||||||
|
# - workspace-server/internal/sweep/** → publish fires, E2E does not
|
||||||
|
#
|
||||||
|
# The previous version triggered ONLY on E2E completion, which meant
|
||||||
|
# non-E2E-path changes (canvas, cmd, sweep, etc.) rebuilt the image
|
||||||
|
# but never advanced `:latest`. Result: as of 2026-04-28 this workflow
|
||||||
|
# had run zero times since merge despite eight main pushes — `:latest`
|
||||||
|
# was ~7 hours / 9 PRs behind main with no human realising. See
|
||||||
|
# `molecule-core` Slack discussion 2026-04-28.
|
||||||
|
#
|
||||||
|
# Adding `publish-workspace-server-image` as a second trigger closes
|
||||||
|
# the gap: any image rebuild on main eligibly advances `:latest`.
|
||||||
|
#
|
||||||
|
# Why E2E remains a kill-switch (not the trigger):
|
||||||
|
#
|
||||||
|
# When E2E DID run for this SHA and ended red, we abort — `:latest`
|
||||||
|
# stays on the prior known-good digest. When E2E didn't run (paths
|
||||||
|
# filtered out), we proceed: pre-merge gates already validated this
|
||||||
|
# SHA on staging via auto-promote-staging requiring CI + E2E Canvas +
|
||||||
|
# E2E API + CodeQL all green. Image content for non-E2E-paths
|
||||||
|
# (canvas, cmd, sweep) is exercised by those staging gates.
|
||||||
|
#
|
||||||
|
# Why `main` only:
|
||||||
|
#
|
||||||
|
# `:latest` is what prod tenants pull. We only want SHAs that have
|
||||||
|
# reached main (via auto-promote-staging) to advance `:latest`.
|
||||||
|
# Triggering on staging would let a staging-only revert advance
|
||||||
|
# `:latest` to a SHA that never reaches main, breaking the "production
|
||||||
|
# runs what's on main" invariant.
|
||||||
|
#
|
||||||
|
# Idempotency:
|
||||||
|
#
|
||||||
|
# When a SHA touches paths that match BOTH publish and E2E, both
|
||||||
|
# workflows fire and complete. Both trigger this workflow on
|
||||||
|
# completion → two runs race. Both retag `:staging-<sha>` →
|
||||||
|
# `:latest`. crane tag is idempotent (re-tagging the same digest is a
|
||||||
|
# no-op), so the second run is harmless. concurrency group serializes
|
||||||
|
# them anyway.
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_run:
|
||||||
|
workflows:
|
||||||
|
- 'E2E Staging SaaS (full lifecycle)'
|
||||||
|
- 'publish-workspace-server-image'
|
||||||
|
types: [completed]
|
||||||
|
branches: [main]
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
sha:
|
||||||
|
description: 'Short sha to promote (override; defaults to upstream workflow_run head_sha)'
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
# Serialize promotes per-SHA so the publish+E2E both-fired race lands
|
||||||
|
# cleanly. Different SHAs can promote in parallel.
|
||||||
|
group: auto-promote-latest-${{ github.event.workflow_run.head_sha || github.event.inputs.sha || github.sha }}
|
||||||
|
cancel-in-progress: false
|
||||||
|
|
||||||
|
env:
|
||||||
|
IMAGE_NAME: ghcr.io/molecule-ai/platform
|
||||||
|
TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
promote:
|
||||||
|
# Proceed if upstream succeeded OR manual dispatch. Upstream-failure
|
||||||
|
# paths are filtered here; the E2E-was-red kill-switch lives in the
|
||||||
|
# gate-check step below (covers the case where upstream is publish
|
||||||
|
# success but E2E for the same SHA failed).
|
||||||
|
if: |
|
||||||
|
github.event_name == 'workflow_dispatch' ||
|
||||||
|
(github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Compute short sha
|
||||||
|
id: sha
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
if [ -n "${{ github.event.inputs.sha }}" ]; then
|
||||||
|
FULL="${{ github.event.inputs.sha }}"
|
||||||
|
else
|
||||||
|
FULL="${{ github.event.workflow_run.head_sha }}"
|
||||||
|
fi
|
||||||
|
echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "full=${FULL}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
|
- name: Gate — E2E Staging SaaS state for this SHA
|
||||||
|
# When upstream IS E2E success, we know it's green (filtered by
|
||||||
|
# the job-level `if` already). When upstream is publish, look up
|
||||||
|
# E2E state for the same SHA. Four buckets:
|
||||||
|
#
|
||||||
|
# - completed/success: E2E confirmed safe → proceed
|
||||||
|
# - completed/failure|cancelled|timed_out: E2E found a
|
||||||
|
# regression → ABORT (exit 1), `:latest` stays put
|
||||||
|
# - in_progress|queued|requested: E2E is RACING with publish
|
||||||
|
# for a runtime-touching SHA. publish typically completes
|
||||||
|
# ~5-10min before E2E (~10-15min). If we promote on the
|
||||||
|
# publish signal here, a later E2E failure can't roll back
|
||||||
|
# `:latest` — it'd already be wrongly advanced. So we DEFER:
|
||||||
|
# skip subsequent steps (proceed=false) and let E2E's own
|
||||||
|
# completion event re-fire this workflow, which then takes
|
||||||
|
# the upstream-is-E2E path. exit 0 so the run shows as
|
||||||
|
# success rather than a noisy fake-failure.
|
||||||
|
# - none/none: E2E was paths-filtered out for this SHA (the
|
||||||
|
# change touched canvas/cmd/sweep/etc. — paths covered by
|
||||||
|
# publish but not by E2E). pre-merge gates on staging
|
||||||
|
# already validated this SHA → proceed.
|
||||||
|
#
|
||||||
|
# Manual dispatch skips this check — operator override.
|
||||||
|
id: gate
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
REPO: ${{ github.repository }}
|
||||||
|
SHA: ${{ steps.sha.outputs.full }}
|
||||||
|
UPSTREAM_NAME: ${{ github.event.workflow_run.name }}
|
||||||
|
EVENT_NAME: ${{ github.event_name }}
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
|
||||||
|
echo "proceed=true" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "::notice::Manual dispatch — skipping E2E gate (operator override)"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$UPSTREAM_NAME" = "E2E Staging SaaS (full lifecycle)" ]; then
|
||||||
|
echo "proceed=true" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "::notice::Upstream is E2E itself (success per job-level if) — gate trivially satisfied"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Upstream is publish-workspace-server-image. Check E2E state.
|
||||||
|
# The jq filter must defend against TWO empty cases that gh
|
||||||
|
# CLI emits indistinguishably:
|
||||||
|
# 1. gh exits non-zero (network blip, auth issue) → handled
|
||||||
|
# by the `|| echo "none/none"` fallback below.
|
||||||
|
# 2. gh exits zero but returns `[]` (no E2E run on this
|
||||||
|
# main SHA — the common case for canvas-only / cmd-only
|
||||||
|
# / sweep-only changes whose paths don't trigger E2E).
|
||||||
|
# Without `(.[0] // {})`, jq sees `null` and emits
|
||||||
|
# "null/none" — which the case statement below has no
|
||||||
|
# branch for, so it falls into *) → exit 1.
|
||||||
|
# Surfaced 2026-04-30 the first time the App-token chain
|
||||||
|
# (#2389) actually fired auto-promote-on-e2e from a publish
|
||||||
|
# upstream — every prior run was E2E-upstream which
|
||||||
|
# short-circuits before this gate.
|
||||||
|
RESULT=$(gh run list \
|
||||||
|
--repo "$REPO" \
|
||||||
|
--workflow e2e-staging-saas.yml \
|
||||||
|
--branch main \
|
||||||
|
--commit "$SHA" \
|
||||||
|
--limit 1 \
|
||||||
|
--json status,conclusion \
|
||||||
|
--jq '(.[0] // {}) | "\(.status // "none")/\(.conclusion // "none")"' \
|
||||||
|
2>/dev/null || echo "none/none")
|
||||||
|
|
||||||
|
echo "E2E Staging SaaS for ${SHA:0:7}: $RESULT"
|
||||||
|
|
||||||
|
case "$RESULT" in
|
||||||
|
completed/success)
|
||||||
|
echo "proceed=true" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "::notice::E2E green for this SHA — proceeding with promote"
|
||||||
|
;;
|
||||||
|
completed/failure|completed/timed_out)
|
||||||
|
echo "proceed=false" >> "$GITHUB_OUTPUT"
|
||||||
|
{
|
||||||
|
echo "## ❌ Auto-promote aborted — E2E Staging SaaS failed"
|
||||||
|
echo
|
||||||
|
echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\`"
|
||||||
|
echo "\`:latest\` stays on the prior known-good digest."
|
||||||
|
echo
|
||||||
|
echo "If the failure was a flake, manually dispatch this workflow with the same sha to override."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
completed/cancelled)
|
||||||
|
# cancelled ≠ failure. Per-SHA concurrency cancels older E2E
|
||||||
|
# runs when a newer push lands (memory:
|
||||||
|
# feedback_concurrency_group_per_sha) — the newer SHA will
|
||||||
|
# have its own E2E + promote chain. Treat the same as
|
||||||
|
# in_progress: defer without aborting, let the next E2E run
|
||||||
|
# promote when it lands.
|
||||||
|
#
|
||||||
|
# Caught 2026-05-05 02:03 on sha 31f9a5e — auto-promote
|
||||||
|
# blocked the whole chain because this case fell through to
|
||||||
|
# exit 1 instead of clean defer.
|
||||||
|
echo "proceed=false" >> "$GITHUB_OUTPUT"
|
||||||
|
{
|
||||||
|
echo "## ⏭ Auto-promote deferred — E2E Staging SaaS was cancelled"
|
||||||
|
echo
|
||||||
|
echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\`"
|
||||||
|
echo "Likely per-SHA concurrency (newer push superseded this E2E run)."
|
||||||
|
echo "The newer SHA's E2E will fire its own promote when it lands."
|
||||||
|
echo "If you need this specific SHA promoted, manually dispatch."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
;;
|
||||||
|
in_progress/*|queued/*|requested/*|waiting/*|pending/*)
|
||||||
|
echo "proceed=false" >> "$GITHUB_OUTPUT"
|
||||||
|
{
|
||||||
|
echo "## ⏳ Auto-promote deferred — E2E Staging SaaS still running"
|
||||||
|
echo
|
||||||
|
echo "Publish completed before E2E for \`${SHA:0:7}\` (state: \`$RESULT\`)."
|
||||||
|
echo "Skipping retag here — E2E's own completion event will re-fire this workflow."
|
||||||
|
echo "If E2E ends green, that run promotes \`:latest\`. If red, it aborts."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
;;
|
||||||
|
none/none)
|
||||||
|
echo "proceed=true" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "::notice::E2E paths-filtered out for this SHA — pre-merge staging gates carry"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "proceed=false" >> "$GITHUB_OUTPUT"
|
||||||
|
{
|
||||||
|
echo "## ❓ Auto-promote aborted — unexpected E2E state"
|
||||||
|
echo
|
||||||
|
echo "E2E Staging SaaS for \`${SHA:0:7}\`: \`$RESULT\` (unhandled)"
|
||||||
|
echo "Manual investigation needed; re-dispatch with the same sha once resolved."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
- if: steps.gate.outputs.proceed == 'true'
|
||||||
|
uses: imjasonh/setup-crane@6da1ae018866400525525ce74ff892880c099987 # v0.5
|
||||||
|
|
||||||
|
- name: GHCR login
|
||||||
|
if: steps.gate.outputs.proceed == 'true'
|
||||||
|
run: |
|
||||||
|
echo "${{ secrets.GITHUB_TOKEN }}" | \
|
||||||
|
crane auth login ghcr.io -u "${{ github.actor }}" --password-stdin
|
||||||
|
|
||||||
|
- name: Verify :staging-<sha> exists for both images
|
||||||
|
# Better to fail fast with a clear message than to half-tag
|
||||||
|
# (platform retagged but platform-tenant missing → tenants pull
|
||||||
|
# a stale image).
|
||||||
|
if: steps.gate.outputs.proceed == 'true'
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
for img in "${IMAGE_NAME}" "${TENANT_IMAGE_NAME}"; do
|
||||||
|
tag="${img}:staging-${{ steps.sha.outputs.short }}"
|
||||||
|
if ! crane manifest "$tag" >/dev/null 2>&1; then
|
||||||
|
echo "::error::Missing tag: $tag"
|
||||||
|
echo "::error::publish-workspace-server-image must complete on this SHA before auto-promote can retag :latest."
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo " ok: $tag exists"
|
||||||
|
done
|
||||||
|
|
||||||
|
- name: Ancestry check — refuse to promote :latest backwards
|
||||||
|
# #2244: workflow_run completions arrive in arbitrary order. If
|
||||||
|
# SHA-A and SHA-B both reach main within ~10 min and SHA-B's E2E
|
||||||
|
# completes before SHA-A's, this workflow can fire for SHA-A
|
||||||
|
# AFTER it already promoted SHA-B → :latest goes backwards. The
|
||||||
|
# orphan-reconciler "next run corrects it" doesn't apply: there's
|
||||||
|
# no auto-corrective re-promote, :latest stays wrong until the
|
||||||
|
# next main push lands.
|
||||||
|
#
|
||||||
|
# Detection: read current :latest's `org.opencontainers.image.revision`
|
||||||
|
# label (set by publish-workspace-server-image.yml at build time)
|
||||||
|
# and ask the GitHub compare API whether the candidate SHA is
|
||||||
|
# ahead-of / identical-to / behind / diverged-from current.
|
||||||
|
# Hard-fail on `behind` and `diverged` per the approved design —
|
||||||
|
# silent-bypass is the class we're moving away from. Workflow
|
||||||
|
# goes red, oncall sees it, operator decides how to recover
|
||||||
|
# (manual dispatch with the right SHA, force-promote, etc.).
|
||||||
|
#
|
||||||
|
# Manual dispatch skips this check — operator override semantics
|
||||||
|
# match the gate-check step above.
|
||||||
|
#
|
||||||
|
# Backward-compat: when current :latest carries no revision
|
||||||
|
# label (legacy image pre-publish-with-label), skip-with-warning.
|
||||||
|
# All :latest images on main are post-label as of 2026-04-29, so
|
||||||
|
# this branch will be dead within 90 days; remove then.
|
||||||
|
if: steps.gate.outputs.proceed == 'true' && github.event_name != 'workflow_dispatch'
|
||||||
|
id: ancestry
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
REPO: ${{ github.repository }}
|
||||||
|
TARGET_SHA: ${{ steps.sha.outputs.full }}
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Read the current :latest config and pull the revision label.
|
||||||
|
# `crane config` returns the OCI image config blob (not the manifest);
|
||||||
|
# labels live under `.config.Labels`. `// empty` makes jq return ""
|
||||||
|
# rather than the literal "null" so the test below works.
|
||||||
|
CURRENT_REVISION=$(crane config "${IMAGE_NAME}:latest" 2>/dev/null \
|
||||||
|
| jq -r '.config.Labels["org.opencontainers.image.revision"] // empty' \
|
||||||
|
|| true)
|
||||||
|
|
||||||
|
if [ -z "$CURRENT_REVISION" ]; then
|
||||||
|
echo "decision=skip-no-label" >> "$GITHUB_OUTPUT"
|
||||||
|
{
|
||||||
|
echo "## ⚠ Ancestry check skipped — current :latest has no revision label"
|
||||||
|
echo
|
||||||
|
echo "Likely a legacy image built before \`org.opencontainers.image.revision\` was set."
|
||||||
|
echo "Falling through to retag. After all \`:latest\` images are post-label (TODO 90 days), this branch is dead and should be removed."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
echo "::warning::Current :latest carries no revision label — skipping ancestry check (legacy image)"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$CURRENT_REVISION" = "$TARGET_SHA" ]; then
|
||||||
|
echo "decision=identical" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "::notice:::latest already at ${TARGET_SHA:0:7} — retag will be a no-op"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Ask GitHub which side of the merge graph TARGET_SHA sits on
|
||||||
|
# relative to CURRENT_REVISION. Returns one of: ahead | identical
|
||||||
|
# | behind | diverged. Network or auth errors collapse to "error"
|
||||||
|
# via the explicit fallback so the case below always matches.
|
||||||
|
STATUS=$(gh api \
|
||||||
|
"repos/${REPO}/compare/${CURRENT_REVISION}...${TARGET_SHA}" \
|
||||||
|
--jq '.status' 2>/dev/null || echo "error")
|
||||||
|
|
||||||
|
echo "ancestry compare ${CURRENT_REVISION:0:7} → ${TARGET_SHA:0:7}: $STATUS"
|
||||||
|
|
||||||
|
case "$STATUS" in
|
||||||
|
ahead)
|
||||||
|
echo "decision=ahead" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "::notice::Target ${TARGET_SHA:0:7} is ahead of current :latest (${CURRENT_REVISION:0:7}) — proceeding with retag"
|
||||||
|
;;
|
||||||
|
identical)
|
||||||
|
echo "decision=identical" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "::notice::Target identical to :latest — retag will be a no-op"
|
||||||
|
;;
|
||||||
|
behind)
|
||||||
|
echo "decision=behind" >> "$GITHUB_OUTPUT"
|
||||||
|
{
|
||||||
|
echo "## ❌ Auto-promote refused — target is BEHIND current :latest"
|
||||||
|
echo
|
||||||
|
echo "| Field | Value |"
|
||||||
|
echo "|---|---|"
|
||||||
|
echo "| Target SHA | \`$TARGET_SHA\` |"
|
||||||
|
echo "| Current :latest revision | \`$CURRENT_REVISION\` |"
|
||||||
|
echo "| GitHub compare status | \`behind\` |"
|
||||||
|
echo
|
||||||
|
echo "This guard catches the workflow_run-completion-order race (#2244):"
|
||||||
|
echo "two rapid main pushes whose E2Es complete out-of-order can otherwise"
|
||||||
|
echo "promote \`:latest\` backwards. \`:latest\` stays on \`${CURRENT_REVISION:0:7}\`."
|
||||||
|
echo
|
||||||
|
echo "**Recovery:** if this is a legitimate revert that should land on \`:latest\`,"
|
||||||
|
echo "manually dispatch this workflow with the target sha as input — the manual-dispatch"
|
||||||
|
echo "path skips the ancestry check (operator override)."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
diverged)
|
||||||
|
echo "decision=diverged" >> "$GITHUB_OUTPUT"
|
||||||
|
{
|
||||||
|
echo "## ❓ Auto-promote refused — history diverged"
|
||||||
|
echo
|
||||||
|
echo "| Field | Value |"
|
||||||
|
echo "|---|---|"
|
||||||
|
echo "| Target SHA | \`$TARGET_SHA\` |"
|
||||||
|
echo "| Current :latest revision | \`$CURRENT_REVISION\` |"
|
||||||
|
echo "| GitHub compare status | \`diverged\` |"
|
||||||
|
echo
|
||||||
|
echo "Likely cause: force-push rewrote main's history, leaving the previous"
|
||||||
|
echo "\`:latest\` revision orphaned. Needs human review before \`:latest\` advances."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
error|*)
|
||||||
|
echo "decision=error" >> "$GITHUB_OUTPUT"
|
||||||
|
{
|
||||||
|
echo "## ❌ Auto-promote aborted — ancestry-check API error"
|
||||||
|
echo
|
||||||
|
echo "\`gh api repos/${REPO}/compare/${CURRENT_REVISION}...${TARGET_SHA}\` returned unexpected status: \`$STATUS\`"
|
||||||
|
echo
|
||||||
|
echo "Manual dispatch with the target sha bypasses this check."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
exit 1
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
- name: Retag platform :staging-<sha> → :latest
|
||||||
|
if: steps.gate.outputs.proceed == 'true'
|
||||||
|
run: |
|
||||||
|
crane tag "${IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}" latest
|
||||||
|
|
||||||
|
- name: Retag tenant :staging-<sha> → :latest
|
||||||
|
if: steps.gate.outputs.proceed == 'true'
|
||||||
|
run: |
|
||||||
|
crane tag "${TENANT_IMAGE_NAME}:staging-${{ steps.sha.outputs.short }}" latest
|
||||||
|
|
||||||
|
- name: Summary
|
||||||
|
if: steps.gate.outputs.proceed == 'true'
|
||||||
|
run: |
|
||||||
|
{
|
||||||
|
echo "## :latest promoted to ${{ steps.sha.outputs.short }}"
|
||||||
|
echo
|
||||||
|
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
|
||||||
|
echo "- Trigger: manual dispatch"
|
||||||
|
else
|
||||||
|
echo "- Upstream: \`${{ github.event.workflow_run.name }}\` ([run](${{ github.event.workflow_run.html_url }}))"
|
||||||
|
fi
|
||||||
|
echo "- platform:staging-${{ steps.sha.outputs.short }} → :latest"
|
||||||
|
echo "- platform-tenant:staging-${{ steps.sha.outputs.short }} → :latest"
|
||||||
|
echo
|
||||||
|
echo "Tenant fleet auto-pulls within 5 min via IMAGE_AUTO_REFRESH=true."
|
||||||
|
echo "Force immediate fanout: dispatch redeploy-tenants-on-main.yml."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
434
.github/workflows/auto-promote-staging.yml
vendored
Normal file
434
.github/workflows/auto-promote-staging.yml
vendored
Normal file
@ -0,0 +1,434 @@
|
|||||||
|
name: Auto-promote staging → main
|
||||||
|
|
||||||
|
# Fires after any of the staging-branch quality gates complete. When ALL
|
||||||
|
# required gates are green on the same staging SHA, opens (or re-uses)
|
||||||
|
# a PR `staging → main` and enables auto-merge so the merge queue lands
|
||||||
|
# it. Closes the gap that historically let features sit on staging for
|
||||||
|
# weeks waiting for a bulk promotion PR (see molecule-core#1496 for the
|
||||||
|
# 1172-commit example).
|
||||||
|
#
|
||||||
|
# 2026-04-28 rewrite (PR #142): the previous version did a direct
|
||||||
|
# `git merge --ff-only origin staging && git push origin main`. That
|
||||||
|
# breaks against main's branch-protection ruleset, which requires
|
||||||
|
# status checks "set by the expected GitHub apps" — direct pushes
|
||||||
|
# can't satisfy that condition (only PR merges through the queue can).
|
||||||
|
# The workflow was failing every tick with:
|
||||||
|
# remote: error: GH006: Protected branch update failed for refs/heads/main.
|
||||||
|
# remote: - Required status checks ... were not set by the expected GitHub apps.
|
||||||
|
# Fix: mirror the PR-based pattern from auto-sync-main-to-staging.yml
|
||||||
|
# (the reverse-direction sync, fixed in #2234 for the same reason).
|
||||||
|
# Both directions now use the same merge-queue path that humans use,
|
||||||
|
# no special-case bypass.
|
||||||
|
#
|
||||||
|
# Safety model:
|
||||||
|
# - Runs ONLY on workflow_run events for the staging branch.
|
||||||
|
# - Requires EVERY named gate workflow to have the same head_sha and
|
||||||
|
# all be `conclusion == success`. If any of them is red, skipped,
|
||||||
|
# cancelled, or pending, we abort (stay on the current main).
|
||||||
|
# - The PR base=main head=staging path lets GitHub itself enforce
|
||||||
|
# branch protection. If main has diverged from staging or required
|
||||||
|
# checks aren't satisfied, the merge queue declines the PR — no
|
||||||
|
# need for a manual ff-only ancestry check here.
|
||||||
|
# - Loop safety: the auto-sync-main-to-staging workflow fires when
|
||||||
|
# main lands the auto-promote PR, but its merge into staging is by
|
||||||
|
# GITHUB_TOKEN which doesn't trigger downstream workflow_run events
|
||||||
|
# (GitHub Actions safety). So this workflow doesn't re-fire from
|
||||||
|
# its own promote landing.
|
||||||
|
#
|
||||||
|
# Toggle via repo variable AUTO_PROMOTE_ENABLED (true/unset). When
|
||||||
|
# unset, the workflow logs what it would have done but doesn't open
|
||||||
|
# the PR — useful for dry-running the gate logic without surfacing
|
||||||
|
# a noisy PR while staging CI is still flaky.
|
||||||
|
#
|
||||||
|
# **One-time repo setting (load-bearing):** this workflow opens the
|
||||||
|
# staging→main PR via `gh pr create` using the default GITHUB_TOKEN.
|
||||||
|
# Since GitHub's 2022 default change, that token cannot create or
|
||||||
|
# approve PRs unless the repo opts in. The toggle is at:
|
||||||
|
#
|
||||||
|
# Settings → Actions → General → Workflow permissions
|
||||||
|
# → ✅ Allow GitHub Actions to create and approve pull requests
|
||||||
|
#
|
||||||
|
# Without it, every workflow_run fails with:
|
||||||
|
#
|
||||||
|
# pull request create failed: GraphQL: GitHub Actions is not
|
||||||
|
# permitted to create or approve pull requests (createPullRequest)
|
||||||
|
#
|
||||||
|
# Observed 2026-04-29 01:43 UTC blocking promotion of fcd87b9 (PRs
|
||||||
|
# #2248 + #2249); manually bridged via PR #2252. Re-check this
|
||||||
|
# setting if auto-promote starts failing with createPullRequest
|
||||||
|
# errors after a repo or org admin change.
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_run:
|
||||||
|
workflows:
|
||||||
|
- CI
|
||||||
|
- E2E Staging Canvas (Playwright)
|
||||||
|
- E2E API Smoke Test
|
||||||
|
- CodeQL
|
||||||
|
types: [completed]
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
force:
|
||||||
|
description: "Force promote even when AUTO_PROMOTE_ENABLED is unset (manual override)"
|
||||||
|
required: false
|
||||||
|
default: "false"
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
pull-requests: write
|
||||||
|
# actions: write is needed by the post-merge dispatch tail step
|
||||||
|
# (#2358 / #2357) — `gh workflow run publish-workspace-server-image.yml`
|
||||||
|
# POSTs to /actions/workflows/.../dispatches which requires this scope.
|
||||||
|
# Without it the call 403s and the publish/canary/redeploy chain still
|
||||||
|
# doesn't run on staging→main promotions, undoing #2358.
|
||||||
|
actions: write
|
||||||
|
|
||||||
|
# Serialize auto-promote runs. Multiple staging gate completions can land
|
||||||
|
# in quick succession (CI + E2E + CodeQL all finish within seconds of
|
||||||
|
# each other on a green PR) — without this, two parallel runs both:
|
||||||
|
# 1. Open / re-use the same promote PR.
|
||||||
|
# 2. Both call `gh pr merge --auto` (idempotent — fine).
|
||||||
|
# 3. Both poll for the same mergedAt and both `gh workflow run` publish
|
||||||
|
# → 2× redundant publish builds racing for the same `:staging-latest`
|
||||||
|
# retag, and 2× canary-verify chains.
|
||||||
|
# cancel-in-progress: false because we don't want a brand-new run to kill
|
||||||
|
# a polling-tail that's about to dispatch — the polling tail's 30 min cap
|
||||||
|
# is the right backstop, not workflow-level cancel.
|
||||||
|
concurrency:
|
||||||
|
group: auto-promote-staging
|
||||||
|
cancel-in-progress: false
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
check-all-gates-green:
|
||||||
|
# Only consider staging pushes. PRs into staging don't promote.
|
||||||
|
if: >
|
||||||
|
(github.event_name == 'workflow_run' &&
|
||||||
|
github.event.workflow_run.head_branch == 'staging' &&
|
||||||
|
github.event.workflow_run.event == 'push')
|
||||||
|
|| github.event_name == 'workflow_dispatch'
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
all_green: ${{ steps.gates.outputs.all_green }}
|
||||||
|
head_sha: ${{ steps.gates.outputs.head_sha }}
|
||||||
|
steps:
|
||||||
|
# Skip empty-tree promotes (the perpetual auto-promote↔auto-sync cycle
|
||||||
|
# observed 2026-05-03). Sequence: auto-promote merges via the staging
|
||||||
|
# merge-queue's MERGE strategy, creating a merge commit on main that
|
||||||
|
# staging doesn't have. auto-sync then merges main back into staging
|
||||||
|
# via another merge commit (the queue's MERGE strategy applies on
|
||||||
|
# the staging side too, even when the workflow's local FF would
|
||||||
|
# have sufficed). Now staging has a new merge-commit SHA whose
|
||||||
|
# tree == main's tree — but auto-promote sees "staging ahead of
|
||||||
|
# main by 1" and opens YET another empty promote PR. Each round
|
||||||
|
# costs ~30-40 min wallclock, ~2 manual approvals, and burns a
|
||||||
|
# full CodeQL Go run (~15 min). Without this guard the cycle
|
||||||
|
# repeats indefinitely.
|
||||||
|
#
|
||||||
|
# Long-term fix is to switch the merge_queue ruleset's
|
||||||
|
# `merge_method` away from MERGE so FF-able PRs land cleanly,
|
||||||
|
# but that's a broader change affecting every staging PR's
|
||||||
|
# commit shape. This guard is the one-line surgical fix that
|
||||||
|
# breaks the cycle without touching merge-queue config.
|
||||||
|
#
|
||||||
|
# Fail-open: if `git diff` errors for any reason, fall through
|
||||||
|
# to the gate check (preserve existing behavior). Only skip
|
||||||
|
# when the diff is DEFINITIVELY empty.
|
||||||
|
- name: Checkout for tree-diff check
|
||||||
|
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
ref: staging
|
||||||
|
- name: Skip if staging tree == main tree (perpetual-cycle break)
|
||||||
|
id: tree-diff
|
||||||
|
env:
|
||||||
|
HEAD_SHA: ${{ github.event.workflow_run.head_sha || github.sha }}
|
||||||
|
run: |
|
||||||
|
set -eu
|
||||||
|
git fetch origin main --depth=50 || { echo "::warning::git fetch main failed — proceeding (fail-open)"; exit 0; }
|
||||||
|
# Compare staging tip's tree against main's tree. `git diff
|
||||||
|
# --quiet` exits 0 if no differences, 1 if there are.
|
||||||
|
if git diff --quiet origin/main "$HEAD_SHA" -- 2>/dev/null; then
|
||||||
|
{
|
||||||
|
echo "## ⏭ Skipped — no code to promote"
|
||||||
|
echo
|
||||||
|
echo "staging tip (\`${HEAD_SHA:0:8}\`) and \`main\` have identical trees."
|
||||||
|
echo "This is the auto-promote↔auto-sync merge-commit cycle: staging has a"
|
||||||
|
echo "new SHA (a sync-back merge commit) but the underlying file tree is"
|
||||||
|
echo "already on main, so there's no real code to ship."
|
||||||
|
echo
|
||||||
|
echo "Skipping to avoid opening an empty promote PR. Cycle terminates here."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
echo "::notice::auto-promote: staging tree == main tree — no code to promote, skipping"
|
||||||
|
echo "skip=true" >> "$GITHUB_OUTPUT"
|
||||||
|
else
|
||||||
|
echo "skip=false" >> "$GITHUB_OUTPUT"
|
||||||
|
fi
|
||||||
|
- name: Check all required gates on this SHA
|
||||||
|
if: steps.tree-diff.outputs.skip != 'true'
|
||||||
|
id: gates
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
HEAD_SHA: ${{ github.event.workflow_run.head_sha || github.sha }}
|
||||||
|
REPO: ${{ github.repository }}
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Required gate workflow files. Use file paths (relative to
|
||||||
|
# .github/workflows/) rather than display names because:
|
||||||
|
#
|
||||||
|
# 1. `gh run list --workflow=<name>` is ambiguous when two
|
||||||
|
# workflows have the same `name:` — observed 2026-04-28
|
||||||
|
# with "CodeQL" matching both `codeql.yml` (explicit) and
|
||||||
|
# GitHub's UI-configured Code-quality default setup
|
||||||
|
# (internal "codeql"). gh CLI returns "could not resolve
|
||||||
|
# to a unique workflow" → empty result → gate evaluated
|
||||||
|
# as missing/none → auto-promote dead-locked despite all
|
||||||
|
# checks actually passing.
|
||||||
|
#
|
||||||
|
# 2. File paths are the unique identifier for workflows;
|
||||||
|
# `name:` is just a display string and can collide.
|
||||||
|
#
|
||||||
|
# When adding/removing a gate, update this list AND the
|
||||||
|
# branch-protection required-checks list (which uses check-run
|
||||||
|
# display names, not workflow names; the two are decoupled and
|
||||||
|
# should be kept in sync manually).
|
||||||
|
GATES=(
|
||||||
|
"ci.yml"
|
||||||
|
"e2e-staging-canvas.yml"
|
||||||
|
"e2e-api.yml"
|
||||||
|
"codeql.yml"
|
||||||
|
)
|
||||||
|
|
||||||
|
echo "head_sha=${HEAD_SHA}" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "Checking gates on SHA ${HEAD_SHA}"
|
||||||
|
|
||||||
|
ALL_GREEN=true
|
||||||
|
for gate in "${GATES[@]}"; do
|
||||||
|
# Query the most recent run of this workflow on this SHA.
|
||||||
|
# event=push to avoid picking up PR runs. branch=staging to
|
||||||
|
# guard against someone dispatching the gate on a non-staging
|
||||||
|
# branch at the same SHA.
|
||||||
|
RESULT=$(gh run list \
|
||||||
|
--repo "$REPO" \
|
||||||
|
--workflow "$gate" \
|
||||||
|
--branch staging \
|
||||||
|
--event push \
|
||||||
|
--commit "$HEAD_SHA" \
|
||||||
|
--limit 1 \
|
||||||
|
--json status,conclusion \
|
||||||
|
--jq '.[0] | "\(.status)/\(.conclusion // "none")"' \
|
||||||
|
2>/dev/null || echo "missing/none")
|
||||||
|
|
||||||
|
echo " $gate → $RESULT"
|
||||||
|
|
||||||
|
# Only completed/success counts. completed/failure or
|
||||||
|
# in_progress/anything or no record at all = abort.
|
||||||
|
if [ "$RESULT" != "completed/success" ]; then
|
||||||
|
ALL_GREEN=false
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "all_green=${ALL_GREEN}" >> "$GITHUB_OUTPUT"
|
||||||
|
if [ "$ALL_GREEN" != "true" ]; then
|
||||||
|
echo "::notice::auto-promote: not all gates are green on ${HEAD_SHA} — staying on current main"
|
||||||
|
fi
|
||||||
|
|
||||||
|
promote:
|
||||||
|
needs: check-all-gates-green
|
||||||
|
if: needs.check-all-gates-green.outputs.all_green == 'true'
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Check rollout gate
|
||||||
|
env:
|
||||||
|
AUTO_PROMOTE_ENABLED: ${{ vars.AUTO_PROMOTE_ENABLED }}
|
||||||
|
FORCE_INPUT: ${{ github.event.inputs.force }}
|
||||||
|
run: |
|
||||||
|
set -eu
|
||||||
|
# Repo variable AUTO_PROMOTE_ENABLED=true flips this on. While
|
||||||
|
# it's unset, the workflow dry-runs (logs what it would have
|
||||||
|
# done) but doesn't open the promote PR. Set the variable in
|
||||||
|
# Settings → Secrets and variables → Actions → Variables.
|
||||||
|
if [ "${AUTO_PROMOTE_ENABLED:-}" != "true" ] && [ "${FORCE_INPUT:-false}" != "true" ]; then
|
||||||
|
{
|
||||||
|
echo "## ⏸ Auto-promote disabled"
|
||||||
|
echo
|
||||||
|
echo "Repo variable \`AUTO_PROMOTE_ENABLED\` is not set to \`true\`."
|
||||||
|
echo "All gates are green on staging; would have opened a promote PR to \`main\`."
|
||||||
|
echo
|
||||||
|
echo "To enable: Settings → Secrets and variables → Actions → Variables → \`AUTO_PROMOTE_ENABLED=true\`."
|
||||||
|
echo "To test once manually: workflow_dispatch with \`force=true\`."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
echo "::notice::auto-promote disabled — dry run only"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Mint the App token BEFORE the promote-PR step so the auto-merge
|
||||||
|
# call can use it. GITHUB_TOKEN-initiated merges suppress the
|
||||||
|
# downstream `push` event on main, breaking the
|
||||||
|
# publish-workspace-server-image → canary-verify → redeploy-tenants
|
||||||
|
# chain (issue #2357). Using the App token here means the
|
||||||
|
# merge-queue-landed merge IS able to fire the cascade naturally;
|
||||||
|
# the polling tail below stays as defense-in-depth.
|
||||||
|
- name: Mint App token for promote-PR + downstream dispatch
|
||||||
|
if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
|
||||||
|
id: app-token
|
||||||
|
uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
|
||||||
|
with:
|
||||||
|
app-id: ${{ secrets.MOLECULE_AI_APP_ID }}
|
||||||
|
private-key: ${{ secrets.MOLECULE_AI_APP_PRIVATE_KEY }}
|
||||||
|
|
||||||
|
- name: Open (or reuse) staging → main promote PR + enable auto-merge
|
||||||
|
if: ${{ vars.AUTO_PROMOTE_ENABLED == 'true' || github.event.inputs.force == 'true' }}
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ steps.app-token.outputs.token }}
|
||||||
|
REPO: ${{ github.repository }}
|
||||||
|
TARGET_SHA: ${{ needs.check-all-gates-green.outputs.head_sha }}
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Look for an existing open promote PR (idempotent on re-run
|
||||||
|
# of the workflow). The PR's head IS the staging branch — the
|
||||||
|
# whole point is "advance main to staging's tip", so we don't
|
||||||
|
# need a per-SHA branch like auto-sync-main-to-staging uses.
|
||||||
|
PR_NUM=$(gh pr list --repo "$REPO" \
|
||||||
|
--base main --head staging --state open \
|
||||||
|
--json number --jq '.[0].number // ""')
|
||||||
|
|
||||||
|
if [ -z "$PR_NUM" ]; then
|
||||||
|
TITLE="staging → main: auto-promote ${TARGET_SHA:0:7}"
|
||||||
|
BODY_FILE=$(mktemp)
|
||||||
|
cat > "$BODY_FILE" <<EOFBODY
|
||||||
|
Automated promotion of \`staging\` (\`${TARGET_SHA:0:8}\`) to \`main\`. All required staging gates green at this SHA: CI, E2E Staging Canvas, E2E API Smoke, CodeQL.
|
||||||
|
|
||||||
|
This PR is auto-generated by \`.github/workflows/auto-promote-staging.yml\` whenever every required gate completes green on the same staging SHA. It exists because main's branch protection requires status checks "set by the expected GitHub apps" — direct \`git push\` from a workflow can't satisfy that, only PR merges through the queue can.
|
||||||
|
|
||||||
|
Merge queue lands this; no human action needed unless gates fail. Reverse-direction sync (the merge commit on main → staging) is handled by \`auto-sync-main-to-staging.yml\`.
|
||||||
|
EOFBODY
|
||||||
|
PR_URL=$(gh pr create --repo "$REPO" \
|
||||||
|
--base main --head staging \
|
||||||
|
--title "$TITLE" \
|
||||||
|
--body-file "$BODY_FILE")
|
||||||
|
PR_NUM=$(echo "$PR_URL" | grep -oE '[0-9]+$' | tail -1)
|
||||||
|
rm -f "$BODY_FILE"
|
||||||
|
echo "::notice::Opened PR #${PR_NUM}"
|
||||||
|
else
|
||||||
|
echo "::notice::Re-using existing promote PR #${PR_NUM}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Enable auto-merge — the merge queue picks it up once
|
||||||
|
# required gates are green on the merge_group ref.
|
||||||
|
if ! gh pr merge "$PR_NUM" --repo "$REPO" --auto --merge 2>&1; then
|
||||||
|
echo "::warning::Failed to enable auto-merge on PR #${PR_NUM} — operator may need to merge manually."
|
||||||
|
fi
|
||||||
|
|
||||||
|
{
|
||||||
|
echo "## ✅ Auto-promote PR opened"
|
||||||
|
echo
|
||||||
|
echo "- Source: staging at \`${TARGET_SHA:0:8}\`"
|
||||||
|
echo "- PR: #${PR_NUM}"
|
||||||
|
echo
|
||||||
|
echo "Merge queue lands the PR once required gates are green; no human action needed unless gates fail."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
|
||||||
|
# Hand the PR number to the next step so we can dispatch the
|
||||||
|
# tenant-redeploy chain after the merge queue lands the merge.
|
||||||
|
echo "promote_pr_num=${PR_NUM}" >> "$GITHUB_OUTPUT"
|
||||||
|
id: promote_pr
|
||||||
|
|
||||||
|
# The App token minted above (before the promote-PR step) is
|
||||||
|
# also used by the polling tail below. Defense-in-depth: with
|
||||||
|
# the merge-queue-landed merge now using the App token, the
|
||||||
|
# main-branch push event SHOULD fire the publish/canary/redeploy
|
||||||
|
# cascade naturally — but if for any reason it doesn't (e.g. an
|
||||||
|
# unrelated event-suppression edge case), the explicit dispatches
|
||||||
|
# below still wake the chain.
|
||||||
|
- name: Wait for promote merge, then dispatch publish + redeploy (#2357)
|
||||||
|
# Defense-in-depth dispatch. With the auto-merge call above
|
||||||
|
# now using the App token (this commit), the merge-queue-landed
|
||||||
|
# merge SHOULD fire publish-workspace-server-image naturally
|
||||||
|
# via on:push:[main] — App-token-initiated pushes DO trigger
|
||||||
|
# workflow_run cascades, unlike GITHUB_TOKEN-initiated ones
|
||||||
|
# (the documented "no recursion" rule —
|
||||||
|
# https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow).
|
||||||
|
#
|
||||||
|
# This explicit dispatch stays as belt-and-suspenders for any
|
||||||
|
# edge case where the natural cascade misfires. If it never
|
||||||
|
# observably fires after this token swap (i.e. the publish
|
||||||
|
# workflow has already started by the time we get here), the
|
||||||
|
# second dispatch is a harmless no-op (publish-workspace-server-image
|
||||||
|
# has its own concurrency group that dedupes).
|
||||||
|
#
|
||||||
|
# See PR for #2357: pre-fix the merge action was via
|
||||||
|
# GITHUB_TOKEN, suppressing the cascade and forcing this tail
|
||||||
|
# to be the SOLE chain trigger. With the auto-merge token swap
|
||||||
|
# the tail becomes redundant in the happy path; keep until
|
||||||
|
# we've observed >=10 successful natural cascades, then drop.
|
||||||
|
if: steps.promote_pr.outputs.promote_pr_num != ''
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ steps.app-token.outputs.token }}
|
||||||
|
REPO: ${{ github.repository }}
|
||||||
|
PR_NUM: ${{ steps.promote_pr.outputs.promote_pr_num }}
|
||||||
|
run: |
|
||||||
|
# Poll for merge — max 30 min (60 × 30s). The merge queue
|
||||||
|
# typically lands within 5-10 min when gates are green. Break
|
||||||
|
# early if the PR is closed without merging (operator action,
|
||||||
|
# gates flipped red post-approval, branch-protection rejection)
|
||||||
|
# so we don't tie up a runner for the full 30 min on a dead PR.
|
||||||
|
MERGED=""
|
||||||
|
STATE=""
|
||||||
|
for _ in $(seq 1 60); do
|
||||||
|
VIEW=$(gh pr view "$PR_NUM" --repo "$REPO" --json mergedAt,state)
|
||||||
|
MERGED=$(echo "$VIEW" | jq -r '.mergedAt // ""')
|
||||||
|
STATE=$(echo "$VIEW" | jq -r '.state // ""')
|
||||||
|
if [ -n "$MERGED" ] && [ "$MERGED" != "null" ]; then
|
||||||
|
echo "::notice::Promote PR #${PR_NUM} merged at ${MERGED}"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
if [ "$STATE" = "CLOSED" ]; then
|
||||||
|
echo "::warning::Promote PR #${PR_NUM} was closed without merging — skipping deploy dispatch."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
sleep 30
|
||||||
|
done
|
||||||
|
|
||||||
|
if [ -z "$MERGED" ] || [ "$MERGED" = "null" ]; then
|
||||||
|
echo "::warning::Promote PR #${PR_NUM} didn't merge within 30min — skipping deploy dispatch (manually run \`gh workflow run publish-workspace-server-image.yml --ref main\` once it lands)."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Dispatch publish on main using the App token. App-initiated
|
||||||
|
# workflow_dispatch DOES propagate the workflow_run cascade,
|
||||||
|
# unlike GITHUB_TOKEN-initiated dispatch.
|
||||||
|
# publish completes → canary-verify chains via workflow_run →
|
||||||
|
# redeploy-tenants-on-main chains via workflow_run + branches:[main].
|
||||||
|
if gh workflow run publish-workspace-server-image.yml \
|
||||||
|
--repo "$REPO" --ref main 2>&1; then
|
||||||
|
echo "::notice::Dispatched publish-workspace-server-image on ref=main as molecule-ai App — canary-verify and redeploy-tenants-on-main will chain via workflow_run."
|
||||||
|
{
|
||||||
|
echo "## 🚀 Tenant redeploy chain dispatched"
|
||||||
|
echo
|
||||||
|
echo "- publish-workspace-server-image (workflow_dispatch on \`main\`, actor: \`molecule-ai[bot]\`)"
|
||||||
|
echo "- canary-verify will chain on completion"
|
||||||
|
echo "- redeploy-tenants-on-main will chain on canary green"
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
else
|
||||||
|
echo "::error::Failed to dispatch publish-workspace-server-image. Run manually: gh workflow run publish-workspace-server-image.yml --ref main"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# ALSO dispatch auto-sync-main-to-staging.yml. Same root cause as
|
||||||
|
# publish above (issue #2357): the merge-queue-initiated push to
|
||||||
|
# main is by GITHUB_TOKEN → no `on: push` triggers fire downstream.
|
||||||
|
# Without this dispatch, every staging→main promote leaves staging
|
||||||
|
# one merge commit BEHIND main, which silently dead-locks the NEXT
|
||||||
|
# promote PR as `mergeStateStatus: BEHIND` because main's
|
||||||
|
# branch-protection has `strict: true`. Verified empirically on
|
||||||
|
# 2026-05-02 against PR #2442 (Phase 2 promote): only the explicit
|
||||||
|
# publish-workspace-server-image dispatch fired on the previous
|
||||||
|
# promote SHA 76c604fb, while auto-sync silently no-op'd, leaving
|
||||||
|
# staging behind for ~24h until manually bridged.
|
||||||
|
if gh workflow run auto-sync-main-to-staging.yml \
|
||||||
|
--repo "$REPO" --ref main 2>&1; then
|
||||||
|
echo "::notice::Dispatched auto-sync-main-to-staging on ref=main as molecule-ai App — staging will absorb the new main merge commit via PR + merge queue."
|
||||||
|
else
|
||||||
|
echo "::error::Failed to dispatch auto-sync-main-to-staging. Run manually: gh workflow run auto-sync-main-to-staging.yml --ref main"
|
||||||
|
fi
|
||||||
83
.github/workflows/auto-promote-stale-alarm.yml
vendored
Normal file
83
.github/workflows/auto-promote-stale-alarm.yml
vendored
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
name: auto-promote-stale-alarm
|
||||||
|
|
||||||
|
# Hourly cron + on-demand alarm for the silent-block failure mode that
|
||||||
|
# motivated issue #2975:
|
||||||
|
# - The auto-promote-staging.yml workflow opened a PR + armed
|
||||||
|
# auto-merge, but main's branch protection requires a human review
|
||||||
|
# (reviewDecision=REVIEW_REQUIRED). The PR sat BLOCKED with no
|
||||||
|
# surface-up-the-stack for 12+ hours, holding 25 commits hostage
|
||||||
|
# including the Memory v2 redesign and a reno-stars data-loss fix.
|
||||||
|
#
|
||||||
|
# This workflow runs `scripts/check-stale-promote-pr.sh` against the
|
||||||
|
# repo's open auto-promote PRs (base=main head=staging). When a PR has
|
||||||
|
# been BLOCKED on REVIEW_REQUIRED for >4h, it:
|
||||||
|
# 1. Emits a workflow-level warning (visible in run summary + the
|
||||||
|
# Actions UI feed).
|
||||||
|
# 2. Posts a comment on the PR (idempotent — one alarm per PR).
|
||||||
|
#
|
||||||
|
# The detection logic lives in scripts/check-stale-promote-pr.sh so
|
||||||
|
# it's unit-testable with stubbed `gh` (see test-check-stale-promote-pr.sh).
|
||||||
|
# This file is the schedule + invocation surface only — SSOT for the
|
||||||
|
# detector itself.
|
||||||
|
|
||||||
|
on:
|
||||||
|
schedule:
|
||||||
|
# Hourly. Cheap (one `gh pr list` + jq), and 1h granularity is
|
||||||
|
# plenty for a 4h staleness threshold — operators see the alarm
|
||||||
|
# within at most 1h of crossing the threshold.
|
||||||
|
- cron: "27 * * * *" # at :27 to dodge the cron herd at :00
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
stale_hours:
|
||||||
|
description: "Hours after which a BLOCKED+REVIEW_REQUIRED PR is stale (default 4)"
|
||||||
|
required: false
|
||||||
|
default: "4"
|
||||||
|
post_comment:
|
||||||
|
description: "Post a comment on stale PRs (default true)"
|
||||||
|
required: false
|
||||||
|
default: "true"
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
pull-requests: write # post comments on stale PRs
|
||||||
|
|
||||||
|
# Serialize so the on-demand and scheduled runs don't double-comment
|
||||||
|
# the same PR. cancel-in-progress=false because the script is idempotent
|
||||||
|
# (existing comment marker prevents dupes), but a scheduled run firing
|
||||||
|
# while a manual one runs would just re-list the same PR set.
|
||||||
|
concurrency:
|
||||||
|
group: auto-promote-stale-alarm
|
||||||
|
cancel-in-progress: false
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
scan:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout (need scripts/ only)
|
||||||
|
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||||
|
with:
|
||||||
|
sparse-checkout: |
|
||||||
|
scripts/check-stale-promote-pr.sh
|
||||||
|
sparse-checkout-cone-mode: false
|
||||||
|
- name: Run stale-PR detector
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
GITHUB_REPOSITORY: ${{ github.repository }}
|
||||||
|
STALE_HOURS: ${{ inputs.stale_hours || '4' }}
|
||||||
|
POST_COMMENT: ${{ inputs.post_comment || 'true' }}
|
||||||
|
run: |
|
||||||
|
# The script's exit code reflects the count of stale PRs.
|
||||||
|
# We don't want a stale finding to fail the workflow run —
|
||||||
|
# the warning + comment are the signal, the green/red is
|
||||||
|
# noise. So convert any non-zero exit to a workflow notice
|
||||||
|
# and exit 0.
|
||||||
|
set +e
|
||||||
|
bash scripts/check-stale-promote-pr.sh
|
||||||
|
rc=$?
|
||||||
|
set -e
|
||||||
|
if [ "$rc" -ne 0 ]; then
|
||||||
|
echo "::notice::Stale PR detector found $rc PR(s) needing attention. See warnings above + comments on the PRs."
|
||||||
|
fi
|
||||||
|
# Always succeed — operator-facing surface is the warning,
|
||||||
|
# not the workflow status.
|
||||||
|
exit 0
|
||||||
237
.github/workflows/auto-sync-main-to-staging.yml
vendored
Normal file
237
.github/workflows/auto-sync-main-to-staging.yml
vendored
Normal file
@ -0,0 +1,237 @@
|
|||||||
|
name: Auto-sync main → staging
|
||||||
|
|
||||||
|
# Reflects every push to `main` back onto `staging` so the
|
||||||
|
# staging-as-superset-of-main invariant holds.
|
||||||
|
#
|
||||||
|
# Background:
|
||||||
|
#
|
||||||
|
# `auto-promote-staging.yml` advances main via `git merge --ff-only`
|
||||||
|
# + `git push origin main` — that's a clean fast-forward, no merge
|
||||||
|
# commit. But manual merges of `staging → main` PRs through the
|
||||||
|
# GitHub UI / API create a merge commit on main that staging
|
||||||
|
# doesn't have. The next `staging → main` PR then evaluates as
|
||||||
|
# "BEHIND" because staging is missing that merge commit, requiring
|
||||||
|
# a manual `gh pr update-branch` round-trip.
|
||||||
|
#
|
||||||
|
# This happened twice on 2026-04-28 (PRs #2202, #2205, both manual
|
||||||
|
# bridges). Each time the bridge needed update-branch + a re-CI
|
||||||
|
# round before merging. Operationally annoying and avoidable.
|
||||||
|
#
|
||||||
|
# Architecture:
|
||||||
|
#
|
||||||
|
# This repo's `staging` branch is protected by a `merge_queue`
|
||||||
|
# ruleset (id 15500102) that blocks ALL direct pushes — no bypass
|
||||||
|
# even for org admins or the GitHub Actions integration. Direct
|
||||||
|
# `git push origin staging` returns GH013. So instead of pushing
|
||||||
|
# directly, this workflow:
|
||||||
|
#
|
||||||
|
# 1. Checks if main is already in staging's ancestry → no-op.
|
||||||
|
# 2. Creates an `auto-sync/main-<sha>` branch from staging.
|
||||||
|
# 3. Tries `git merge --ff-only origin/main` → if staging hasn't
|
||||||
|
# diverged this is a clean ff.
|
||||||
|
# 4. Otherwise `git merge --no-ff origin/main` to absorb main's
|
||||||
|
# tip while keeping staging's history.
|
||||||
|
# 5. Pushes the auto-sync branch.
|
||||||
|
# 6. Opens a PR (base=staging, head=auto-sync/main-<sha>) and
|
||||||
|
# enables auto-merge so the merge queue lands it.
|
||||||
|
#
|
||||||
|
# This mirrors the path human PRs take through staging — same
|
||||||
|
# rules, same gates, no special-case bypass.
|
||||||
|
#
|
||||||
|
# Loop safety:
|
||||||
|
#
|
||||||
|
# `GITHUB_TOKEN`-authored merges (including the merge queue's land
|
||||||
|
# of the auto-sync PR) do NOT trigger downstream workflow runs
|
||||||
|
# (GitHub Actions safety). So when the auto-sync PR lands on
|
||||||
|
# staging, `auto-promote-staging.yml` is NOT triggered by that
|
||||||
|
# push. The next developer push to staging triggers auto-promote
|
||||||
|
# normally. No loop possible.
|
||||||
|
#
|
||||||
|
# Concurrency:
|
||||||
|
#
|
||||||
|
# Two pushes to main in quick succession (e.g., manual UI merge
|
||||||
|
# immediately followed by auto-promote-staging's ff-merge) could
|
||||||
|
# otherwise open two overlapping auto-sync PRs. The concurrency
|
||||||
|
# group serializes runs; the second waits for the first to exit.
|
||||||
|
# (The first run exits after opening + auto-merge-queueing the PR,
|
||||||
|
# not after the merge actually completes — so multiple PRs can be
|
||||||
|
# open simultaneously, but the merge queue handles them serially.)
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
# workflow_dispatch lets:
|
||||||
|
# 1. Operators manually backfill a missed sync (e.g. after a manual
|
||||||
|
# UI merge that the runner missed).
|
||||||
|
# 2. auto-promote-staging.yml's polling tail explicitly invoke us
|
||||||
|
# after the promote PR lands. This is load-bearing: when the
|
||||||
|
# merge queue lands a promote-PR merge, the resulting push to
|
||||||
|
# `main` is "by GITHUB_TOKEN", and per GitHub's no-recursion
|
||||||
|
# rule (https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow)
|
||||||
|
# that push event does NOT fire any downstream workflows. The
|
||||||
|
# `on: push` trigger above is silently dead for the very pattern
|
||||||
|
# we exist to handle. Verified empirically 2026-05-02 against
|
||||||
|
# SHA 76c604fb (PR #2437 staging→main): only ONE workflow fired
|
||||||
|
# (publish-workspace-server-image, dispatched explicitly by
|
||||||
|
# auto-promote's polling tail with an App token). Every other
|
||||||
|
# `on: push: branches: [main]` workflow — including this one —
|
||||||
|
# was suppressed. Until the underlying merge call moves to an
|
||||||
|
# App token, an explicit dispatch is the only reliable path.
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
pull-requests: write
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: auto-sync-main-to-staging
|
||||||
|
cancel-in-progress: false
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
sync-staging:
|
||||||
|
# ubuntu-latest matches every other workflow in this repo. The
|
||||||
|
# earlier `[self-hosted, macos, arm64]` was a copy-paste artefact
|
||||||
|
# from the molecule-controlplane repo (which IS private and uses a
|
||||||
|
# Mac runner) — molecule-core has no Mac runner registered, so the
|
||||||
|
# job sat unassigned whenever the trigger fired. Verified 2026-05-02:
|
||||||
|
# this is the ONLY workflow in molecule-core/.github/workflows/ with
|
||||||
|
# a non-ubuntu runs-on.
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout staging
|
||||||
|
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
ref: staging
|
||||||
|
token: ${{ secrets.AUTO_SYNC_TOKEN }}
|
||||||
|
|
||||||
|
- name: Configure git author
|
||||||
|
run: |
|
||||||
|
git config user.name "github-actions[bot]"
|
||||||
|
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||||
|
|
||||||
|
- name: Check if staging already contains main
|
||||||
|
id: check
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
git fetch origin main
|
||||||
|
if git merge-base --is-ancestor origin/main HEAD; then
|
||||||
|
echo "needs_sync=false" >> "$GITHUB_OUTPUT"
|
||||||
|
{
|
||||||
|
echo "## ✅ No-op"
|
||||||
|
echo
|
||||||
|
echo "staging already contains \`origin/main\` ($(git rev-parse --short=8 origin/main))."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
else
|
||||||
|
echo "needs_sync=true" >> "$GITHUB_OUTPUT"
|
||||||
|
MAIN_SHORT=$(git rev-parse --short=8 origin/main)
|
||||||
|
echo "main_short=${MAIN_SHORT}" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "branch=auto-sync/main-${MAIN_SHORT}" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "::notice::staging is missing main's tip (${MAIN_SHORT}) — opening sync PR"
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Create auto-sync branch + merge main
|
||||||
|
if: steps.check.outputs.needs_sync == 'true'
|
||||||
|
id: prep
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
BRANCH="${{ steps.check.outputs.branch }}"
|
||||||
|
|
||||||
|
# If a previous auto-sync run already opened a branch for the
|
||||||
|
# same main sha, prefer reusing it (idempotent behavior on
|
||||||
|
# workflow restart). Force-update from latest staging anyway
|
||||||
|
# so it absorbs any staging-side commits that landed since.
|
||||||
|
git checkout -B "$BRANCH"
|
||||||
|
|
||||||
|
if git merge --ff-only origin/main; then
|
||||||
|
echo "did_ff=true" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "::notice::Fast-forwarded ${BRANCH} to origin/main"
|
||||||
|
else
|
||||||
|
echo "did_ff=false" >> "$GITHUB_OUTPUT"
|
||||||
|
if ! git merge --no-ff origin/main -m "chore: sync main → staging (auto)"; then
|
||||||
|
# Hygiene: leave the work tree clean before failing.
|
||||||
|
git merge --abort || true
|
||||||
|
{
|
||||||
|
echo "## ❌ Conflict"
|
||||||
|
echo
|
||||||
|
echo "Auto-merge \`main → staging\` failed with conflicts."
|
||||||
|
echo "A human needs to resolve manually."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Push auto-sync branch
|
||||||
|
if: steps.check.outputs.needs_sync == 'true'
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
# Force-with-lease so a concurrent auto-sync run can't
|
||||||
|
# silently clobber an in-flight branch we just updated. If a
|
||||||
|
# different writer touched the branch, we abort and the next
|
||||||
|
# run picks up the latest state.
|
||||||
|
git push --force-with-lease origin "${{ steps.check.outputs.branch }}"
|
||||||
|
|
||||||
|
- name: Open auto-sync PR + enable auto-merge
|
||||||
|
if: steps.check.outputs.needs_sync == 'true'
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
|
||||||
|
BRANCH: ${{ steps.check.outputs.branch }}
|
||||||
|
MAIN_SHORT: ${{ steps.check.outputs.main_short }}
|
||||||
|
DID_FF: ${{ steps.prep.outputs.did_ff }}
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Find existing PR for this branch (idempotent on workflow
|
||||||
|
# restart) before creating a new one.
|
||||||
|
PR_NUM=$(gh pr list --head "$BRANCH" --base staging --state open --json number --jq '.[0].number // ""')
|
||||||
|
|
||||||
|
if [ -z "$PR_NUM" ]; then
|
||||||
|
# Body lives in a temp file to keep the multi-line content
|
||||||
|
# out of the YAML block scalar (un-indented newlines inside
|
||||||
|
# an inline shell string break YAML parsing).
|
||||||
|
BODY_FILE=$(mktemp)
|
||||||
|
if [ "$DID_FF" = "true" ]; then
|
||||||
|
TITLE="chore: sync main → staging (auto, ff to ${MAIN_SHORT})"
|
||||||
|
cat > "$BODY_FILE" <<EOFBODY
|
||||||
|
Automated fast-forward of \`staging\` to \`origin/main\` (\`${MAIN_SHORT}\`). Staging has no in-flight commits that diverge from main. Merge queue lands this; no human action needed.
|
||||||
|
|
||||||
|
This PR is auto-generated by \`.github/workflows/auto-sync-main-to-staging.yml\` on every push to \`main\`. It exists because this repo's \`staging\` branch has a \`merge_queue\` ruleset that blocks direct pushes — even from the GitHub Actions integration.
|
||||||
|
EOFBODY
|
||||||
|
else
|
||||||
|
TITLE="chore: sync main → staging (auto, merge ${MAIN_SHORT})"
|
||||||
|
cat > "$BODY_FILE" <<EOFBODY
|
||||||
|
Automated merge of \`origin/main\` (\`${MAIN_SHORT}\`) into \`staging\`. Staging has commits main doesn't, so this is a non-ff merge that absorbs main's tip. Merge queue lands this.
|
||||||
|
|
||||||
|
This PR is auto-generated by \`.github/workflows/auto-sync-main-to-staging.yml\` on every push to \`main\`.
|
||||||
|
EOFBODY
|
||||||
|
fi
|
||||||
|
|
||||||
|
# gh pr create prints the URL on stdout; extract the PR number.
|
||||||
|
PR_URL=$(gh pr create \
|
||||||
|
--base staging \
|
||||||
|
--head "$BRANCH" \
|
||||||
|
--title "$TITLE" \
|
||||||
|
--body-file "$BODY_FILE")
|
||||||
|
PR_NUM=$(echo "$PR_URL" | grep -oE '[0-9]+$' | tail -1)
|
||||||
|
rm -f "$BODY_FILE"
|
||||||
|
echo "::notice::Opened PR #${PR_NUM}"
|
||||||
|
else
|
||||||
|
echo "::notice::Re-using existing PR #${PR_NUM} for ${BRANCH}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Enable auto-merge — the merge queue picks it up once
|
||||||
|
# required gates are green. Use --merge for merge commits
|
||||||
|
# (matches the rest of this repo's PR convention).
|
||||||
|
if ! gh pr merge "$PR_NUM" --auto --merge 2>&1; then
|
||||||
|
echo "::warning::Failed to enable auto-merge on PR #${PR_NUM} — operator may need to merge manually."
|
||||||
|
fi
|
||||||
|
|
||||||
|
{
|
||||||
|
echo "## ✅ Auto-sync PR opened"
|
||||||
|
echo
|
||||||
|
echo "- Branch: \`$BRANCH\`"
|
||||||
|
echo "- PR: #$PR_NUM"
|
||||||
|
echo "- Strategy: $([ "$DID_FF" = "true" ] && echo "ff" || echo "merge commit")"
|
||||||
|
echo
|
||||||
|
echo "Merge queue lands the PR once required gates are green; no human action needed unless gates fail."
|
||||||
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
37
.github/workflows/auto-tag-runtime.yml
vendored
37
.github/workflows/auto-tag-runtime.yml
vendored
@ -57,42 +57,17 @@ jobs:
|
|||||||
id: bump
|
id: bump
|
||||||
if: steps.skip.outputs.skip != 'true'
|
if: steps.skip.outputs.skip != 'true'
|
||||||
env:
|
env:
|
||||||
# Gitea-shape token (act_runner forwards GITHUB_TOKEN as a
|
GH_TOKEN: ${{ github.token }}
|
||||||
# short-lived per-run secret with read access to this repo).
|
|
||||||
# We hit `/api/v1/repos/.../pulls?state=closed` directly
|
|
||||||
# because `gh pr list` calls Gitea's GraphQL endpoint, which
|
|
||||||
# returns HTTP 405 (issue #75 / post-#66 sweep).
|
|
||||||
GITEA_TOKEN: ${{ github.token }}
|
|
||||||
REPO: ${{ github.repository }}
|
|
||||||
GITEA_API_URL: ${{ github.server_url }}/api/v1
|
|
||||||
PUSH_SHA: ${{ github.sha }}
|
|
||||||
run: |
|
run: |
|
||||||
# Find the merged PR whose merge_commit_sha matches this push.
|
# The merged PR for this push commit. `gh pr list --search` finds
|
||||||
# Gitea's `/repos/{owner}/{repo}/pulls?state=closed` returns
|
# closed PRs whose merge commit matches; we take the first.
|
||||||
# PRs sorted newest-first; we paginate up to 50 and jq-filter
|
PR=$(gh pr list --state merged --search "${{ github.sha }}" --json number,labels --jq '.[0]' 2>/dev/null || echo "")
|
||||||
# on `merge_commit_sha == PUSH_SHA`. Bounded — auto-tag fires
|
|
||||||
# per push to main, so the matching PR is always among the
|
|
||||||
# most recent closures. 50 is comfortably more than the
|
|
||||||
# ~10-20 staging→main promotes that close in any reasonable
|
|
||||||
# window.
|
|
||||||
set -euo pipefail
|
|
||||||
PRS_JSON=$(curl --fail-with-body -sS \
|
|
||||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
|
||||||
-H "Accept: application/json" \
|
|
||||||
"${GITEA_API_URL}/repos/${REPO}/pulls?state=closed&sort=newest&limit=50" \
|
|
||||||
2>/dev/null || echo "[]")
|
|
||||||
PR=$(printf '%s' "$PRS_JSON" \
|
|
||||||
| jq -c --arg sha "$PUSH_SHA" \
|
|
||||||
'[.[] | select(.merged_at != null and .merge_commit_sha == $sha)] | .[0] // empty')
|
|
||||||
if [ -z "$PR" ] || [ "$PR" = "null" ]; then
|
if [ -z "$PR" ] || [ "$PR" = "null" ]; then
|
||||||
echo "No merged PR found for ${PUSH_SHA} — defaulting to patch bump."
|
echo "No merged PR found for ${{ github.sha }} — defaulting to patch bump."
|
||||||
echo "kind=patch" >> "$GITHUB_OUTPUT"
|
echo "kind=patch" >> "$GITHUB_OUTPUT"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
# Gitea returns labels under `.labels[].name`, same shape as
|
LABELS=$(echo "$PR" | jq -r '.labels[].name')
|
||||||
# GitHub's REST. The previous `gh pr list --json number,labels`
|
|
||||||
# output was identical; jq filter unchanged.
|
|
||||||
LABELS=$(printf '%s' "$PR" | jq -r '.labels[]?.name // empty')
|
|
||||||
if echo "$LABELS" | grep -qx 'release:major'; then
|
if echo "$LABELS" | grep -qx 'release:major'; then
|
||||||
echo "kind=major" >> "$GITHUB_OUTPUT"
|
echo "kind=major" >> "$GITHUB_OUTPUT"
|
||||||
elif echo "$LABELS" | grep -qx 'release:minor'; then
|
elif echo "$LABELS" | grep -qx 'release:minor'; then
|
||||||
|
|||||||
4
.github/workflows/block-internal-paths.yml
vendored
4
.github/workflows/block-internal-paths.yml
vendored
@ -1,7 +1,7 @@
|
|||||||
name: Block internal-flavored paths
|
name: Block internal-flavored paths
|
||||||
|
|
||||||
# Hard CI gate. Internal content (positioning, competitive briefs, sales
|
# Hard CI gate. Internal content (positioning, competitive briefs, sales
|
||||||
# playbooks, PMM/press drip, draft campaigns) lives in molecule-ai/internal —
|
# playbooks, PMM/press drip, draft campaigns) lives in Molecule-AI/internal —
|
||||||
# this public monorepo must never re-acquire those paths. CEO directive
|
# this public monorepo must never re-acquire those paths. CEO directive
|
||||||
# 2026-04-23 after a fleet-wide audit found 79 internal files leaked here.
|
# 2026-04-23 after a fleet-wide audit found 79 internal files leaked here.
|
||||||
#
|
#
|
||||||
@ -135,7 +135,7 @@ jobs:
|
|||||||
echo "::error::Forbidden internal-flavored paths detected:"
|
echo "::error::Forbidden internal-flavored paths detected:"
|
||||||
printf "$OFFENDING"
|
printf "$OFFENDING"
|
||||||
echo ""
|
echo ""
|
||||||
echo "These paths belong in molecule-ai/internal, not this public repo."
|
echo "These paths belong in Molecule-AI/internal, not this public repo."
|
||||||
echo "See docs/internal-content-policy.md for canonical locations."
|
echo "See docs/internal-content-policy.md for canonical locations."
|
||||||
echo ""
|
echo ""
|
||||||
echo "If your file is genuinely public-facing (e.g. a blog post"
|
echo "If your file is genuinely public-facing (e.g. a blog post"
|
||||||
|
|||||||
30
.github/workflows/branch-protection-drift.yml
vendored
30
.github/workflows/branch-protection-drift.yml
vendored
@ -19,7 +19,6 @@ on:
|
|||||||
branches: [staging, main]
|
branches: [staging, main]
|
||||||
paths:
|
paths:
|
||||||
- 'tools/branch-protection/**'
|
- 'tools/branch-protection/**'
|
||||||
- '.github/workflows/**'
|
|
||||||
- '.github/workflows/branch-protection-drift.yml'
|
- '.github/workflows/branch-protection-drift.yml'
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
@ -80,32 +79,3 @@ jobs:
|
|||||||
# Repo-admin scope, needed for /branches/:b/protection.
|
# Repo-admin scope, needed for /branches/:b/protection.
|
||||||
GH_TOKEN: ${{ secrets.GH_TOKEN_FOR_ADMIN_API }}
|
GH_TOKEN: ${{ secrets.GH_TOKEN_FOR_ADMIN_API }}
|
||||||
run: bash tools/branch-protection/drift_check.sh
|
run: bash tools/branch-protection/drift_check.sh
|
||||||
|
|
||||||
# Self-test the parity script before running it on the real
|
|
||||||
# workflows — pins the script's classification logic against
|
|
||||||
# synthetic safe/unsafe/missing/unsafe-mix/matrix fixtures so a
|
|
||||||
# regression in the script can't false-pass on the production
|
|
||||||
# workflow audit. Cheap (~0.5s); always runs.
|
|
||||||
- name: Self-test check-name parity script
|
|
||||||
run: bash tools/branch-protection/test_check_name_parity.sh
|
|
||||||
|
|
||||||
# Check-name parity gate (#144 / saved memory
|
|
||||||
# feedback_branch_protection_check_name_parity).
|
|
||||||
#
|
|
||||||
# drift_check.sh asserts the live branch protection matches what
|
|
||||||
# apply.sh would set; check_name_parity.sh closes the orthogonal
|
|
||||||
# gap: it asserts every required check name in apply.sh maps to a
|
|
||||||
# workflow job whose "always emits this status" shape is intact.
|
|
||||||
#
|
|
||||||
# The two checks fail in different scenarios:
|
|
||||||
#
|
|
||||||
# - drift_check fails → live state was rewritten out-of-band
|
|
||||||
# (UI click, manual PATCH).
|
|
||||||
# - check_name_parity fails → an apply.sh required name has no
|
|
||||||
# emitter, OR the emitting workflow has a top-level paths:
|
|
||||||
# filter without per-step if-gates (the silent-block shape).
|
|
||||||
#
|
|
||||||
# Cheap (~1s); runs without the admin token because it only reads
|
|
||||||
# apply.sh + .github/workflows/ from the checkout.
|
|
||||||
- name: Run check-name parity gate
|
|
||||||
run: bash tools/branch-protection/check_name_parity.sh
|
|
||||||
|
|||||||
82
.github/workflows/canary-staging.yml
vendored
82
.github/workflows/canary-staging.yml
vendored
@ -20,19 +20,6 @@ on:
|
|||||||
# a few minutes under load — that's fine for a canary.
|
# a few minutes under load — that's fine for a canary.
|
||||||
- cron: '*/30 * * * *'
|
- cron: '*/30 * * * *'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
inputs:
|
|
||||||
keep_on_failure:
|
|
||||||
description: >-
|
|
||||||
Skip teardown when the canary fails (debugging only). The
|
|
||||||
tenant org + EC2 + CF tunnel + DNS stay alive so an operator
|
|
||||||
can SSM into the workspace EC2 and capture docker logs of the
|
|
||||||
failing claude-code container. REMEMBER to manually delete
|
|
||||||
via DELETE /cp/admin/tenants/<slug> when done so the org
|
|
||||||
doesn't accumulate cost. Only honored on workflow_dispatch;
|
|
||||||
cron runs always tear down (we don't want unattended cron
|
|
||||||
to leak resources).
|
|
||||||
type: boolean
|
|
||||||
default: false
|
|
||||||
|
|
||||||
# Serialise with the full-SaaS workflow so they don't contend for the
|
# Serialise with the full-SaaS workflow so they don't contend for the
|
||||||
# same org-create quota on staging. Different group key from
|
# same org-create quota on staging. Different group key from
|
||||||
@ -93,14 +80,6 @@ jobs:
|
|||||||
# is "Token Plan only" but cheap-per-token and fast.
|
# is "Token Plan only" but cheap-per-token and fast.
|
||||||
E2E_MODEL_SLUG: MiniMax-M2.7-highspeed
|
E2E_MODEL_SLUG: MiniMax-M2.7-highspeed
|
||||||
E2E_RUN_ID: "canary-${{ github.run_id }}"
|
E2E_RUN_ID: "canary-${{ github.run_id }}"
|
||||||
# Debug-only: when an operator dispatches with keep_on_failure=true,
|
|
||||||
# the canary script's E2E_KEEP_ORG=1 path skips teardown so the
|
|
||||||
# tenant org + EC2 stay alive for SSM-based log capture. Cron runs
|
|
||||||
# never set this (the input only exists on workflow_dispatch) so
|
|
||||||
# unattended cron always tears down. See molecule-core#129
|
|
||||||
# failure mode #1 — capturing the actual exception requires
|
|
||||||
# docker logs from the live container.
|
|
||||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_on_failure == 'true' && '1' || '0' }}
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||||
@ -158,28 +137,27 @@ jobs:
|
|||||||
id: canary
|
id: canary
|
||||||
run: bash tests/e2e/test_staging_full_saas.sh
|
run: bash tests/e2e/test_staging_full_saas.sh
|
||||||
|
|
||||||
# Alerting: open a sticky issue on the FIRST failure; comment on
|
# Alerting: open an issue only after THREE consecutive failures so
|
||||||
# subsequent failures; auto-close on next green. Comment-on-existing
|
# transient flakes (Cloudflare DNS hiccup, AWS API blip) don't spam
|
||||||
# de-duplicates so a single open issue accumulates the streak —
|
# the issue list. If an issue is already open, we still comment on
|
||||||
# ops sees one issue with N comments rather than N issues.
|
# every failure so ops sees the streak. Auto-close on next green.
|
||||||
#
|
#
|
||||||
# Why no consecutive-failures threshold (e.g., wait 3 runs before
|
# Threshold rationale: canary fires every 30 min, so 3 failures =
|
||||||
# filing): the prior threshold check used
|
# ~90 min of consecutive red — well past any single-run flake but
|
||||||
# `github.rest.actions.listWorkflowRuns()` which Gitea 1.22.6 does
|
# still tight enough that a real outage gets surfaced before the
|
||||||
# not expose (returns 404). On Gitea Actions the threshold call
|
# next deploy window.
|
||||||
# ALWAYS failed, breaking the entire alerting step and going days
|
|
||||||
# silent on real regressions (38h+ chronic red on 2026-05-07/08
|
|
||||||
# before this fix; tracked in molecule-core#129). Filing on first
|
|
||||||
# failure is also better UX — we want to know about the first red,
|
|
||||||
# not wait 90 min for it to "count." Real flakes get one issue +
|
|
||||||
# a quick close-on-green; persistent reds accumulate comments.
|
|
||||||
- name: Open issue on failure
|
- name: Open issue on failure
|
||||||
if: failure()
|
if: failure()
|
||||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
|
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
|
||||||
|
env:
|
||||||
|
# Inject the workflow path explicitly — context.workflow is
|
||||||
|
# the *name*, not the file path the actions API needs.
|
||||||
|
WORKFLOW_PATH: '.github/workflows/canary-staging.yml'
|
||||||
|
CONSECUTIVE_THRESHOLD: '3'
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
const title = '🔴 Canary failing: staging SaaS smoke';
|
const title = '🔴 Canary failing: staging SaaS smoke';
|
||||||
const runURL = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
|
const runURL = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
|
||||||
|
|
||||||
// Find an existing open canary issue (stable title match).
|
// Find an existing open canary issue (stable title match).
|
||||||
// If one exists, this isn't a "first failure" — comment and exit.
|
// If one exists, this isn't a "first failure" — comment and exit.
|
||||||
@ -199,12 +177,32 @@ jobs:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// No open issue yet — file one on this first failure. The
|
// No open issue yet — check the last N-1 runs' conclusions.
|
||||||
// comment-on-existing branch above means subsequent failures
|
// We open the issue only if the last (THRESHOLD-1) runs ALSO
|
||||||
// accumulate as comments on this same issue, so we don't
|
// failed (so this is the 3rd consecutive red).
|
||||||
// spam new issues per run.
|
const threshold = parseInt(process.env.CONSECUTIVE_THRESHOLD, 10);
|
||||||
|
const { data: runs } = await github.rest.actions.listWorkflowRuns({
|
||||||
|
owner: context.repo.owner, repo: context.repo.repo,
|
||||||
|
workflow_id: process.env.WORKFLOW_PATH,
|
||||||
|
status: 'completed',
|
||||||
|
per_page: threshold,
|
||||||
|
// Skip the current in-progress run; it isn't 'completed' yet.
|
||||||
|
});
|
||||||
|
// listWorkflowRuns returns recent first. We need (threshold-1)
|
||||||
|
// prior failures (current run is the threshold-th).
|
||||||
|
const priorFailures = (runs.workflow_runs || [])
|
||||||
|
.slice(0, threshold - 1)
|
||||||
|
.filter(r => r.id !== context.runId)
|
||||||
|
.filter(r => r.conclusion === 'failure')
|
||||||
|
.length;
|
||||||
|
if (priorFailures < threshold - 1) {
|
||||||
|
core.info(`Below threshold: ${priorFailures + 1}/${threshold} consecutive failures — not filing yet`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const body =
|
const body =
|
||||||
`Canary run failed at ${new Date().toISOString()}.\n\n` +
|
`Canary run failed at ${new Date().toISOString()}, ` +
|
||||||
|
`${threshold} consecutive runs red.\n\n` +
|
||||||
`Run: ${runURL}\n\n` +
|
`Run: ${runURL}\n\n` +
|
||||||
`This issue auto-closes on the next green canary run. ` +
|
`This issue auto-closes on the next green canary run. ` +
|
||||||
`Consecutive failures add a comment here rather than a new issue.`;
|
`Consecutive failures add a comment here rather than a new issue.`;
|
||||||
@ -213,7 +211,7 @@ jobs:
|
|||||||
title, body,
|
title, body,
|
||||||
labels: ['canary-staging', 'bug'],
|
labels: ['canary-staging', 'bug'],
|
||||||
});
|
});
|
||||||
core.info('Opened canary failure issue (first red)');
|
core.info(`Opened canary failure issue (${threshold} consecutive reds)`);
|
||||||
|
|
||||||
- name: Auto-close canary issue on success
|
- name: Auto-close canary issue on success
|
||||||
if: success()
|
if: success()
|
||||||
|
|||||||
2
.github/workflows/canary-verify.yml
vendored
2
.github/workflows/canary-verify.yml
vendored
@ -108,7 +108,7 @@ jobs:
|
|||||||
echo
|
echo
|
||||||
echo "One or more canary secrets are unset (\`CANARY_TENANT_URLS\`, \`CANARY_ADMIN_TOKENS\`, \`CANARY_CP_SHARED_SECRET\`)."
|
echo "One or more canary secrets are unset (\`CANARY_TENANT_URLS\`, \`CANARY_ADMIN_TOKENS\`, \`CANARY_CP_SHARED_SECRET\`)."
|
||||||
echo "Phase 2 canary fleet has not been stood up yet —"
|
echo "Phase 2 canary fleet has not been stood up yet —"
|
||||||
echo "see [canary-tenants.md](https://git.moleculesai.app/molecule-ai/molecule-controlplane/blob/main/docs/canary-tenants.md)."
|
echo "see [canary-tenants.md](https://github.com/Molecule-AI/molecule-controlplane/blob/main/docs/canary-tenants.md)."
|
||||||
echo
|
echo
|
||||||
echo "**Skipped — promote-to-latest will NOT auto-fire.** Dispatch \`promote-latest.yml\` manually when ready."
|
echo "**Skipped — promote-to-latest will NOT auto-fire.** Dispatch \`promote-latest.yml\` manually when ready."
|
||||||
} >> "$GITHUB_STEP_SUMMARY"
|
} >> "$GITHUB_STEP_SUMMARY"
|
||||||
|
|||||||
18
.github/workflows/ci.yml
vendored
18
.github/workflows/ci.yml
vendored
@ -87,7 +87,7 @@ jobs:
|
|||||||
run: go mod download
|
run: go mod download
|
||||||
- if: needs.changes.outputs.platform == 'true'
|
- if: needs.changes.outputs.platform == 'true'
|
||||||
run: go build ./cmd/server
|
run: go build ./cmd/server
|
||||||
# CLI (molecli) moved to standalone repo: github.com/molecule-ai/molecule-cli
|
# CLI (molecli) moved to standalone repo: github.com/Molecule-AI/molecule-cli
|
||||||
- if: needs.changes.outputs.platform == 'true'
|
- if: needs.changes.outputs.platform == 'true'
|
||||||
run: go vet ./... || true
|
run: go vet ./... || true
|
||||||
- if: needs.changes.outputs.platform == 'true'
|
- if: needs.changes.outputs.platform == 'true'
|
||||||
@ -165,7 +165,7 @@ jobs:
|
|||||||
# Strip the package-import prefix so we can match .coverage-allowlist.txt
|
# Strip the package-import prefix so we can match .coverage-allowlist.txt
|
||||||
# entries written as paths relative to workspace-server/.
|
# entries written as paths relative to workspace-server/.
|
||||||
# Handle both module paths: platform/workspace-server/... and platform/...
|
# Handle both module paths: platform/workspace-server/... and platform/...
|
||||||
rel=$(echo "$file" | sed 's|^github.com/molecule-ai/molecule-monorepo/platform/workspace-server/||; s|^github.com/molecule-ai/molecule-monorepo/platform/||')
|
rel=$(echo "$file" | sed 's|^github.com/Molecule-AI/molecule-monorepo/platform/workspace-server/||; s|^github.com/Molecule-AI/molecule-monorepo/platform/||')
|
||||||
|
|
||||||
if echo "$ALLOWLIST" | grep -qxF "$rel"; then
|
if echo "$ALLOWLIST" | grep -qxF "$rel"; then
|
||||||
echo "::warning file=workspace-server/$rel::Critical file at ${pct}% coverage (allowlisted, #1823) — fix before expiry."
|
echo "::warning file=workspace-server/$rel::Critical file at ${pct}% coverage (allowlisted, #1823) — fix before expiry."
|
||||||
@ -235,13 +235,7 @@ jobs:
|
|||||||
run: npx vitest run --coverage
|
run: npx vitest run --coverage
|
||||||
- name: Upload coverage summary as artifact
|
- name: Upload coverage summary as artifact
|
||||||
if: needs.changes.outputs.canvas == 'true' && always()
|
if: needs.changes.outputs.canvas == 'true' && always()
|
||||||
# Pinned to v3 for Gitea act_runner v0.6 compatibility — v4+ uses
|
uses: actions/upload-artifact@v3 # pinned to v3 for Gitea act_runner v0.6 compatibility (internal#46)
|
||||||
# the GHES 3.10+ artifact protocol that Gitea 1.22.x does NOT
|
|
||||||
# implement, surfacing as `GHESNotSupportedError: @actions/artifact
|
|
||||||
# v2.0.0+, upload-artifact@v4+ and download-artifact@v4+ are not
|
|
||||||
# currently supported on GHES`. Drop this pin when Gitea ships
|
|
||||||
# the v4 protocol (tracked: post-Gitea-1.23 followup).
|
|
||||||
uses: actions/upload-artifact@c6a366c94c3e0affe28c06c8df20a878f24da3cf # v3.2.2
|
|
||||||
with:
|
with:
|
||||||
name: canvas-coverage-${{ github.run_id }}
|
name: canvas-coverage-${{ github.run_id }}
|
||||||
path: canvas/coverage/
|
path: canvas/coverage/
|
||||||
@ -249,8 +243,8 @@ jobs:
|
|||||||
if-no-files-found: warn
|
if-no-files-found: warn
|
||||||
|
|
||||||
# MCP Server + SDK removed from CI — now in standalone repos:
|
# MCP Server + SDK removed from CI — now in standalone repos:
|
||||||
# - github.com/molecule-ai/molecule-mcp-server (npm CI)
|
# - github.com/Molecule-AI/molecule-mcp-server (npm CI)
|
||||||
# - github.com/molecule-ai/molecule-sdk-python (PyPI CI)
|
# - github.com/Molecule-AI/molecule-sdk-python (PyPI CI)
|
||||||
|
|
||||||
# e2e-api job moved to .github/workflows/e2e-api.yml (issue #458).
|
# e2e-api job moved to .github/workflows/e2e-api.yml (issue #458).
|
||||||
# It now has workflow-level concurrency (cancel-in-progress: false) so
|
# It now has workflow-level concurrency (cancel-in-progress: false) so
|
||||||
@ -440,5 +434,5 @@ jobs:
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# SDK + plugin validation moved to standalone repo:
|
# SDK + plugin validation moved to standalone repo:
|
||||||
# github.com/molecule-ai/molecule-sdk-python
|
# github.com/Molecule-AI/molecule-sdk-python
|
||||||
|
|
||||||
|
|||||||
181
.github/workflows/codeql.yml
vendored
181
.github/workflows/codeql.yml
vendored
@ -1,92 +1,36 @@
|
|||||||
name: CodeQL
|
name: CodeQL
|
||||||
|
|
||||||
# Stub workflow — CodeQL Action is structurally incompatible with Gitea
|
# Controls CodeQL scan triggers for this repo.
|
||||||
# Actions (post-2026-05-06 SCM migration off GitHub).
|
|
||||||
#
|
#
|
||||||
# Why this is a stub, not a real CodeQL run:
|
# GitHub's "Code quality" default setup (the UI-configured one) is
|
||||||
|
# hardcoded to only scan the default branch — on this repo that's
|
||||||
|
# `staging`, so PRs promoting staging→main would otherwise never be
|
||||||
|
# scanned. This workflow fills that gap by explicitly scanning both
|
||||||
|
# branches on push and PR.
|
||||||
#
|
#
|
||||||
# 1. github/codeql-action/init@v4 hits api.github.com endpoints
|
# Runs on ubuntu-latest (GHA-hosted — public repo, free). GHAS is NOT
|
||||||
# (CodeQL CLI bundle download + query-pack registry + telemetry)
|
# enabled on this repo, so results are not uploaded to the Security
|
||||||
# that Gitea 1.22.x does NOT proxy. The act_runner has
|
# tab — the scan fails the PR check on findings, and the SARIF is
|
||||||
# GITHUB_SERVER_URL=https://git.moleculesai.app correctly set
|
# kept as a workflow artifact for triage.
|
||||||
# (per saved memory feedback_act_runner_github_server_url and
|
|
||||||
# /config.yaml on the operator host), but the Gitea API surface
|
|
||||||
# simply does not implement the codeql-action bundle endpoints.
|
|
||||||
# Observed in run 1d/3101 (2026-05-07): "::error::404 page not
|
|
||||||
# found" inside the Initialize CodeQL step, before any analysis.
|
|
||||||
#
|
|
||||||
# 2. PR #35 attempted to mark `continue-on-error: true` at the JOB
|
|
||||||
# level (correct YAML structure). Gitea 1.22.6 does NOT propagate
|
|
||||||
# job-level continue-on-error to the commit-status API — every
|
|
||||||
# matrix leg still posts `failure` to the status surface, which
|
|
||||||
# keeps OVERALL=failure on every push to main + staging and
|
|
||||||
# blocks visual auto-promote signals (#156).
|
|
||||||
#
|
|
||||||
# 3. Hongming policy decision (2026-05-07, task #156): CodeQL is
|
|
||||||
# ADVISORY, not blocking, on Gitea Actions. We do not block PR
|
|
||||||
# merge or staging→main promotion on CodeQL findings until we
|
|
||||||
# have a Gitea-compatible static-analysis pipeline.
|
|
||||||
#
|
|
||||||
# What this stub preserves:
|
|
||||||
#
|
|
||||||
# - Workflow name `CodeQL` (referenced by auto-promote-staging.yml
|
|
||||||
# line 67 as a workflow_run gate — must stay stable).
|
|
||||||
# - Job name template `Analyze (${{ matrix.language }})` and the
|
|
||||||
# 3-leg matrix (go, javascript-typescript, python). Branch
|
|
||||||
# protection / required-check parity (#144) keys on these
|
|
||||||
# exact context names.
|
|
||||||
# - merge_group + push + pull_request + schedule triggers, so the
|
|
||||||
# merge-queue check name still resolves (per saved memory
|
|
||||||
# feedback_branch_protection_check_name_parity).
|
|
||||||
#
|
|
||||||
# Re-enabling real analysis (future work):
|
|
||||||
#
|
|
||||||
# - Option A: self-hosted Semgrep / OpenGrep via a custom action
|
|
||||||
# that doesn't hit api.github.com. Tracked behind #156 follow-up.
|
|
||||||
# - Option B: Sonatype Nexus IQ or similar, called from a step
|
|
||||||
# that uses the Gitea-issued token only.
|
|
||||||
# - Option C: re-host this workflow on a small GitHub mirror used
|
|
||||||
# ONLY for SAST (push-mirrored from Gitea). Acceptable trade-off
|
|
||||||
# if/when payment is restored on a non-suspended GitHub org —
|
|
||||||
# but per saved memory feedback_no_single_source_of_truth, we
|
|
||||||
# should design for multi-vendor backup, not GitHub-only SAST.
|
|
||||||
#
|
|
||||||
# Until one of those lands, this stub keeps commit-status green so
|
|
||||||
# the auto-promote chain isn't permanently red on a tool we cannot
|
|
||||||
# actually run.
|
|
||||||
#
|
|
||||||
# Security policy: ADVISORY. We accept the residual risk of un-scanned
|
|
||||||
# pushes during this window. Compensating controls in place:
|
|
||||||
# - secret-scan.yml runs on every push (active, blocks on hits)
|
|
||||||
# - block-internal-paths.yml blocks forbidden file paths
|
|
||||||
# - lint-curl-status-capture.yml catches one specific class of bug
|
|
||||||
# - branch-protection-drift.yml + the merge_group required-checks
|
|
||||||
# parity keep the gate surface stable
|
|
||||||
# These are not equivalent to CodeQL coverage. Status of the
|
|
||||||
# replacement plan is tracked in #156.
|
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [main, staging]
|
branches: [main, staging]
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [main, staging]
|
branches: [main, staging]
|
||||||
# Required so the matrix legs emit a real result on the queued
|
# GitHub merge queue fires `merge_group` for the queue's pre-merge CI run.
|
||||||
# commit instead of a false-green when merge queue is enabled.
|
# Required so CodeQL Analyze checks get a real result on the queued
|
||||||
# Per saved memory feedback_branch_protection_check_name_parity:
|
# commit instead of a false-green. Event only fires once merge queue is
|
||||||
# path-filtered / matrix workflows MUST emit the protected name
|
# enabled on the target branch — safe to add unconditionally.
|
||||||
# via a job that always runs.
|
|
||||||
merge_group:
|
merge_group:
|
||||||
types: [checks_requested]
|
types: [checks_requested]
|
||||||
schedule:
|
schedule:
|
||||||
# Weekly heartbeat. Cheap on a stub (the no-op job is ~5s) but
|
# Weekly run picks up findings in code that hasn't been touched.
|
||||||
# keeps the workflow visible in Gitea's Actions UI so the next
|
|
||||||
# operator notices it's a stub instead of a missing surface.
|
|
||||||
- cron: '30 1 * * 0'
|
- cron: '30 1 * * 0'
|
||||||
|
|
||||||
# Workflow-level concurrency: only one stub run per branch/PR at a
|
# Workflow-level concurrency: only one CodeQL run per branch/PR at a time.
|
||||||
# time. cancel-in-progress: false because a quick follow-up push
|
# `cancel-in-progress: false` queues new runs so a quick follow-up push
|
||||||
# shouldn't kill an in-flight run — even though the stub is fast,
|
# doesn't nuke a 45-min analysis mid-flight.
|
||||||
# the contract should match a real CodeQL run for when we re-enable.
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: codeql-${{ github.ref }}
|
group: codeql-${{ github.ref }}
|
||||||
cancel-in-progress: false
|
cancel-in-progress: false
|
||||||
@ -94,17 +38,13 @@ concurrency:
|
|||||||
permissions:
|
permissions:
|
||||||
actions: read
|
actions: read
|
||||||
contents: read
|
contents: read
|
||||||
# No security-events: write — we don't call the upload API anyway,
|
# No security-events: write — we don't call the upload API.
|
||||||
# GHAS isn't on Gitea.
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
analyze:
|
analyze:
|
||||||
# Job NAME shape is load-bearing — auto-promote-staging.yml +
|
|
||||||
# branch protection both key on `Analyze (${{ matrix.language }})`.
|
|
||||||
# Do NOT rename without coordinating both surfaces.
|
|
||||||
name: Analyze (${{ matrix.language }})
|
name: Analyze (${{ matrix.language }})
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
timeout-minutes: 5
|
timeout-minutes: 45
|
||||||
|
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
@ -112,25 +52,68 @@ jobs:
|
|||||||
language: [go, javascript-typescript, python]
|
language: [go, javascript-typescript, python]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
# Single-step stub: log the policy decision + emit success.
|
- name: Checkout
|
||||||
# Exit 0 explicitly so the commit-status API records `success`
|
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||||
# for each of the three matrix legs.
|
|
||||||
- name: CodeQL stub (advisory, non-blocking on Gitea)
|
# github-app-auth sibling-checkout removed 2026-05-07 (#157):
|
||||||
|
# plugin was dropped + the Dockerfile no longer needs it.
|
||||||
|
# jq is pre-installed on ubuntu-latest — no setup step needed.
|
||||||
|
|
||||||
|
- name: Initialize CodeQL
|
||||||
|
uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
|
||||||
|
with:
|
||||||
|
languages: ${{ matrix.language }}
|
||||||
|
# security-extended widens past the default to include the
|
||||||
|
# full security-query set for a public SaaS surface.
|
||||||
|
queries: security-extended
|
||||||
|
|
||||||
|
- name: Autobuild
|
||||||
|
uses: github/codeql-action/autobuild@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
|
||||||
|
|
||||||
|
- name: Perform CodeQL Analysis
|
||||||
|
id: analyze
|
||||||
|
uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
|
||||||
|
with:
|
||||||
|
category: "/language:${{ matrix.language }}"
|
||||||
|
# upload: never — GHAS isn't enabled on this repo, so the
|
||||||
|
# upload API 403s. Write SARIF locally instead.
|
||||||
|
upload: never
|
||||||
|
output: sarif-results/${{ matrix.language }}
|
||||||
|
|
||||||
|
- name: Parse SARIF + fail on findings
|
||||||
|
# The analyze step writes <database>.sarif into the output
|
||||||
|
# directory — database name is the short CodeQL lang id, not
|
||||||
|
# the matrix value (e.g. "javascript-typescript" →
|
||||||
|
# javascript.sarif), so glob rather than hardcode.
|
||||||
|
# Filter to error/warning severity: security-extended emits
|
||||||
|
# "note" rows for informational findings we don't want to fail
|
||||||
|
# the build over.
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
cat <<EOF
|
dir="sarif-results/${{ matrix.language }}"
|
||||||
CodeQL is currently ADVISORY on Gitea Actions (post-2026-05-06).
|
sarif=$(ls "$dir"/*.sarif 2>/dev/null | head -1 || true)
|
||||||
Language matrix leg: ${{ matrix.language }}
|
if [ -z "$sarif" ] || [ ! -f "$sarif" ]; then
|
||||||
Reason: github/codeql-action/init@v4 calls api.github.com
|
echo "::error::No SARIF file found under $dir"
|
||||||
bundle endpoints that Gitea 1.22.x does not implement.
|
ls -la "$dir" 2>/dev/null || true
|
||||||
Observed: "::error::404 page not found" in the Init
|
exit 1
|
||||||
CodeQL step on every prior run.
|
fi
|
||||||
Policy: per Hongming decision 2026-05-07 (#156), CodeQL is
|
echo "Parsing $sarif"
|
||||||
non-blocking until a Gitea-compatible SAST pipeline
|
count=$(jq '[.runs[].results[] | select(.level == "error" or .level == "warning")] | length' "$sarif")
|
||||||
lands. See workflow file header for replacement
|
echo "CodeQL findings (error+warning) for ${{ matrix.language }}: $count"
|
||||||
options + compensating controls.
|
if [ "$count" -gt 0 ]; then
|
||||||
Status: emitting success so auto-promote isn't permanently
|
echo "::error::CodeQL found $count issues. Details below; full SARIF in the artifact."
|
||||||
red on a tool we cannot actually run today.
|
jq -r '.runs[].results[] | select(.level == "error" or .level == "warning") | " - [\(.level)] \(.ruleId // "?"): \(.message.text // "(no message)") @ \(.locations[0].physicalLocation.artifactLocation.uri // "?"):\(.locations[0].physicalLocation.region.startLine // "?")"' "$sarif"
|
||||||
EOF
|
exit 1
|
||||||
echo "::notice::CodeQL ${{ matrix.language }} — advisory stub, success."
|
fi
|
||||||
|
|
||||||
|
- name: Upload SARIF artifact
|
||||||
|
# Keep SARIF around on success + failure so triagers can diff.
|
||||||
|
# 14-day retention — longer than default 3, short enough not
|
||||||
|
# to bloat quota.
|
||||||
|
if: always()
|
||||||
|
uses: actions/upload-artifact@v3 # pinned to v3 for Gitea act_runner v0.6 compatibility (internal#46)
|
||||||
|
with:
|
||||||
|
name: codeql-sarif-${{ matrix.language }}
|
||||||
|
path: sarif-results/${{ matrix.language }}/
|
||||||
|
retention-days: 14
|
||||||
|
|||||||
130
.github/workflows/e2e-api.yml
vendored
130
.github/workflows/e2e-api.yml
vendored
@ -12,59 +12,6 @@ name: E2E API Smoke Test
|
|||||||
# spending CI cycles. See the in-job comment on the `e2e-api` job for
|
# spending CI cycles. See the in-job comment on the `e2e-api` job for
|
||||||
# why this is one job (not two-jobs-sharing-name) and the 2026-04-29
|
# why this is one job (not two-jobs-sharing-name) and the 2026-04-29
|
||||||
# PR #2264 incident that drove the consolidation.
|
# PR #2264 incident that drove the consolidation.
|
||||||
#
|
|
||||||
# Parallel-safety (Class B Hongming-owned CICD red sweep, 2026-05-08)
|
|
||||||
# -------------------------------------------------------------------
|
|
||||||
# Same substrate hazard as PR #98 (handlers-postgres-integration). Our
|
|
||||||
# Gitea act_runner runs with `container.network: host` (operator host
|
|
||||||
# `/opt/molecule/runners/config.yaml`), which means:
|
|
||||||
#
|
|
||||||
# * Two concurrent runs both try to bind their `-p 15432:5432` /
|
|
||||||
# `-p 16379:6379` host ports — the second postgres/redis FATALs
|
|
||||||
# with `Address in use` and `docker run` returns exit 125 with
|
|
||||||
# `Conflict. The container name "/molecule-ci-postgres" is already
|
|
||||||
# in use by container ...`. Verified in run a7/2727 on 2026-05-07.
|
|
||||||
# * The fixed container names `molecule-ci-postgres` / `-redis` (the
|
|
||||||
# pre-fix shape) collide on name AS WELL AS port. The cleanup-with-
|
|
||||||
# `docker rm -f` at the start of the second job KILLS the first
|
|
||||||
# job's still-running postgres/redis.
|
|
||||||
#
|
|
||||||
# Fix shape (mirrors PR #98's bridge-net pattern, adapted because
|
|
||||||
# platform-server is a Go binary on the host, not a containerised
|
|
||||||
# step):
|
|
||||||
#
|
|
||||||
# 1. Unique container names per run:
|
|
||||||
# pg-e2e-api-${RUN_ID}-${RUN_ATTEMPT}
|
|
||||||
# redis-e2e-api-${RUN_ID}-${RUN_ATTEMPT}
|
|
||||||
# `${RUN_ID}-${RUN_ATTEMPT}` is unique even across reruns of the
|
|
||||||
# same run_id.
|
|
||||||
# 2. Ephemeral host port per run (`-p 0:5432`), then read the actual
|
|
||||||
# bound port via `docker port` and export DATABASE_URL/REDIS_URL
|
|
||||||
# pointing at it. No fixed host-port → no port collision.
|
|
||||||
# 3. `127.0.0.1` (NOT `localhost`) in URLs — IPv6 first-resolve was
|
|
||||||
# the original flake fixed in #92 and the script's still IPv6-
|
|
||||||
# enabled.
|
|
||||||
# 4. `if: always()` cleanup so containers don't leak when test steps
|
|
||||||
# fail.
|
|
||||||
#
|
|
||||||
# Issue #94 items #2 + #3 (also fixed here):
|
|
||||||
# * Pre-pull `alpine:latest` so the platform-server's provisioner
|
|
||||||
# (`internal/handlers/container_files.go`) can stand up its
|
|
||||||
# ephemeral token-write helper without a daemon.io round-trip.
|
|
||||||
# * Create `molecule-monorepo-net` bridge network if missing so the
|
|
||||||
# provisioner's container.HostConfig {NetworkMode: ...} attach
|
|
||||||
# succeeds.
|
|
||||||
# Item #1 (timeouts) — evidence on recent runs (77/3191, ae/4270, 0e/
|
|
||||||
# 2318) shows Postgres ready in 3s, Redis in 1s, Platform in 1s when
|
|
||||||
# they DO come up. Timeouts are not the bottleneck; not bumped.
|
|
||||||
#
|
|
||||||
# Item explicitly NOT fixed here: failing test `Status back online`
|
|
||||||
# fails because the platform's langgraph workspace template image
|
|
||||||
# (ghcr.io/molecule-ai/workspace-template-langgraph:latest) returns
|
|
||||||
# 403 Forbidden post-2026-05-06 GitHub org suspension. That is a
|
|
||||||
# template-registry resolution issue (ADR-002 / local-build mode) and
|
|
||||||
# belongs in a separate change that touches workspace-server, not
|
|
||||||
# this workflow file.
|
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
@ -131,14 +78,11 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
timeout-minutes: 15
|
timeout-minutes: 15
|
||||||
env:
|
env:
|
||||||
# Unique per-run container names so concurrent runs on the host-
|
DATABASE_URL: postgres://dev:dev@localhost:15432/molecule?sslmode=disable
|
||||||
# network act_runner don't collide on name OR port.
|
REDIS_URL: redis://localhost:16379
|
||||||
# `${RUN_ID}-${RUN_ATTEMPT}` stays unique across reruns of the
|
|
||||||
# same run_id. PORT is set later (after docker port lookup) since
|
|
||||||
# we let Docker assign an ephemeral host port.
|
|
||||||
PG_CONTAINER: pg-e2e-api-${{ github.run_id }}-${{ github.run_attempt }}
|
|
||||||
REDIS_CONTAINER: redis-e2e-api-${{ github.run_id }}-${{ github.run_attempt }}
|
|
||||||
PORT: "8080"
|
PORT: "8080"
|
||||||
|
PG_CONTAINER: molecule-ci-postgres
|
||||||
|
REDIS_CONTAINER: molecule-ci-redis
|
||||||
steps:
|
steps:
|
||||||
- name: No-op pass (paths filter excluded this commit)
|
- name: No-op pass (paths filter excluded this commit)
|
||||||
if: needs.detect-changes.outputs.api != 'true'
|
if: needs.detect-changes.outputs.api != 'true'
|
||||||
@ -153,53 +97,11 @@ jobs:
|
|||||||
go-version: 'stable'
|
go-version: 'stable'
|
||||||
cache: true
|
cache: true
|
||||||
cache-dependency-path: workspace-server/go.sum
|
cache-dependency-path: workspace-server/go.sum
|
||||||
- name: Pre-pull alpine + ensure provisioner network (Issue #94 items #2 + #3)
|
|
||||||
if: needs.detect-changes.outputs.api == 'true'
|
|
||||||
run: |
|
|
||||||
# Provisioner uses alpine:latest for ephemeral token-write
|
|
||||||
# containers (workspace-server/internal/handlers/container_files.go).
|
|
||||||
# Pre-pull so the first provision in test_api.sh doesn't race
|
|
||||||
# the daemon's pull cache. Idempotent — `docker pull` is a no-op
|
|
||||||
# when the image is already present.
|
|
||||||
docker pull alpine:latest >/dev/null
|
|
||||||
# Provisioner attaches workspace containers to
|
|
||||||
# molecule-monorepo-net (workspace-server/internal/provisioner/
|
|
||||||
# provisioner.go::DefaultNetwork). The bridge already exists on
|
|
||||||
# the operator host's docker daemon — `network create` is
|
|
||||||
# idempotent via `|| true`.
|
|
||||||
docker network create molecule-monorepo-net >/dev/null 2>&1 || true
|
|
||||||
echo "alpine:latest pre-pulled; molecule-monorepo-net ensured."
|
|
||||||
- name: Start Postgres (docker)
|
- name: Start Postgres (docker)
|
||||||
if: needs.detect-changes.outputs.api == 'true'
|
if: needs.detect-changes.outputs.api == 'true'
|
||||||
run: |
|
run: |
|
||||||
# Defensive cleanup — only matches THIS run's container name,
|
|
||||||
# so it cannot kill a sibling run's postgres. (Pre-fix the
|
|
||||||
# name was static and this rm hit other runs' containers.)
|
|
||||||
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
||||||
# `-p 0:5432` requests an ephemeral host port; we read it back
|
docker run -d --name "$PG_CONTAINER" -e POSTGRES_USER=dev -e POSTGRES_PASSWORD=dev -e POSTGRES_DB=molecule -p 15432:5432 postgres:16
|
||||||
# below and export DATABASE_URL.
|
|
||||||
docker run -d --name "$PG_CONTAINER" \
|
|
||||||
-e POSTGRES_USER=dev -e POSTGRES_PASSWORD=dev -e POSTGRES_DB=molecule \
|
|
||||||
-p 0:5432 postgres:16 >/dev/null
|
|
||||||
# Resolve the host-side port assignment. `docker port` prints
|
|
||||||
# `0.0.0.0:NNNN` (and on host-net runners may also print an
|
|
||||||
# IPv6 line — take the first IPv4 line).
|
|
||||||
PG_PORT=$(docker port "$PG_CONTAINER" 5432/tcp | awk -F: '/^0\.0\.0\.0:/ {print $2; exit}')
|
|
||||||
if [ -z "$PG_PORT" ]; then
|
|
||||||
# Fallback: any first line. Some Docker versions print only
|
|
||||||
# one line.
|
|
||||||
PG_PORT=$(docker port "$PG_CONTAINER" 5432/tcp | head -1 | awk -F: '{print $NF}')
|
|
||||||
fi
|
|
||||||
if [ -z "$PG_PORT" ]; then
|
|
||||||
echo "::error::Could not resolve host port for $PG_CONTAINER"
|
|
||||||
docker port "$PG_CONTAINER" 5432/tcp || true
|
|
||||||
docker logs "$PG_CONTAINER" || true
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
# 127.0.0.1 (NOT localhost) — IPv6 first-resolve flake (#92).
|
|
||||||
echo "PG_PORT=${PG_PORT}" >> "$GITHUB_ENV"
|
|
||||||
echo "DATABASE_URL=postgres://dev:dev@127.0.0.1:${PG_PORT}/molecule?sslmode=disable" >> "$GITHUB_ENV"
|
|
||||||
echo "Postgres host port: ${PG_PORT}"
|
|
||||||
for i in $(seq 1 30); do
|
for i in $(seq 1 30); do
|
||||||
if docker exec "$PG_CONTAINER" pg_isready -U dev >/dev/null 2>&1; then
|
if docker exec "$PG_CONTAINER" pg_isready -U dev >/dev/null 2>&1; then
|
||||||
echo "Postgres ready after ${i}s"
|
echo "Postgres ready after ${i}s"
|
||||||
@ -214,20 +116,7 @@ jobs:
|
|||||||
if: needs.detect-changes.outputs.api == 'true'
|
if: needs.detect-changes.outputs.api == 'true'
|
||||||
run: |
|
run: |
|
||||||
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
|
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
|
||||||
docker run -d --name "$REDIS_CONTAINER" -p 0:6379 redis:7 >/dev/null
|
docker run -d --name "$REDIS_CONTAINER" -p 16379:6379 redis:7
|
||||||
REDIS_PORT=$(docker port "$REDIS_CONTAINER" 6379/tcp | awk -F: '/^0\.0\.0\.0:/ {print $2; exit}')
|
|
||||||
if [ -z "$REDIS_PORT" ]; then
|
|
||||||
REDIS_PORT=$(docker port "$REDIS_CONTAINER" 6379/tcp | head -1 | awk -F: '{print $NF}')
|
|
||||||
fi
|
|
||||||
if [ -z "$REDIS_PORT" ]; then
|
|
||||||
echo "::error::Could not resolve host port for $REDIS_CONTAINER"
|
|
||||||
docker port "$REDIS_CONTAINER" 6379/tcp || true
|
|
||||||
docker logs "$REDIS_CONTAINER" || true
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "REDIS_PORT=${REDIS_PORT}" >> "$GITHUB_ENV"
|
|
||||||
echo "REDIS_URL=redis://127.0.0.1:${REDIS_PORT}" >> "$GITHUB_ENV"
|
|
||||||
echo "Redis host port: ${REDIS_PORT}"
|
|
||||||
for i in $(seq 1 15); do
|
for i in $(seq 1 15); do
|
||||||
if docker exec "$REDIS_CONTAINER" redis-cli ping 2>/dev/null | grep -q PONG; then
|
if docker exec "$REDIS_CONTAINER" redis-cli ping 2>/dev/null | grep -q PONG; then
|
||||||
echo "Redis ready after ${i}s"
|
echo "Redis ready after ${i}s"
|
||||||
@ -246,15 +135,13 @@ jobs:
|
|||||||
if: needs.detect-changes.outputs.api == 'true'
|
if: needs.detect-changes.outputs.api == 'true'
|
||||||
working-directory: workspace-server
|
working-directory: workspace-server
|
||||||
run: |
|
run: |
|
||||||
# DATABASE_URL + REDIS_URL exported by the start-postgres /
|
|
||||||
# start-redis steps point at this run's per-run host ports.
|
|
||||||
./platform-server > platform.log 2>&1 &
|
./platform-server > platform.log 2>&1 &
|
||||||
echo $! > platform.pid
|
echo $! > platform.pid
|
||||||
- name: Wait for /health
|
- name: Wait for /health
|
||||||
if: needs.detect-changes.outputs.api == 'true'
|
if: needs.detect-changes.outputs.api == 'true'
|
||||||
run: |
|
run: |
|
||||||
for i in $(seq 1 30); do
|
for i in $(seq 1 30); do
|
||||||
if curl -sf http://127.0.0.1:8080/health > /dev/null; then
|
if curl -sf http://localhost:8080/health > /dev/null; then
|
||||||
echo "Platform up after ${i}s"
|
echo "Platform up after ${i}s"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
@ -298,9 +185,6 @@ jobs:
|
|||||||
kill "$(cat workspace-server/platform.pid)" 2>/dev/null || true
|
kill "$(cat workspace-server/platform.pid)" 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
- name: Stop service containers
|
- name: Stop service containers
|
||||||
# always() so containers don't leak when test steps fail. The
|
|
||||||
# cleanup is best-effort: if the container is already gone
|
|
||||||
# (e.g. concurrent rerun race), don't fail the job.
|
|
||||||
if: always() && needs.detect-changes.outputs.api == 'true'
|
if: always() && needs.detect-changes.outputs.api == 'true'
|
||||||
run: |
|
run: |
|
||||||
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
||||||
|
|||||||
13
.github/workflows/e2e-staging-canvas.yml
vendored
13
.github/workflows/e2e-staging-canvas.yml
vendored
@ -22,9 +22,9 @@ on:
|
|||||||
# spending CI cycles. See e2e-api.yml for the rationale on why this
|
# spending CI cycles. See e2e-api.yml for the rationale on why this
|
||||||
# is a single job rather than two-jobs-sharing-name.
|
# is a single job rather than two-jobs-sharing-name.
|
||||||
push:
|
push:
|
||||||
branches: [main]
|
branches: [main, staging]
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [main]
|
branches: [main, staging]
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
schedule:
|
schedule:
|
||||||
# Weekly on Sunday 08:00 UTC — catches Chrome / Playwright / Next.js
|
# Weekly on Sunday 08:00 UTC — catches Chrome / Playwright / Next.js
|
||||||
@ -139,11 +139,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Upload Playwright report on failure
|
- name: Upload Playwright report on failure
|
||||||
if: failure() && needs.detect-changes.outputs.canvas == 'true'
|
if: failure() && needs.detect-changes.outputs.canvas == 'true'
|
||||||
# Pinned to v3 for Gitea act_runner v0.6 compatibility — v4+ uses
|
uses: actions/upload-artifact@v3 # pinned to v3 for Gitea act_runner v0.6 compatibility (internal#46)
|
||||||
# the GHES 3.10+ artifact protocol that Gitea 1.22.x does NOT
|
|
||||||
# implement (see ci.yml upload step for the canonical error
|
|
||||||
# cite). Drop this pin when Gitea ships the v4 protocol.
|
|
||||||
uses: actions/upload-artifact@c6a366c94c3e0affe28c06c8df20a878f24da3cf # v3.2.2
|
|
||||||
with:
|
with:
|
||||||
name: playwright-report-staging
|
name: playwright-report-staging
|
||||||
path: canvas/playwright-report-staging/
|
path: canvas/playwright-report-staging/
|
||||||
@ -151,8 +147,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Upload screenshots on failure
|
- name: Upload screenshots on failure
|
||||||
if: failure() && needs.detect-changes.outputs.canvas == 'true'
|
if: failure() && needs.detect-changes.outputs.canvas == 'true'
|
||||||
# Pinned to v3 for Gitea act_runner v0.6 compatibility (see above).
|
uses: actions/upload-artifact@v3 # pinned to v3 for Gitea act_runner v0.6 compatibility (internal#46)
|
||||||
uses: actions/upload-artifact@c6a366c94c3e0affe28c06c8df20a878f24da3cf # v3.2.2
|
|
||||||
with:
|
with:
|
||||||
name: playwright-screenshots
|
name: playwright-screenshots
|
||||||
path: canvas/test-results/
|
path: canvas/test-results/
|
||||||
|
|||||||
4
.github/workflows/e2e-staging-external.yml
vendored
4
.github/workflows/e2e-staging-external.yml
vendored
@ -32,7 +32,7 @@ name: E2E Staging External Runtime
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [main]
|
branches: [staging, main]
|
||||||
paths:
|
paths:
|
||||||
- 'workspace-server/internal/handlers/workspace.go'
|
- 'workspace-server/internal/handlers/workspace.go'
|
||||||
- 'workspace-server/internal/handlers/registry.go'
|
- 'workspace-server/internal/handlers/registry.go'
|
||||||
@ -44,7 +44,7 @@ on:
|
|||||||
- 'tests/e2e/test_staging_external_runtime.sh'
|
- 'tests/e2e/test_staging_external_runtime.sh'
|
||||||
- '.github/workflows/e2e-staging-external.yml'
|
- '.github/workflows/e2e-staging-external.yml'
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [main]
|
branches: [staging, main]
|
||||||
paths:
|
paths:
|
||||||
- 'workspace-server/internal/handlers/workspace.go'
|
- 'workspace-server/internal/handlers/workspace.go'
|
||||||
- 'workspace-server/internal/handlers/registry.go'
|
- 'workspace-server/internal/handlers/registry.go'
|
||||||
|
|||||||
13
.github/workflows/e2e-staging-saas.yml
vendored
13
.github/workflows/e2e-staging-saas.yml
vendored
@ -20,12 +20,13 @@ name: E2E Staging SaaS (full lifecycle)
|
|||||||
# via the same paths watcher that e2e-api.yml uses)
|
# via the same paths watcher that e2e-api.yml uses)
|
||||||
|
|
||||||
on:
|
on:
|
||||||
# Trunk-based (Phase 3 of internal#81): main is the only branch.
|
# Fire on staging push too — previously this only ran on main, which
|
||||||
# Previously this fired on staging push too because staging was a
|
# meant the most thorough end-to-end test caught regressions AFTER
|
||||||
# superset of main and ran the gate ahead of auto-promote; with no
|
# they shipped to staging (and then to the auto-promote PR). Running
|
||||||
# staging branch, main is where E2E gates the deploy.
|
# on staging push catches them BEFORE the staging→main promotion
|
||||||
|
# opens, so a green canary into auto-promote is more meaningful.
|
||||||
push:
|
push:
|
||||||
branches: [main]
|
branches: [staging, main]
|
||||||
paths:
|
paths:
|
||||||
- 'workspace-server/internal/handlers/registry.go'
|
- 'workspace-server/internal/handlers/registry.go'
|
||||||
- 'workspace-server/internal/handlers/workspace_provision.go'
|
- 'workspace-server/internal/handlers/workspace_provision.go'
|
||||||
@ -35,7 +36,7 @@ on:
|
|||||||
- 'tests/e2e/test_staging_full_saas.sh'
|
- 'tests/e2e/test_staging_full_saas.sh'
|
||||||
- '.github/workflows/e2e-staging-saas.yml'
|
- '.github/workflows/e2e-staging-saas.yml'
|
||||||
pull_request:
|
pull_request:
|
||||||
branches: [main]
|
branches: [staging, main]
|
||||||
paths:
|
paths:
|
||||||
- 'workspace-server/internal/handlers/registry.go'
|
- 'workspace-server/internal/handlers/registry.go'
|
||||||
- 'workspace-server/internal/handlers/workspace_provision.go'
|
- 'workspace-server/internal/handlers/workspace_provision.go'
|
||||||
|
|||||||
139
.github/workflows/handlers-postgres-integration.yml
vendored
139
.github/workflows/handlers-postgres-integration.yml
vendored
@ -14,42 +14,12 @@ name: Handlers Postgres Integration
|
|||||||
# self-review caught it took 2 minutes to set up and would have caught
|
# self-review caught it took 2 minutes to set up and would have caught
|
||||||
# the bug at PR-time.
|
# the bug at PR-time.
|
||||||
#
|
#
|
||||||
# Why this workflow does NOT use `services: postgres:` (Class B fix)
|
# This job spins a Postgres service container, applies the migration,
|
||||||
# ------------------------------------------------------------------
|
# and runs `go test -tags=integration` against a live DB. Required
|
||||||
# Our act_runner config has `container.network: host` (operator host
|
# check on staging branch protection — backend handler PRs cannot
|
||||||
# /opt/molecule/runners/config.yaml), which act_runner applies to BOTH
|
# merge without a real-DB regression gate.
|
||||||
# the job container AND every service container. With host-net, two
|
|
||||||
# concurrent runs of this workflow both try to bind 0.0.0.0:5432 — the
|
|
||||||
# second postgres FATALs with `could not create any TCP/IP sockets:
|
|
||||||
# Address in use`, and Docker auto-removes it (act_runner sets
|
|
||||||
# AutoRemove:true on service containers). By the time the migrations
|
|
||||||
# step runs `psql`, the postgres container is gone, hence
|
|
||||||
# `Connection refused` then `failed to remove container: No such
|
|
||||||
# container` at cleanup time.
|
|
||||||
#
|
#
|
||||||
# Per-job `container.network` override is silently ignored by
|
# Cost: ~30s job (postgres pull from GH cache + go build + 4 tests).
|
||||||
# act_runner — `--network and --net in the options will be ignored.`
|
|
||||||
# appears in the runner log. Documented constraint.
|
|
||||||
#
|
|
||||||
# So we sidestep `services:` entirely. The job container still uses
|
|
||||||
# host-net (inherited from runner config; required for cache server
|
|
||||||
# discovery on the bridge IP 172.18.0.17:42631). We launch a sibling
|
|
||||||
# postgres on the existing `molecule-monorepo-net` bridge with a
|
|
||||||
# UNIQUE name per run — `pg-handlers-${RUN_ID}-${RUN_ATTEMPT}` — and
|
|
||||||
# read its bridge IP via `docker inspect`. A host-net job container
|
|
||||||
# can reach a bridge-net container directly via the bridge IP (verified
|
|
||||||
# manually on operator host 2026-05-08).
|
|
||||||
#
|
|
||||||
# Trade-offs vs. the original `services:` shape:
|
|
||||||
# + No host-port collision; N parallel runs share the bridge cleanly
|
|
||||||
# + `if: always()` cleanup runs even on test-step failure
|
|
||||||
# - One more step in the workflow (+~3 lines)
|
|
||||||
# - Requires `molecule-monorepo-net` to exist on the operator host
|
|
||||||
# (it does; declared in docker-compose.yml + docker-compose.infra.yml)
|
|
||||||
#
|
|
||||||
# Class B Hongming-owned CICD red sweep, 2026-05-08.
|
|
||||||
#
|
|
||||||
# Cost: ~30s job (postgres pull from cache + go build + 4 tests).
|
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
@ -89,14 +59,20 @@ jobs:
|
|||||||
name: Handlers Postgres Integration
|
name: Handlers Postgres Integration
|
||||||
needs: detect-changes
|
needs: detect-changes
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
env:
|
services:
|
||||||
# Unique name per run so concurrent jobs don't collide on the
|
postgres:
|
||||||
# bridge network. ${RUN_ID}-${RUN_ATTEMPT} is unique even across
|
image: postgres:15-alpine
|
||||||
# workflow_dispatch reruns of the same run_id.
|
env:
|
||||||
PG_NAME: pg-handlers-${{ github.run_id }}-${{ github.run_attempt }}
|
POSTGRES_PASSWORD: test
|
||||||
# Bridge network already exists on the operator host (declared
|
POSTGRES_DB: molecule
|
||||||
# in docker-compose.yml + docker-compose.infra.yml).
|
ports:
|
||||||
PG_NETWORK: molecule-monorepo-net
|
- 5432:5432
|
||||||
|
# GHA spins this with --health-cmd built in for postgres images.
|
||||||
|
options: >-
|
||||||
|
--health-cmd pg_isready
|
||||||
|
--health-interval 5s
|
||||||
|
--health-timeout 5s
|
||||||
|
--health-retries 10
|
||||||
defaults:
|
defaults:
|
||||||
run:
|
run:
|
||||||
working-directory: workspace-server
|
working-directory: workspace-server
|
||||||
@ -113,57 +89,16 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
go-version: 'stable'
|
go-version: 'stable'
|
||||||
|
|
||||||
- if: needs.detect-changes.outputs.handlers == 'true'
|
|
||||||
name: Start sibling Postgres on bridge network
|
|
||||||
working-directory: .
|
|
||||||
run: |
|
|
||||||
# Sanity: the bridge network must exist on the operator host.
|
|
||||||
# Hard-fail loud if it doesn't — easier to spot than a silent
|
|
||||||
# auto-create that diverges from the rest of the stack.
|
|
||||||
if ! docker network inspect "${PG_NETWORK}" >/dev/null 2>&1; then
|
|
||||||
echo "::error::Bridge network '${PG_NETWORK}' missing on operator host. Re-run docker-compose.infra.yml or check ops handbook."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# If a stale container with the same name exists (rerun on
|
|
||||||
# the same run_id), wipe it first.
|
|
||||||
docker rm -f "${PG_NAME}" >/dev/null 2>&1 || true
|
|
||||||
|
|
||||||
docker run -d \
|
|
||||||
--name "${PG_NAME}" \
|
|
||||||
--network "${PG_NETWORK}" \
|
|
||||||
--health-cmd "pg_isready -U postgres" \
|
|
||||||
--health-interval 5s \
|
|
||||||
--health-timeout 5s \
|
|
||||||
--health-retries 10 \
|
|
||||||
-e POSTGRES_PASSWORD=test \
|
|
||||||
-e POSTGRES_DB=molecule \
|
|
||||||
postgres:15-alpine >/dev/null
|
|
||||||
|
|
||||||
# Read back the bridge IP. Always present immediately after
|
|
||||||
# `docker run -d` for bridge networks.
|
|
||||||
PG_HOST=$(docker inspect "${PG_NAME}" \
|
|
||||||
--format "{{(index .NetworkSettings.Networks \"${PG_NETWORK}\").IPAddress}}")
|
|
||||||
if [ -z "${PG_HOST}" ]; then
|
|
||||||
echo "::error::Could not resolve PG_HOST for ${PG_NAME} on ${PG_NETWORK}"
|
|
||||||
docker logs "${PG_NAME}" || true
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "PG_HOST=${PG_HOST}" >> "$GITHUB_ENV"
|
|
||||||
echo "INTEGRATION_DB_URL=postgres://postgres:test@${PG_HOST}:5432/molecule?sslmode=disable" >> "$GITHUB_ENV"
|
|
||||||
echo "Started ${PG_NAME} at ${PG_HOST}:5432"
|
|
||||||
|
|
||||||
- if: needs.detect-changes.outputs.handlers == 'true'
|
- if: needs.detect-changes.outputs.handlers == 'true'
|
||||||
name: Apply migrations to Postgres service
|
name: Apply migrations to Postgres service
|
||||||
env:
|
env:
|
||||||
PGPASSWORD: test
|
PGPASSWORD: test
|
||||||
run: |
|
run: |
|
||||||
# Wait for postgres to actually accept connections. Docker's
|
# Wait for postgres to actually accept connections (the
|
||||||
# health-cmd handles container-side readiness, but the wire
|
# GHA --health-cmd is best-effort but psql can still race).
|
||||||
# to the bridge IP is best-tested with pg_isready directly.
|
|
||||||
for i in {1..15}; do
|
for i in {1..15}; do
|
||||||
if pg_isready -h "${PG_HOST}" -p 5432 -U postgres -q; then break; fi
|
if pg_isready -h localhost -p 5432 -U postgres -q; then break; fi
|
||||||
echo "waiting for postgres at ${PG_HOST}:5432..."; sleep 2
|
echo "waiting for postgres..."; sleep 2
|
||||||
done
|
done
|
||||||
|
|
||||||
# Apply every .up.sql in lexicographic order with
|
# Apply every .up.sql in lexicographic order with
|
||||||
@ -196,7 +131,7 @@ jobs:
|
|||||||
# not fine once a cross-table atomicity test came in.
|
# not fine once a cross-table atomicity test came in.
|
||||||
set +e
|
set +e
|
||||||
for migration in $(ls migrations/*.sql 2>/dev/null | grep -v '\.down\.sql$' | sort); do
|
for migration in $(ls migrations/*.sql 2>/dev/null | grep -v '\.down\.sql$' | sort); do
|
||||||
if psql -h "${PG_HOST}" -U postgres -d molecule -v ON_ERROR_STOP=1 \
|
if psql -h localhost -U postgres -d molecule -v ON_ERROR_STOP=1 \
|
||||||
-f "$migration" >/dev/null 2>&1; then
|
-f "$migration" >/dev/null 2>&1; then
|
||||||
echo "✓ $(basename "$migration")"
|
echo "✓ $(basename "$migration")"
|
||||||
else
|
else
|
||||||
@ -210,7 +145,7 @@ jobs:
|
|||||||
# fail if any didn't land — that would be a real regression we
|
# fail if any didn't land — that would be a real regression we
|
||||||
# want loud.
|
# want loud.
|
||||||
for tbl in delegations workspaces activity_logs pending_uploads; do
|
for tbl in delegations workspaces activity_logs pending_uploads; do
|
||||||
if ! psql -h "${PG_HOST}" -U postgres -d molecule -tA \
|
if ! psql -h localhost -U postgres -d molecule -tA \
|
||||||
-c "SELECT 1 FROM information_schema.tables WHERE table_name = '$tbl'" \
|
-c "SELECT 1 FROM information_schema.tables WHERE table_name = '$tbl'" \
|
||||||
| grep -q 1; then
|
| grep -q 1; then
|
||||||
echo "::error::$tbl table missing after migration replay — handler integration tests would be meaningless"
|
echo "::error::$tbl table missing after migration replay — handler integration tests would be meaningless"
|
||||||
@ -221,32 +156,16 @@ jobs:
|
|||||||
|
|
||||||
- if: needs.detect-changes.outputs.handlers == 'true'
|
- if: needs.detect-changes.outputs.handlers == 'true'
|
||||||
name: Run integration tests
|
name: Run integration tests
|
||||||
|
env:
|
||||||
|
INTEGRATION_DB_URL: postgres://postgres:test@localhost:5432/molecule?sslmode=disable
|
||||||
run: |
|
run: |
|
||||||
# INTEGRATION_DB_URL is exported by the start-postgres step;
|
|
||||||
# points at the per-run bridge IP, not 127.0.0.1, so concurrent
|
|
||||||
# workflow runs don't fight over a host-net 5432 port.
|
|
||||||
go test -tags=integration -timeout 5m -v ./internal/handlers/ -run "^TestIntegration_"
|
go test -tags=integration -timeout 5m -v ./internal/handlers/ -run "^TestIntegration_"
|
||||||
|
|
||||||
- if: failure() && needs.detect-changes.outputs.handlers == 'true'
|
- if: needs.detect-changes.outputs.handlers == 'true' && failure()
|
||||||
name: Diagnostic dump on failure
|
name: Diagnostic dump on failure
|
||||||
env:
|
env:
|
||||||
PGPASSWORD: test
|
PGPASSWORD: test
|
||||||
run: |
|
run: |
|
||||||
echo "::group::postgres container status"
|
|
||||||
docker ps -a --filter "name=${PG_NAME}" --format '{{.Status}} {{.Names}}' || true
|
|
||||||
docker logs "${PG_NAME}" 2>&1 | tail -50 || true
|
|
||||||
echo "::endgroup::"
|
|
||||||
echo "::group::delegations table state"
|
echo "::group::delegations table state"
|
||||||
psql -h "${PG_HOST}" -U postgres -d molecule -c "SELECT * FROM delegations LIMIT 50;" || true
|
psql -h localhost -U postgres -d molecule -c "SELECT * FROM delegations LIMIT 50;" || true
|
||||||
echo "::endgroup::"
|
echo "::endgroup::"
|
||||||
|
|
||||||
- if: always() && needs.detect-changes.outputs.handlers == 'true'
|
|
||||||
name: Stop sibling Postgres
|
|
||||||
working-directory: .
|
|
||||||
run: |
|
|
||||||
# always() so containers don't leak when migrations or tests
|
|
||||||
# fail. The cleanup is best-effort: if the container is
|
|
||||||
# already gone (e.g. concurrent rerun race), don't fail the job.
|
|
||||||
docker rm -f "${PG_NAME}" >/dev/null 2>&1 || true
|
|
||||||
echo "Cleaned up ${PG_NAME}"
|
|
||||||
|
|
||||||
|
|||||||
60
.github/workflows/harness-replays.yml
vendored
60
.github/workflows/harness-replays.yml
vendored
@ -98,66 +98,6 @@ jobs:
|
|||||||
# github-app-auth sibling-checkout removed 2026-05-07 (#157):
|
# github-app-auth sibling-checkout removed 2026-05-07 (#157):
|
||||||
# the plugin was dropped + Dockerfile.tenant no longer COPYs it.
|
# the plugin was dropped + Dockerfile.tenant no longer COPYs it.
|
||||||
|
|
||||||
# Pre-clone manifest deps before docker compose builds the tenant
|
|
||||||
# image (Task #173 followup — same pattern as
|
|
||||||
# publish-workspace-server-image.yml's "Pre-clone manifest deps"
|
|
||||||
# step).
|
|
||||||
#
|
|
||||||
# Why pre-clone here too: tests/harness/compose.yml builds tenant-alpha
|
|
||||||
# and tenant-beta from workspace-server/Dockerfile.tenant with
|
|
||||||
# context=../.. (repo root). That Dockerfile expects
|
|
||||||
# .tenant-bundle-deps/{workspace-configs-templates,org-templates,plugins}
|
|
||||||
# to be present at build context root (post-#173 it COPYs from there
|
|
||||||
# instead of running an in-image clone — the in-image clone failed
|
|
||||||
# with "could not read Username for https://git.moleculesai.app"
|
|
||||||
# because there's no auth path inside the build sandbox).
|
|
||||||
#
|
|
||||||
# Without this step harness-replays fails before any replay runs,
|
|
||||||
# with `failed to calculate checksum of ref ...
|
|
||||||
# "/.tenant-bundle-deps/plugins": not found`. Caught by run #892
|
|
||||||
# (main, 2026-05-07T20:28:53Z) and run #964 (staging — same
|
|
||||||
# symptom, different root cause: staging still has the in-image
|
|
||||||
# clone path, hits the auth error directly).
|
|
||||||
#
|
|
||||||
# 2026-05-08 sub-finding (#192): the clone step ALSO fails when
|
|
||||||
# any referenced workspace-template repo is private and the
|
|
||||||
# AUTO_SYNC_TOKEN bearer (devops-engineer persona) lacks read
|
|
||||||
# access. Root cause: 5 of 9 workspace-template repos
|
|
||||||
# (openclaw, codex, crewai, deepagents, gemini-cli) had been
|
|
||||||
# marked private with no team grant. Resolution: flipped them
|
|
||||||
# to public per `feedback_oss_first_repo_visibility_default`
|
|
||||||
# (the OSS surface should be public). Layer-3 (customer-private +
|
|
||||||
# marketplace third-party repos) tracked separately in
|
|
||||||
# internal#102.
|
|
||||||
#
|
|
||||||
# Token shape matches publish-workspace-server-image.yml: AUTO_SYNC_TOKEN
|
|
||||||
# is the devops-engineer persona PAT, NOT the founder PAT (per
|
|
||||||
# `feedback_per_agent_gitea_identity_default`). clone-manifest.sh
|
|
||||||
# embeds it as basic-auth for the duration of the clones and strips
|
|
||||||
# .git directories — the token never enters the resulting image.
|
|
||||||
- name: Pre-clone manifest deps
|
|
||||||
if: needs.detect-changes.outputs.run == 'true'
|
|
||||||
env:
|
|
||||||
MOLECULE_GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
if [ -z "${MOLECULE_GITEA_TOKEN}" ]; then
|
|
||||||
echo "::error::AUTO_SYNC_TOKEN secret is empty — register the devops-engineer persona PAT in repo Actions secrets"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
mkdir -p .tenant-bundle-deps
|
|
||||||
bash scripts/clone-manifest.sh \
|
|
||||||
manifest.json \
|
|
||||||
.tenant-bundle-deps/workspace-configs-templates \
|
|
||||||
.tenant-bundle-deps/org-templates \
|
|
||||||
.tenant-bundle-deps/plugins
|
|
||||||
# Sanity-check counts so a silent partial clone fails fast
|
|
||||||
# instead of producing a half-empty image.
|
|
||||||
ws_count=$(find .tenant-bundle-deps/workspace-configs-templates -mindepth 1 -maxdepth 1 -type d | wc -l)
|
|
||||||
org_count=$(find .tenant-bundle-deps/org-templates -mindepth 1 -maxdepth 1 -type d | wc -l)
|
|
||||||
plugins_count=$(find .tenant-bundle-deps/plugins -mindepth 1 -maxdepth 1 -type d | wc -l)
|
|
||||||
echo "Cloned: ws=$ws_count org=$org_count plugins=$plugins_count"
|
|
||||||
|
|
||||||
- name: Install Python deps for replays
|
- name: Install Python deps for replays
|
||||||
# peer-discovery-404 (and future replays) eval Python against the
|
# peer-discovery-404 (and future replays) eval Python against the
|
||||||
# running tenant — importing workspace/a2a_client.py pulls in
|
# running tenant — importing workspace/a2a_client.py pulls in
|
||||||
|
|||||||
59
.github/workflows/pr-guards.yml
vendored
59
.github/workflows/pr-guards.yml
vendored
@ -1,25 +1,14 @@
|
|||||||
name: pr-guards
|
name: pr-guards
|
||||||
|
|
||||||
# PR-time guards. Today the only guard is "disable auto-merge when a
|
# Thin caller that delegates to the molecule-ci reusable guard. Today
|
||||||
# new commit is pushed after auto-merge was enabled" — added 2026-04-27
|
# the guard is just "disable auto-merge when a new commit is pushed
|
||||||
# after PR #2174 auto-merged with only its first commit because the
|
# after auto-merge was enabled" — added 2026-04-27 after PR #2174
|
||||||
# second commit was pushed after the merge queue had locked the PR's
|
# auto-merged with only its first commit because the second commit
|
||||||
# SHA.
|
# was pushed after the merge queue had locked the PR's SHA.
|
||||||
#
|
#
|
||||||
# Why this is inlined (not delegated to molecule-ci's reusable
|
# When more PR-time guards land in molecule-ci, add them here as
|
||||||
# workflow): the reusable workflow uses `gh pr merge --disable-auto`,
|
# additional jobs that share the same pull_request:synchronize
|
||||||
# which calls GitHub's GraphQL API. Gitea has no GraphQL endpoint and
|
# trigger.
|
||||||
# returns HTTP 405 on /api/graphql, so the job failed on every Gitea
|
|
||||||
# PR push since the 2026-05-06 migration. Gitea also has no `--auto`
|
|
||||||
# merge primitive that this job could be acting on, so the right
|
|
||||||
# behaviour on Gitea is "no-op + green status" — not a 405.
|
|
||||||
#
|
|
||||||
# Inlining (vs. an `if:` on the `uses:` line) keeps the job ALWAYS
|
|
||||||
# running, which matters for branch protection: required-check names
|
|
||||||
# need a job that emits SUCCESS terminal state, not SKIPPED. See
|
|
||||||
# `feedback_branch_protection_check_name_parity` and `feedback_pr_merge_safety_guards`.
|
|
||||||
#
|
|
||||||
# Issue #88 item 1.
|
|
||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
@ -30,34 +19,4 @@ permissions:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
disable-auto-merge-on-push:
|
disable-auto-merge-on-push:
|
||||||
runs-on: ubuntu-latest
|
uses: Molecule-AI/molecule-ci/.github/workflows/disable-auto-merge-on-push.yml@main
|
||||||
steps:
|
|
||||||
# Detect Gitea Actions. act_runner sets GITEA_ACTIONS=true in the
|
|
||||||
# step env on every job. Belt-and-suspenders: also check the repo
|
|
||||||
# url's host, which is independent of any runner-side env config
|
|
||||||
# (covers a future Gitea host where the env var is forgotten).
|
|
||||||
- name: Detect runner host
|
|
||||||
id: host
|
|
||||||
run: |
|
|
||||||
if [[ "${GITEA_ACTIONS:-}" == "true" ]] || [[ "${{ github.server_url }}" == *moleculesai.app* ]] || [[ "${{ github.event.repository.html_url }}" == *moleculesai.app* ]]; then
|
|
||||||
echo "is_gitea=true" >> "$GITHUB_OUTPUT"
|
|
||||||
echo "::notice::Gitea Actions detected — auto-merge gating is not applicable here (Gitea has no --auto merge primitive). Job will no-op."
|
|
||||||
else
|
|
||||||
echo "is_gitea=false" >> "$GITHUB_OUTPUT"
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Disable auto-merge (GitHub only)
|
|
||||||
if: steps.host.outputs.is_gitea != 'true'
|
|
||||||
env:
|
|
||||||
GH_TOKEN: ${{ github.token }}
|
|
||||||
PR: ${{ github.event.pull_request.number }}
|
|
||||||
REPO: ${{ github.repository }}
|
|
||||||
NEW_SHA: ${{ github.sha }}
|
|
||||||
run: |
|
|
||||||
set -eu
|
|
||||||
gh pr merge "$PR" --disable-auto -R "$REPO" || true
|
|
||||||
gh pr comment "$PR" -R "$REPO" --body "🔒 Auto-merge disabled — new commit (\`${NEW_SHA:0:7}\`) pushed after auto-merge was enabled. The merge queue locks SHAs at entry, so subsequent pushes can race. Verify the new commit and re-enable with \`gh pr merge --auto\`."
|
|
||||||
|
|
||||||
- name: Gitea no-op
|
|
||||||
if: steps.host.outputs.is_gitea == 'true'
|
|
||||||
run: echo "Gitea Actions — auto-merge gating not applicable; no-op (job intentionally green so branch protection's required-check name lands SUCCESS)."
|
|
||||||
|
|||||||
4
.github/workflows/publish-runtime.yml
vendored
4
.github/workflows/publish-runtime.yml
vendored
@ -25,7 +25,7 @@ name: publish-runtime
|
|||||||
# 3. Publishes to PyPI via the PyPA Trusted Publisher action (OIDC).
|
# 3. Publishes to PyPI via the PyPA Trusted Publisher action (OIDC).
|
||||||
# No static API token is stored — PyPI verifies the workflow's
|
# No static API token is stored — PyPI verifies the workflow's
|
||||||
# OIDC claim against the trusted-publisher config registered for
|
# OIDC claim against the trusted-publisher config registered for
|
||||||
# molecule-ai-workspace-runtime (molecule-ai/molecule-core,
|
# molecule-ai-workspace-runtime (Molecule-AI/molecule-core,
|
||||||
# publish-runtime.yml, environment pypi-publish).
|
# publish-runtime.yml, environment pypi-publish).
|
||||||
#
|
#
|
||||||
# After publish: the 8 template repos pick up the new version on their
|
# After publish: the 8 template repos pick up the new version on their
|
||||||
@ -166,7 +166,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Publish to PyPI (Trusted Publisher / OIDC)
|
- name: Publish to PyPI (Trusted Publisher / OIDC)
|
||||||
# PyPI side is configured: project molecule-ai-workspace-runtime →
|
# PyPI side is configured: project molecule-ai-workspace-runtime →
|
||||||
# publisher molecule-ai/molecule-core, workflow publish-runtime.yml,
|
# publisher Molecule-AI/molecule-core, workflow publish-runtime.yml,
|
||||||
# environment pypi-publish. The action mints a short-lived OIDC
|
# environment pypi-publish. The action mints a short-lived OIDC
|
||||||
# token and exchanges it for a PyPI upload credential — no static
|
# token and exchanges it for a PyPI upload credential — no static
|
||||||
# API token in this repo's secrets.
|
# API token in this repo's secrets.
|
||||||
|
|||||||
229
.github/workflows/publish-workspace-server-image.yml
vendored
229
.github/workflows/publish-workspace-server-image.yml
vendored
@ -37,7 +37,6 @@ on:
|
|||||||
- 'workspace-server/**'
|
- 'workspace-server/**'
|
||||||
- 'canvas/**'
|
- 'canvas/**'
|
||||||
- 'manifest.json'
|
- 'manifest.json'
|
||||||
- 'scripts/**'
|
|
||||||
- '.github/workflows/publish-workspace-server-image.yml'
|
- '.github/workflows/publish-workspace-server-image.yml'
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
@ -75,87 +74,33 @@ jobs:
|
|||||||
# plugin was dropped + workspace-server/Dockerfile no longer
|
# plugin was dropped + workspace-server/Dockerfile no longer
|
||||||
# COPYs it.
|
# COPYs it.
|
||||||
|
|
||||||
# ECR auth + buildx setup are now inline in each build step
|
- name: Configure AWS credentials for ECR
|
||||||
# below (Task #173, 2026-05-07).
|
# GHCR was the pre-suspension target; the molecule-ai org on
|
||||||
#
|
# GitHub got swept 2026-05-06 and ghcr.io/molecule-ai/* is no
|
||||||
# Why moved inline: aws-actions/configure-aws-credentials@v4 +
|
# longer reachable. Post-suspension target is the operator's
|
||||||
# aws-actions/amazon-ecr-login@v2 + docker/setup-buildx-action
|
# ECR org (153263036946.dkr.ecr.us-east-2.amazonaws.com/
|
||||||
# all left auth state in places that the actual `docker push`
|
# molecule-ai/*), which already hosts platform-tenant +
|
||||||
# couldn't see on Gitea Actions:
|
# workspace-template-* + runner-base images. AWS creds come
|
||||||
# - The actions wrote to a step-scoped DOCKER_CONFIG path
|
# from the AWS_ACCESS_KEY_ID/SECRET secrets bound to the
|
||||||
# that didn't survive into subsequent shell steps.
|
# molecule-cp IAM user. Closes #161.
|
||||||
# - Buildx couldn't bridge the runner container ↔
|
uses: aws-actions/configure-aws-credentials@v4
|
||||||
# operator-host docker daemon auth gap (401 on the
|
with:
|
||||||
# docker-container driver, "no basic auth credentials"
|
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||||
# with the action-driven login).
|
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||||
#
|
aws-region: us-east-2
|
||||||
# Doing AWS+ECR auth inline (`aws ecr get-login-password |
|
|
||||||
# docker login`) in the same shell step as `docker build` +
|
- name: Log in to ECR
|
||||||
# `docker push` is the operator-host manual approach, mapped
|
id: ecr-login
|
||||||
# 1:1 into CI. Auth state is guaranteed to live in the env that
|
uses: aws-actions/amazon-ecr-login@v2
|
||||||
# `docker push` actually runs from.
|
|
||||||
#
|
- name: Set up Docker Buildx
|
||||||
# Post-suspension target is the operator's ECR org
|
uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
|
||||||
# (153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/*),
|
|
||||||
# which already hosts platform-tenant + workspace-template-* +
|
|
||||||
# runner-base images. AWS creds come from the
|
|
||||||
# AWS_ACCESS_KEY_ID/SECRET secrets bound to the molecule-cp
|
|
||||||
# IAM user. Closes #161.
|
|
||||||
|
|
||||||
- name: Compute tags
|
- name: Compute tags
|
||||||
id: tags
|
id: tags
|
||||||
run: |
|
run: |
|
||||||
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
|
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
# Pre-clone manifest deps before docker build (Task #173 fix).
|
|
||||||
#
|
|
||||||
# Why pre-clone: post-2026-05-06, every workspace-template-* repo on
|
|
||||||
# Gitea (codex, crewai, deepagents, gemini-cli, langgraph) plus all
|
|
||||||
# 7 org-template-* repos are private. The pre-fix Dockerfile.tenant
|
|
||||||
# ran `git clone` inside an in-image stage, which had no auth path
|
|
||||||
# — every CI build failed with "fatal: could not read Username for
|
|
||||||
# https://git.moleculesai.app". For weeks, every workspace-server
|
|
||||||
# rebuild required a manual operator-host push. Now we clone in the
|
|
||||||
# trusted CI context (where AUTO_SYNC_TOKEN is naturally available)
|
|
||||||
# and Dockerfile.tenant just COPYs from .tenant-bundle-deps/.
|
|
||||||
#
|
|
||||||
# Token shape: AUTO_SYNC_TOKEN is the devops-engineer persona PAT
|
|
||||||
# (see /etc/molecule-bootstrap/agent-secrets.env). Per saved memory
|
|
||||||
# `feedback_per_agent_gitea_identity_default`, every CI surface uses
|
|
||||||
# a per-persona token, never the founder PAT. clone-manifest.sh
|
|
||||||
# embeds it as basic-auth (oauth2:<token>) for the duration of the
|
|
||||||
# clones, then strips .git directories — the token never enters
|
|
||||||
# the resulting image.
|
|
||||||
#
|
|
||||||
# Idempotent: if a re-run finds populated dirs, clone-manifest.sh
|
|
||||||
# skips them; safe to retrigger via path-filter or workflow_dispatch.
|
|
||||||
- name: Pre-clone manifest deps
|
|
||||||
env:
|
|
||||||
MOLECULE_GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
if [ -z "${MOLECULE_GITEA_TOKEN}" ]; then
|
|
||||||
echo "::error::AUTO_SYNC_TOKEN secret is empty — register the devops-engineer persona PAT in repo Actions secrets"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
mkdir -p .tenant-bundle-deps
|
|
||||||
bash scripts/clone-manifest.sh \
|
|
||||||
manifest.json \
|
|
||||||
.tenant-bundle-deps/workspace-configs-templates \
|
|
||||||
.tenant-bundle-deps/org-templates \
|
|
||||||
.tenant-bundle-deps/plugins
|
|
||||||
# Sanity-check counts so a silent partial clone fails fast
|
|
||||||
# instead of producing a half-empty image.
|
|
||||||
ws_count=$(find .tenant-bundle-deps/workspace-configs-templates -mindepth 1 -maxdepth 1 -type d | wc -l)
|
|
||||||
org_count=$(find .tenant-bundle-deps/org-templates -mindepth 1 -maxdepth 1 -type d | wc -l)
|
|
||||||
plugins_count=$(find .tenant-bundle-deps/plugins -mindepth 1 -maxdepth 1 -type d | wc -l)
|
|
||||||
echo "Cloned: ws=$ws_count org=$org_count plugins=$plugins_count"
|
|
||||||
# Counts are derived from manifest.json (9 ws / 7 org / 21
|
|
||||||
# plugins as of 2026-05-07). If manifest.json grows but the
|
|
||||||
# clone step regresses silently, the find above caps at the
|
|
||||||
# actual disk state — but clone-manifest.sh's own EXPECTED vs
|
|
||||||
# CLONED check (line ~95) is the authoritative fail-fast.
|
|
||||||
|
|
||||||
# Canary-gated release flow:
|
# Canary-gated release flow:
|
||||||
# - This step always publishes :staging-<sha> + :staging-latest.
|
# - This step always publishes :staging-<sha> + :staging-latest.
|
||||||
# - On staging push, staging-CP picks up :staging-latest immediately
|
# - On staging push, staging-CP picks up :staging-latest immediately
|
||||||
@ -181,82 +126,58 @@ jobs:
|
|||||||
# were running pre-RFC code. Adding the staging trigger above closes
|
# were running pre-RFC code. Adding the staging trigger above closes
|
||||||
# that gap. Earlier 2026-04-24 incident: a static :staging-<sha> pin
|
# that gap. Earlier 2026-04-24 incident: a static :staging-<sha> pin
|
||||||
# drifted 10 days behind staging — same class of bug, different
|
# drifted 10 days behind staging — same class of bug, different
|
||||||
# mechanism. ECR repo molecule-ai/platform created 2026-05-07.
|
# mechanism.
|
||||||
# Build + push platform image with plain `docker` (no buildx).
|
- name: Build & push platform image to GHCR (staging-<sha> + staging-latest)
|
||||||
# GIT_SHA bakes into the Go binary via -ldflags so /buildinfo
|
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||||
# returns it at runtime — see Dockerfile + buildinfo/buildinfo.go.
|
with:
|
||||||
# The OCI revision label below carries the same value for registry
|
context: .
|
||||||
# tooling; the duplication is intentional.
|
file: ./workspace-server/Dockerfile
|
||||||
- name: Build & push platform image to ECR (staging-<sha> + staging-latest)
|
platforms: linux/amd64
|
||||||
env:
|
push: true
|
||||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
tags: |
|
||||||
TAG_SHA: staging-${{ steps.tags.outputs.sha }}
|
${{ env.IMAGE_NAME }}:staging-${{ steps.tags.outputs.sha }}
|
||||||
TAG_LATEST: staging-latest
|
${{ env.IMAGE_NAME }}:staging-latest
|
||||||
GIT_SHA: ${{ github.sha }}
|
cache-from: type=gha
|
||||||
REPO: ${{ github.repository }}
|
cache-to: type=gha,mode=max
|
||||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
# GIT_SHA bakes into the Go binary via -ldflags so /buildinfo
|
||||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
# returns it at runtime — see Dockerfile + buildinfo/buildinfo.go.
|
||||||
AWS_DEFAULT_REGION: us-east-2
|
# This is the same value as the OCI revision label below; passing
|
||||||
run: |
|
# it twice is intentional, the OCI label is for registry tooling
|
||||||
set -euo pipefail
|
# while /buildinfo is for the redeploy verification step.
|
||||||
# ECR auth in-step so config.json is populated in the same
|
build-args: |
|
||||||
# shell env that runs `docker push`. ECR get-login-password
|
GIT_SHA=${{ github.sha }}
|
||||||
# tokens last 12h, plenty for a single-step build+push.
|
labels: |
|
||||||
ECR_REGISTRY="${IMAGE_NAME%%/*}"
|
org.opencontainers.image.source=https://github.com/${{ github.repository }}
|
||||||
aws ecr get-login-password --region us-east-2 | \
|
org.opencontainers.image.revision=${{ github.sha }}
|
||||||
docker login --username AWS --password-stdin "${ECR_REGISTRY}"
|
org.opencontainers.image.description=Molecule AI platform (Go API server) — pending canary verify
|
||||||
docker build \
|
|
||||||
--file ./workspace-server/Dockerfile \
|
|
||||||
--build-arg GIT_SHA="${GIT_SHA}" \
|
|
||||||
--label "org.opencontainers.image.source=https://github.com/${REPO}" \
|
|
||||||
--label "org.opencontainers.image.revision=${GIT_SHA}" \
|
|
||||||
--label "org.opencontainers.image.description=Molecule AI platform (Go API server) — pending canary verify" \
|
|
||||||
--tag "${IMAGE_NAME}:${TAG_SHA}" \
|
|
||||||
--tag "${IMAGE_NAME}:${TAG_LATEST}" \
|
|
||||||
.
|
|
||||||
docker push "${IMAGE_NAME}:${TAG_SHA}"
|
|
||||||
docker push "${IMAGE_NAME}:${TAG_LATEST}"
|
|
||||||
|
|
||||||
# Canvas uses same-origin fetches. The tenant Go platform
|
|
||||||
# reverse-proxies /cp/* to the SaaS CP via its CP_UPSTREAM_URL
|
|
||||||
# env; the tenant's /canvas/viewport, /approvals/pending,
|
|
||||||
# /org/templates etc. live on the tenant platform itself.
|
|
||||||
# Both legs share one origin (the tenant subdomain) so
|
|
||||||
# PLATFORM_URL="" forces canvas to fetch paths as relative,
|
|
||||||
# which land same-origin.
|
|
||||||
#
|
|
||||||
# Self-hosted / private-label deployments override this at
|
|
||||||
# build time with a specific backend (e.g. local dev:
|
|
||||||
# NEXT_PUBLIC_PLATFORM_URL=http://localhost:8080).
|
|
||||||
- name: Build & push tenant image to ECR (staging-<sha> + staging-latest)
|
|
||||||
env:
|
|
||||||
TENANT_IMAGE_NAME: ${{ env.TENANT_IMAGE_NAME }}
|
|
||||||
TAG_SHA: staging-${{ steps.tags.outputs.sha }}
|
|
||||||
TAG_LATEST: staging-latest
|
|
||||||
GIT_SHA: ${{ github.sha }}
|
|
||||||
REPO: ${{ github.repository }}
|
|
||||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
|
||||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
|
||||||
AWS_DEFAULT_REGION: us-east-2
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
# Re-login: the platform-image step's docker login wrote to
|
|
||||||
# the same config.json, so this is technically redundant — but
|
|
||||||
# making each push step self-contained keeps the workflow
|
|
||||||
# robust to step reordering / future extraction.
|
|
||||||
ECR_REGISTRY="${TENANT_IMAGE_NAME%%/*}"
|
|
||||||
aws ecr get-login-password --region us-east-2 | \
|
|
||||||
docker login --username AWS --password-stdin "${ECR_REGISTRY}"
|
|
||||||
docker build \
|
|
||||||
--file ./workspace-server/Dockerfile.tenant \
|
|
||||||
--build-arg NEXT_PUBLIC_PLATFORM_URL= \
|
|
||||||
--build-arg GIT_SHA="${GIT_SHA}" \
|
|
||||||
--label "org.opencontainers.image.source=https://github.com/${REPO}" \
|
|
||||||
--label "org.opencontainers.image.revision=${GIT_SHA}" \
|
|
||||||
--label "org.opencontainers.image.description=Molecule AI tenant platform + canvas — pending canary verify" \
|
|
||||||
--tag "${TENANT_IMAGE_NAME}:${TAG_SHA}" \
|
|
||||||
--tag "${TENANT_IMAGE_NAME}:${TAG_LATEST}" \
|
|
||||||
.
|
|
||||||
docker push "${TENANT_IMAGE_NAME}:${TAG_SHA}"
|
|
||||||
docker push "${TENANT_IMAGE_NAME}:${TAG_LATEST}"
|
|
||||||
|
|
||||||
|
- name: Build & push tenant image to GHCR (staging-<sha> + staging-latest)
|
||||||
|
uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7.1.0
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
file: ./workspace-server/Dockerfile.tenant
|
||||||
|
platforms: linux/amd64
|
||||||
|
push: true
|
||||||
|
tags: |
|
||||||
|
${{ env.TENANT_IMAGE_NAME }}:staging-${{ steps.tags.outputs.sha }}
|
||||||
|
${{ env.TENANT_IMAGE_NAME }}:staging-latest
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
|
# Canvas uses same-origin fetches. The tenant Go platform
|
||||||
|
# reverse-proxies /cp/* to the SaaS CP via its CP_UPSTREAM_URL
|
||||||
|
# env; the tenant's /canvas/viewport, /approvals/pending,
|
||||||
|
# /org/templates etc. live on the tenant platform itself.
|
||||||
|
# Both legs share one origin (the tenant subdomain) so
|
||||||
|
# PLATFORM_URL="" forces canvas to fetch paths as relative,
|
||||||
|
# which land same-origin.
|
||||||
|
#
|
||||||
|
# Self-hosted / private-label deployments override this at
|
||||||
|
# build time with a specific backend (e.g. local dev:
|
||||||
|
# NEXT_PUBLIC_PLATFORM_URL=http://localhost:8080).
|
||||||
|
build-args: |
|
||||||
|
NEXT_PUBLIC_PLATFORM_URL=
|
||||||
|
GIT_SHA=${{ github.sha }}
|
||||||
|
labels: |
|
||||||
|
org.opencontainers.image.source=https://github.com/${{ github.repository }}
|
||||||
|
org.opencontainers.image.revision=${{ github.sha }}
|
||||||
|
org.opencontainers.image.description=Molecule AI tenant platform + canvas — pending canary verify
|
||||||
|
|||||||
@ -9,7 +9,7 @@ name: redeploy-tenants-on-main
|
|||||||
#
|
#
|
||||||
# This workflow closes the gap by calling the control-plane admin
|
# This workflow closes the gap by calling the control-plane admin
|
||||||
# endpoint that performs a canary-first, batched, health-gated rolling
|
# endpoint that performs a canary-first, batched, health-gated rolling
|
||||||
# redeploy across every live tenant. Implemented in molecule-ai/
|
# redeploy across every live tenant. Implemented in Molecule-AI/
|
||||||
# molecule-controlplane as POST /cp/admin/tenants/redeploy-fleet
|
# molecule-controlplane as POST /cp/admin/tenants/redeploy-fleet
|
||||||
# (feat/tenant-auto-redeploy, landing alongside this workflow).
|
# (feat/tenant-auto-redeploy, landing alongside this workflow).
|
||||||
#
|
#
|
||||||
@ -146,7 +146,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Call CP redeploy-fleet
|
- name: Call CP redeploy-fleet
|
||||||
# CP_ADMIN_API_TOKEN must be set as a repo/org secret on
|
# CP_ADMIN_API_TOKEN must be set as a repo/org secret on
|
||||||
# molecule-ai/molecule-core, matching the staging/prod CP's
|
# Molecule-AI/molecule-core, matching the staging/prod CP's
|
||||||
# CP_ADMIN_API_TOKEN env. Stored in Railway, mirrored to this
|
# CP_ADMIN_API_TOKEN env. Stored in Railway, mirrored to this
|
||||||
# repo's secrets for CI.
|
# repo's secrets for CI.
|
||||||
env:
|
env:
|
||||||
|
|||||||
@ -36,7 +36,7 @@ on:
|
|||||||
workflow_run:
|
workflow_run:
|
||||||
workflows: ['publish-workspace-server-image']
|
workflows: ['publish-workspace-server-image']
|
||||||
types: [completed]
|
types: [completed]
|
||||||
branches: [main]
|
branches: [staging]
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
inputs:
|
inputs:
|
||||||
target_tag:
|
target_tag:
|
||||||
@ -97,7 +97,7 @@ jobs:
|
|||||||
|
|
||||||
- name: Call staging-CP redeploy-fleet
|
- name: Call staging-CP redeploy-fleet
|
||||||
# CP_STAGING_ADMIN_API_TOKEN must be set as a repo/org secret
|
# CP_STAGING_ADMIN_API_TOKEN must be set as a repo/org secret
|
||||||
# on molecule-ai/molecule-core, matching staging-CP's
|
# on Molecule-AI/molecule-core, matching staging-CP's
|
||||||
# CP_ADMIN_API_TOKEN env var (visible in Railway controlplane
|
# CP_ADMIN_API_TOKEN env var (visible in Railway controlplane
|
||||||
# / staging environment). Stored separately from the prod
|
# / staging environment). Stored separately from the prod
|
||||||
# CP_ADMIN_API_TOKEN so a leak of one doesn't auth the other.
|
# CP_ADMIN_API_TOKEN so a leak of one doesn't auth the other.
|
||||||
|
|||||||
105
.github/workflows/retarget-main-to-staging.yml
vendored
Normal file
105
.github/workflows/retarget-main-to-staging.yml
vendored
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
name: Retarget main PRs to staging
|
||||||
|
|
||||||
|
# Mechanical enforcement of SHARED_RULES rule 8 ("Staging-first workflow, no
|
||||||
|
# exceptions"). When a bot opens a PR against main, retarget it to staging
|
||||||
|
# automatically and leave an explanatory comment. Human CEO-authored PRs (the
|
||||||
|
# staging→main promotion PR, etc.) are left alone — they're the authorised
|
||||||
|
# exception to the rule.
|
||||||
|
#
|
||||||
|
# Why an Action instead of only a prompt rule: prompt rules depend on every
|
||||||
|
# role's system-prompt.md staying in sync. Today 5 of 8 engineer roles
|
||||||
|
# (core-be, core-fe, app-fe, app-qa, devops-engineer) don't have the
|
||||||
|
# staging-first section — the bot keeps opening PRs to main. An Action
|
||||||
|
# enforces the invariant regardless of prompt drift.
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request_target:
|
||||||
|
types: [opened, reopened]
|
||||||
|
branches: [main]
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
pull-requests: write
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
retarget:
|
||||||
|
name: Retarget to staging
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
# Only fire for bot-authored PRs. Human CEO PRs (staging→main promotion)
|
||||||
|
# are intentional and pass through.
|
||||||
|
#
|
||||||
|
# Head-ref guard: never retarget a PR whose head IS `staging` — those
|
||||||
|
# are the auto-promote staging→main PRs (opened by molecule-ai[bot]
|
||||||
|
# since #2586 switched to an App token, which now passes the bot
|
||||||
|
# filter below). Retargeting head=staging onto base=staging fails
|
||||||
|
# with HTTP 422 "no new commits between base 'staging' and head
|
||||||
|
# 'staging'", which used to surface as a noisy red workflow run on
|
||||||
|
# every auto-promote (caught 2026-05-03 on PR #2588).
|
||||||
|
if: >-
|
||||||
|
github.event.pull_request.head.ref != 'staging'
|
||||||
|
&& (
|
||||||
|
github.event.pull_request.user.type == 'Bot'
|
||||||
|
|| endsWith(github.event.pull_request.user.login, '[bot]')
|
||||||
|
|| github.event.pull_request.user.login == 'app/molecule-ai'
|
||||||
|
|| github.event.pull_request.user.login == 'molecule-ai[bot]'
|
||||||
|
)
|
||||||
|
steps:
|
||||||
|
- name: Retarget PR base to staging
|
||||||
|
id: retarget
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||||
|
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
|
||||||
|
# Issue #1884: when the bot opens a PR against main and there's
|
||||||
|
# already another PR on the same head branch targeting staging,
|
||||||
|
# GitHub's PATCH /pulls returns 422 with
|
||||||
|
# "A pull request already exists for base branch 'staging' …".
|
||||||
|
# The retarget can't proceed — but the right response is to
|
||||||
|
# close the now-redundant main-PR, not to fail the workflow
|
||||||
|
# noisily. Detect that specific 422 and close instead.
|
||||||
|
run: |
|
||||||
|
set +e
|
||||||
|
echo "Retargeting PR #${PR_NUMBER} (author: ${PR_AUTHOR}) from main → staging"
|
||||||
|
PATCH_OUTPUT=$(gh api -X PATCH \
|
||||||
|
"repos/${{ github.repository }}/pulls/${PR_NUMBER}" \
|
||||||
|
-f base=staging \
|
||||||
|
--jq '.base.ref' 2>&1)
|
||||||
|
PATCH_EXIT=$?
|
||||||
|
set -e
|
||||||
|
if [ "$PATCH_EXIT" -eq 0 ]; then
|
||||||
|
echo "::notice::Retargeted PR #${PR_NUMBER} → staging"
|
||||||
|
echo "outcome=retargeted" >> "$GITHUB_OUTPUT"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
# Specifically match the 422 duplicate-base/head error so
|
||||||
|
# any OTHER PATCH failure (auth, deleted PR, etc.) still
|
||||||
|
# surfaces as a real workflow failure.
|
||||||
|
if echo "$PATCH_OUTPUT" | grep -q "pull request already exists for base branch 'staging'"; then
|
||||||
|
echo "::notice::PR #${PR_NUMBER}: duplicate target-staging PR exists on same head — closing this main-PR as redundant."
|
||||||
|
gh pr close "$PR_NUMBER" \
|
||||||
|
--repo "${{ github.repository }}" \
|
||||||
|
--comment "[retarget-bot] Closing — another PR on the same head branch already targets \`staging\`. This PR is redundant. See issue #1884 for the rationale."
|
||||||
|
echo "outcome=closed-as-duplicate" >> "$GITHUB_OUTPUT"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
echo "::error::Retarget PATCH failed and was NOT a duplicate-base error:"
|
||||||
|
echo "$PATCH_OUTPUT" >&2
|
||||||
|
exit 1
|
||||||
|
|
||||||
|
- name: Post explainer comment
|
||||||
|
if: steps.retarget.outputs.outcome == 'retargeted'
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||||
|
run: |
|
||||||
|
gh pr comment "$PR_NUMBER" \
|
||||||
|
--repo "${{ github.repository }}" \
|
||||||
|
--body "$(cat <<'BODY'
|
||||||
|
[retarget-bot] This PR was opened against `main` and has been retargeted to `staging` automatically.
|
||||||
|
|
||||||
|
**Why:** per [SHARED_RULES rule 8](https://github.com/Molecule-AI/molecule-ai-org-template-molecule-dev/blob/main/SHARED_RULES.md), all feature work targets `staging` first; the CEO promotes `staging → main` separately.
|
||||||
|
|
||||||
|
**What changed:** just the base branch — no code change. CI will re-run against `staging`. If you get merge conflicts, rebase on `staging`.
|
||||||
|
|
||||||
|
**If this PR is the CEO's staging→main promotion:** the Action skipped you (only bot-authored PRs are retargeted). If you see this comment on your CEO PR, that's a bug — please tag @HongmingWang-Rabbit.
|
||||||
|
BODY
|
||||||
|
)"
|
||||||
2
.github/workflows/secret-scan.yml
vendored
2
.github/workflows/secret-scan.yml
vendored
@ -12,7 +12,7 @@ name: Secret scan
|
|||||||
#
|
#
|
||||||
# jobs:
|
# jobs:
|
||||||
# secret-scan:
|
# secret-scan:
|
||||||
# uses: molecule-ai/molecule-core/.github/workflows/secret-scan.yml@staging
|
# uses: Molecule-AI/molecule-core/.github/workflows/secret-scan.yml@staging
|
||||||
#
|
#
|
||||||
# Pin to @staging not @main — staging is the active default branch,
|
# Pin to @staging not @main — staging is the active default branch,
|
||||||
# main lags via the staging-promotion workflow. Updates ride along
|
# main lags via the staging-promotion workflow. Updates ride along
|
||||||
|
|||||||
7
.gitignore
vendored
7
.gitignore
vendored
@ -131,13 +131,6 @@ backups/
|
|||||||
# Cloned by publish-workspace-server-image.yml so the Dockerfile's
|
# Cloned by publish-workspace-server-image.yml so the Dockerfile's
|
||||||
# replace-directive path resolves. Lives in its own repo.
|
# replace-directive path resolves. Lives in its own repo.
|
||||||
/molecule-ai-plugin-github-app-auth/
|
/molecule-ai-plugin-github-app-auth/
|
||||||
# Tenant-image build context — populated by the workflow's
|
|
||||||
# "Pre-clone manifest deps" step. Mirrors the public manifest, holds the
|
|
||||||
# same content as the three /<>/ dirs above but namespaced under one
|
|
||||||
# parent so the Docker build context is a single COPY-friendly tree.
|
|
||||||
# Each entry is a transient working-dir, never source-of-truth, never
|
|
||||||
# committed.
|
|
||||||
/.tenant-bundle-deps/
|
|
||||||
|
|
||||||
# Internal-flavored content lives in Molecule-AI/internal — NEVER in this
|
# Internal-flavored content lives in Molecule-AI/internal — NEVER in this
|
||||||
# public monorepo. Migrated 2026-04-23 (CEO directive). The CI workflow
|
# public monorepo. Migrated 2026-04-23 (CEO directive). The CI workflow
|
||||||
|
|||||||
28
Makefile
28
Makefile
@ -1,28 +0,0 @@
|
|||||||
# Top-level Makefile — convenience wrappers around docker compose.
|
|
||||||
#
|
|
||||||
# Most molecule-core dev work happens via these shortcuts. CI doesn't
|
|
||||||
# use this Makefile; CI calls docker compose / go test directly so the
|
|
||||||
# Makefile can evolve without breaking the build.
|
|
||||||
|
|
||||||
.PHONY: help dev up down logs build test
|
|
||||||
|
|
||||||
help: ## Show this help.
|
|
||||||
@grep -E '^[a-zA-Z_-]+:.*?## ' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-12s\033[0m %s\n", $$1, $$2}'
|
|
||||||
|
|
||||||
dev: ## Start the full stack with air hot-reload for the platform service.
|
|
||||||
docker compose -f docker-compose.yml -f docker-compose.dev.yml up
|
|
||||||
|
|
||||||
up: ## Start the full stack in production-shape mode (no air, normal Dockerfile).
|
|
||||||
docker compose up
|
|
||||||
|
|
||||||
down: ## Stop the stack and remove containers (volumes preserved).
|
|
||||||
docker compose down
|
|
||||||
|
|
||||||
logs: ## Tail logs from all services (Ctrl-C to detach).
|
|
||||||
docker compose logs -f
|
|
||||||
|
|
||||||
build: ## Force a fresh build of the platform image (no cache).
|
|
||||||
docker compose build --no-cache platform
|
|
||||||
|
|
||||||
test: ## Run Go unit tests in workspace-server/.
|
|
||||||
cd workspace-server && go test -race ./...
|
|
||||||
@ -225,14 +225,14 @@ The result is not just “an agent that learns.” It is **an organization that
|
|||||||
- runtime tiers
|
- runtime tiers
|
||||||
- direct workspace inspection through terminal and files
|
- direct workspace inspection through terminal and files
|
||||||
|
|
||||||
### SaaS (via [`molecule-controlplane`](https://git.moleculesai.app/molecule-ai/molecule-controlplane))
|
### SaaS (via [`molecule-controlplane`](https://github.com/Molecule-AI/molecule-controlplane))
|
||||||
|
|
||||||
- multi-tenant on AWS EC2 + Neon (per-tenant Postgres branch) + Cloudflare Tunnels (per-tenant, no public ports)
|
- multi-tenant on AWS EC2 + Neon (per-tenant Postgres branch) + Cloudflare Tunnels (per-tenant, no public ports)
|
||||||
- WorkOS AuthKit + Stripe Checkout + Customer Portal
|
- WorkOS AuthKit + Stripe Checkout + Customer Portal
|
||||||
- AWS KMS envelope encryption (DB / Redis connection strings); AWS Secrets Manager for tenant bootstrap
|
- AWS KMS envelope encryption (DB / Redis connection strings); AWS Secrets Manager for tenant bootstrap
|
||||||
- `tenant_resources` audit table + 30-min boot-event-aware reconciler — every CF / AWS lifecycle event recorded, claim vs live state diffed
|
- `tenant_resources` audit table + 30-min boot-event-aware reconciler — every CF / AWS lifecycle event recorded, claim vs live state diffed
|
||||||
|
|
||||||
### Bring your own Claude Code session (via [`molecule-mcp-claude-channel`](https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel))
|
### Bring your own Claude Code session (via [`molecule-mcp-claude-channel`](https://github.com/Molecule-AI/molecule-mcp-claude-channel))
|
||||||
|
|
||||||
- Claude Code plugin that bridges Molecule A2A traffic into a local Claude Code session via MCP
|
- Claude Code plugin that bridges Molecule A2A traffic into a local Claude Code session via MCP
|
||||||
- subscribe to one or more workspaces; peer messages surface as conversation turns; replies route back through Molecule's A2A
|
- subscribe to one or more workspaces; peer messages surface as conversation turns; replies route back through Molecule's A2A
|
||||||
@ -330,7 +330,7 @@ Then open `http://localhost:3000`:
|
|||||||
|
|
||||||
The current `main` branch ships the core platform, Canvas v4 (warm-paper themed), Memory v2 (pgvector semantic recall), the typed-SSOT A2A response path (RFC #2967), **eight production adapters** (Claude Code, Hermes, Gemini CLI, LangGraph, DeepAgents, CrewAI, AutoGen, OpenClaw), skill lifecycle, and operational surfaces.
|
The current `main` branch ships the core platform, Canvas v4 (warm-paper themed), Memory v2 (pgvector semantic recall), the typed-SSOT A2A response path (RFC #2967), **eight production adapters** (Claude Code, Hermes, Gemini CLI, LangGraph, DeepAgents, CrewAI, AutoGen, OpenClaw), skill lifecycle, and operational surfaces.
|
||||||
|
|
||||||
The companion private repo [`molecule-controlplane`](https://git.moleculesai.app/molecule-ai/molecule-controlplane) provides the SaaS surface — multi-tenant orchestration on EC2 + Neon + Cloudflare Tunnels, KMS envelope encryption, WorkOS auth, Stripe billing, and a `tenant_resources` audit table with a 30-min reconciler.
|
The companion private repo [`molecule-controlplane`](https://github.com/Molecule-AI/molecule-controlplane) provides the SaaS surface — multi-tenant orchestration on EC2 + Neon + Cloudflare Tunnels, KMS envelope encryption, WorkOS auth, Stripe billing, and a `tenant_resources` audit table with a 30-min reconciler.
|
||||||
|
|
||||||
Adjacent runtime work such as **NemoClaw** remains branch-level until merged, and this README keeps that distinction explicit on purpose.
|
Adjacent runtime work such as **NemoClaw** remains branch-level until merged, and this README keeps that distinction explicit on purpose.
|
||||||
|
|
||||||
|
|||||||
@ -224,14 +224,14 @@ Molecule AI 并不是要替代下面这些 framework,而是把它们纳入更
|
|||||||
- runtime tiers
|
- runtime tiers
|
||||||
- 终端与文件层面的 workspace 直接排障
|
- 终端与文件层面的 workspace 直接排障
|
||||||
|
|
||||||
### SaaS(由 [`molecule-controlplane`](https://git.moleculesai.app/molecule-ai/molecule-controlplane) 提供)
|
### SaaS(由 [`molecule-controlplane`](https://github.com/Molecule-AI/molecule-controlplane) 提供)
|
||||||
|
|
||||||
- 多租户运行在 AWS EC2 + Neon(每租户一个 Postgres branch)+ Cloudflare Tunnels(每租户一条隧道,对外不开任何端口)
|
- 多租户运行在 AWS EC2 + Neon(每租户一个 Postgres branch)+ Cloudflare Tunnels(每租户一条隧道,对外不开任何端口)
|
||||||
- WorkOS AuthKit + Stripe Checkout + Customer Portal
|
- WorkOS AuthKit + Stripe Checkout + Customer Portal
|
||||||
- AWS KMS 信封加密(DB / Redis 连接串);AWS Secrets Manager 负责租户 bootstrap
|
- AWS KMS 信封加密(DB / Redis 连接串);AWS Secrets Manager 负责租户 bootstrap
|
||||||
- `tenant_resources` 审计表 + 30 分钟 boot-event-aware reconciler —— 每个 CF / AWS lifecycle 事件都有记录,每 30 分钟比对 claim 与实际状态
|
- `tenant_resources` 审计表 + 30 分钟 boot-event-aware reconciler —— 每个 CF / AWS lifecycle 事件都有记录,每 30 分钟比对 claim 与实际状态
|
||||||
|
|
||||||
### 在 Claude Code 里直接接入(由 [`molecule-mcp-claude-channel`](https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel) 提供)
|
### 在 Claude Code 里直接接入(由 [`molecule-mcp-claude-channel`](https://github.com/Molecule-AI/molecule-mcp-claude-channel) 提供)
|
||||||
|
|
||||||
- 把 Molecule A2A 流量桥接到本地 Claude Code 会话的 MCP 插件
|
- 把 Molecule A2A 流量桥接到本地 Claude Code 会话的 MCP 插件
|
||||||
- 订阅一个或多个 workspace;peer 的消息会以 user-turn 出现,回复会经 Molecule A2A 路由出去
|
- 订阅一个或多个 workspace;peer 的消息会以 user-turn 出现,回复会经 Molecule A2A 路由出去
|
||||||
@ -323,7 +323,7 @@ npm run dev
|
|||||||
|
|
||||||
当前 `main` 已经包含核心平台、Canvas v4(warm-paper 主题)、Memory v2(pgvector 语义召回)、typed-SSOT A2A 响应路径(RFC #2967)、**8 个正式 adapter**(Claude Code、Hermes、Gemini CLI、LangGraph、DeepAgents、CrewAI、AutoGen、OpenClaw)、skill lifecycle,以及主要运维面。
|
当前 `main` 已经包含核心平台、Canvas v4(warm-paper 主题)、Memory v2(pgvector 语义召回)、typed-SSOT A2A 响应路径(RFC #2967)、**8 个正式 adapter**(Claude Code、Hermes、Gemini CLI、LangGraph、DeepAgents、CrewAI、AutoGen、OpenClaw)、skill lifecycle,以及主要运维面。
|
||||||
|
|
||||||
配套的私有仓库 [`molecule-controlplane`](https://git.moleculesai.app/molecule-ai/molecule-controlplane) 提供 SaaS 层 —— 多租户编排(EC2 + Neon + Cloudflare Tunnels)、KMS 信封加密、WorkOS 鉴权、Stripe 计费,以及 `tenant_resources` 审计表加 30 分钟 reconciler。
|
配套的私有仓库 [`molecule-controlplane`](https://github.com/Molecule-AI/molecule-controlplane) 提供 SaaS 层 —— 多租户编排(EC2 + Neon + Cloudflare Tunnels)、KMS 信封加密、WorkOS 鉴权、Stripe 计费,以及 `tenant_resources` 审计表加 30 分钟 reconciler。
|
||||||
|
|
||||||
像 **NemoClaw** 这样的相邻 runtime 路线仍然属于分支级工作,只有合并后才会进入正式支持列表,这里会明确区分。
|
像 **NemoClaw** 这样的相邻 runtime 路线仍然属于分支级工作,只有合并后才会进入正式支持列表,这里会明确区分。
|
||||||
|
|
||||||
|
|||||||
@ -1,10 +0,0 @@
|
|||||||
# Excluded from `docker build` context. Without this, the COPY . . step in
|
|
||||||
# canvas/Dockerfile clobbers the freshly-installed node_modules with the
|
|
||||||
# host's (potentially broken / wrong-arch) copy — the @tailwindcss/oxide
|
|
||||||
# native binary disagreed and broke `next build`.
|
|
||||||
node_modules
|
|
||||||
.next
|
|
||||||
.git
|
|
||||||
*.log
|
|
||||||
.env*
|
|
||||||
!.env.example
|
|
||||||
@ -1,11 +1,7 @@
|
|||||||
FROM node:22-alpine AS builder
|
FROM node:22-alpine AS builder
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY package.json package-lock.json* ./
|
COPY package.json package-lock.json* ./
|
||||||
# `npm ci` (not `install`) for lockfile-exact reproducibility.
|
RUN npm install
|
||||||
# `--include=optional` ensures the platform-specific @tailwindcss/oxide
|
|
||||||
# native binary lands — without it, postcss fails with "Cannot read
|
|
||||||
# properties of undefined (reading 'All')" at build time.
|
|
||||||
RUN npm ci --include=optional
|
|
||||||
COPY . .
|
COPY . .
|
||||||
ARG NEXT_PUBLIC_PLATFORM_URL=http://localhost:8080
|
ARG NEXT_PUBLIC_PLATFORM_URL=http://localhost:8080
|
||||||
ARG NEXT_PUBLIC_WS_URL=ws://localhost:8080/ws
|
ARG NEXT_PUBLIC_WS_URL=ws://localhost:8080/ws
|
||||||
|
|||||||
@ -17,24 +17,6 @@ import { dirname, join } from "node:path";
|
|||||||
// update one heuristic. Production is unaffected: `output: "standalone"`
|
// update one heuristic. Production is unaffected: `output: "standalone"`
|
||||||
// bakes resolved env into the build, and the marker file isn't shipped.
|
// bakes resolved env into the build, and the marker file isn't shipped.
|
||||||
loadMonorepoEnv();
|
loadMonorepoEnv();
|
||||||
// Boot-time matched-pair guard for ADMIN_TOKEN / NEXT_PUBLIC_ADMIN_TOKEN.
|
|
||||||
// When ADMIN_TOKEN is set on the workspace-server (server-side bearer
|
|
||||||
// gate, wsauth_middleware.go ~L245), the canvas MUST send the matching
|
|
||||||
// NEXT_PUBLIC_ADMIN_TOKEN as `Authorization: Bearer ...` on every API
|
|
||||||
// call. If only one is set, every workspace API call 401s silently —
|
|
||||||
// the canvas hydrates with empty data and the user sees a broken page
|
|
||||||
// with no console hint about the auth-config mismatch.
|
|
||||||
//
|
|
||||||
// Pre-fix the matched-pair contract was descriptive only (a comment in
|
|
||||||
// .env): future devs/agents could re-misconfigure with one of the two
|
|
||||||
// unset and silently 401. Closes the post-PR-#174 self-review gap.
|
|
||||||
//
|
|
||||||
// Warn-only (not exit) — production canvas Docker images bake these
|
|
||||||
// vars into the build at image-build time, and a missed pair there
|
|
||||||
// would still emit the warning at runtime via the standalone server's
|
|
||||||
// startup. Killing the process on misconfiguration would turn a
|
|
||||||
// recoverable auth issue into a hard crashloop.
|
|
||||||
checkAdminTokenPair();
|
|
||||||
|
|
||||||
const nextConfig: NextConfig = {
|
const nextConfig: NextConfig = {
|
||||||
output: "standalone",
|
output: "standalone",
|
||||||
@ -75,43 +57,6 @@ function loadMonorepoEnv() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Boot-time matched-pair guard. Runs after .env has been loaded so the
|
|
||||||
// check sees the post-load state. The two env vars must be set or
|
|
||||||
// unset together; one-without-the-other is the silent-401 footgun.
|
|
||||||
//
|
|
||||||
// Treats empty string ("") as unset. An explicitly-empty `KEY=` in
|
|
||||||
// .env counts as set-to-empty in `process.env`, but for auth purposes
|
|
||||||
// an empty bearer token is equivalent to no token — so both
|
|
||||||
// `ADMIN_TOKEN=` and an unset ADMIN_TOKEN are equivalent relative to
|
|
||||||
// the matched-pair invariant.
|
|
||||||
//
|
|
||||||
// Returns void; side effect is the console.error warning. Kept as a
|
|
||||||
// separate function (exported) so a future test can reset env, call
|
|
||||||
// this, and assert on captured stderr.
|
|
||||||
export function checkAdminTokenPair(): void {
|
|
||||||
const serverSet = !!process.env.ADMIN_TOKEN;
|
|
||||||
const clientSet = !!process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
|
||||||
if (serverSet === clientSet) return;
|
|
||||||
// Distinct messages so the operator can tell which half is missing
|
|
||||||
// — the fix is symmetric (set the other one) but the diagnostic
|
|
||||||
// mentions which side is currently set so they don't have to grep.
|
|
||||||
if (serverSet && !clientSet) {
|
|
||||||
// eslint-disable-next-line no-console
|
|
||||||
console.error(
|
|
||||||
"[next.config] ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not — " +
|
|
||||||
"canvas will 401 against workspace-server because the bearer header " +
|
|
||||||
"is never attached. Set both to the same value, or unset both.",
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
// eslint-disable-next-line no-console
|
|
||||||
console.error(
|
|
||||||
"[next.config] NEXT_PUBLIC_ADMIN_TOKEN is set but ADMIN_TOKEN is not — " +
|
|
||||||
"workspace-server will reject the bearer because no AdminAuth gate " +
|
|
||||||
"is configured. Set both to the same value, or unset both.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function findMonorepoRoot(start: string): string | null {
|
function findMonorepoRoot(start: string): string | null {
|
||||||
let dir = start;
|
let dir = start;
|
||||||
for (let i = 0; i < 6; i++) {
|
for (let i = 0; i < 6; i++) {
|
||||||
|
|||||||
@ -3,7 +3,6 @@ import { cookies, headers } from "next/headers";
|
|||||||
import "./globals.css";
|
import "./globals.css";
|
||||||
import { AuthGate } from "@/components/AuthGate";
|
import { AuthGate } from "@/components/AuthGate";
|
||||||
import { CookieConsent } from "@/components/CookieConsent";
|
import { CookieConsent } from "@/components/CookieConsent";
|
||||||
import { PurchaseSuccessModal } from "@/components/PurchaseSuccessModal";
|
|
||||||
import { ThemeProvider } from "@/lib/theme-provider";
|
import { ThemeProvider } from "@/lib/theme-provider";
|
||||||
import {
|
import {
|
||||||
THEME_COOKIE,
|
THEME_COOKIE,
|
||||||
@ -87,12 +86,6 @@ export default async function RootLayout({
|
|||||||
vercel preview URL, apex) pass through unchanged. */}
|
vercel preview URL, apex) pass through unchanged. */}
|
||||||
<AuthGate>{children}</AuthGate>
|
<AuthGate>{children}</AuthGate>
|
||||||
<CookieConsent />
|
<CookieConsent />
|
||||||
{/* Demo Mock #1: post-purchase success toast. Mounted at the
|
|
||||||
layout level so it persists across page state transitions
|
|
||||||
(loading → hydrated → error) without being unmounted and
|
|
||||||
losing its open-state. Reads ?purchase_success=1 from the
|
|
||||||
URL on first paint, then strips the param. */}
|
|
||||||
<PurchaseSuccessModal />
|
|
||||||
</ThemeProvider>
|
</ThemeProvider>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
@ -41,7 +41,7 @@ export default function PricingPage() {
|
|||||||
<p className="mt-2 text-ink-mid">
|
<p className="mt-2 text-ink-mid">
|
||||||
We publish the{" "}
|
We publish the{" "}
|
||||||
<a
|
<a
|
||||||
href="https://git.moleculesai.app/molecule-ai/molecule-monorepo"
|
href="https://github.com/Molecule-AI/molecule-monorepo"
|
||||||
className="text-accent underline hover:text-accent"
|
className="text-accent underline hover:text-accent"
|
||||||
>
|
>
|
||||||
full source on GitHub
|
full source on GitHub
|
||||||
|
|||||||
@ -1,10 +1,9 @@
|
|||||||
'use client';
|
'use client';
|
||||||
|
|
||||||
import { useEffect, useMemo, useCallback, useRef } from "react";
|
import { useEffect, useMemo, useCallback } from "react";
|
||||||
import { type Edge, MarkerType } from "@xyflow/react";
|
import { type Edge, MarkerType } from "@xyflow/react";
|
||||||
import { api } from "@/lib/api";
|
import { api } from "@/lib/api";
|
||||||
import { useCanvasStore } from "@/store/canvas";
|
import { useCanvasStore } from "@/store/canvas";
|
||||||
import { useSocketEvent } from "@/hooks/useSocketEvent";
|
|
||||||
import type { ActivityEntry } from "@/types/activity";
|
import type { ActivityEntry } from "@/types/activity";
|
||||||
|
|
||||||
// ── Constants ─────────────────────────────────────────────────────────────────
|
// ── Constants ─────────────────────────────────────────────────────────────────
|
||||||
@ -12,6 +11,9 @@ import type { ActivityEntry } from "@/types/activity";
|
|||||||
/** 60-minute look-back window for delegation activity */
|
/** 60-minute look-back window for delegation activity */
|
||||||
export const A2A_WINDOW_MS = 60 * 60 * 1000;
|
export const A2A_WINDOW_MS = 60 * 60 * 1000;
|
||||||
|
|
||||||
|
/** Polling interval — refresh edges every 60 seconds */
|
||||||
|
export const A2A_POLL_MS = 60 * 1_000;
|
||||||
|
|
||||||
/** Threshold for "hot" edges: < 5 minutes → animated + violet stroke */
|
/** Threshold for "hot" edges: < 5 minutes → animated + violet stroke */
|
||||||
export const A2A_HOT_MS = 5 * 60 * 1_000;
|
export const A2A_HOT_MS = 5 * 60 * 1_000;
|
||||||
|
|
||||||
@ -129,20 +131,6 @@ export function buildA2AEdges(
|
|||||||
* `a2aEdges`. Canvas.tsx merges these with topology edges and passes the
|
* `a2aEdges`. Canvas.tsx merges these with topology edges and passes the
|
||||||
* combined list to ReactFlow.
|
* combined list to ReactFlow.
|
||||||
*
|
*
|
||||||
* Update shape (issue #61 Stage 2, replaces the 60s polling loop):
|
|
||||||
* - On mount (when showA2AEdges): one HTTP fan-out per visible workspace
|
|
||||||
* (delegation rows, 60-min window). Bootstraps the local row buffer.
|
|
||||||
* - Steady state: subscribes to ACTIVITY_LOGGED via useSocketEvent.
|
|
||||||
* Each delegation event from a visible workspace is appended to the
|
|
||||||
* buffer; edges are re-derived via the existing buildA2AEdges helper.
|
|
||||||
* - showA2AEdges toggle off: clears edges + buffer.
|
|
||||||
* - Visible-ID-set change: re-bootstraps so a freshly-shown workspace
|
|
||||||
* backfills its 60-min history (existing visibleIdsKey selector
|
|
||||||
* behaviour preserved — that's the 2026-05-04 render-loop fix).
|
|
||||||
*
|
|
||||||
* No interval poll. The singleton ReconnectingSocket already owns
|
|
||||||
* reconnect / backoff / health-check; useSocketEvent inherits those.
|
|
||||||
*
|
|
||||||
* Mount this inside CanvasInner (no ReactFlow hook dependency).
|
* Mount this inside CanvasInner (no ReactFlow hook dependency).
|
||||||
*/
|
*/
|
||||||
export function A2ATopologyOverlay() {
|
export function A2ATopologyOverlay() {
|
||||||
@ -169,9 +157,7 @@ export function A2ATopologyOverlay() {
|
|||||||
// the symptom of this re-render storm.
|
// the symptom of this re-render storm.
|
||||||
//
|
//
|
||||||
// The fix is purely the dependency-stability change here; the fetch
|
// The fix is purely the dependency-stability change here; the fetch
|
||||||
// logic is unchanged. Post-#61 the polling-driven fetch is gone, but
|
// logic is unchanged.
|
||||||
// the visibleIdsKey gate is still required so a peer-discovery write
|
|
||||||
// doesn't trigger a wasteful re-bootstrap.
|
|
||||||
const visibleIdsKey = useCanvasStore((s) =>
|
const visibleIdsKey = useCanvasStore((s) =>
|
||||||
s.nodes
|
s.nodes
|
||||||
.filter((n) => !n.hidden)
|
.filter((n) => !n.hidden)
|
||||||
@ -185,42 +171,16 @@ export function A2ATopologyOverlay() {
|
|||||||
[visibleIdsKey]
|
[visibleIdsKey]
|
||||||
);
|
);
|
||||||
|
|
||||||
// Local rolling buffer of delegation rows. Pruned by A2A_WINDOW_MS on
|
// Fetch delegation activity for all visible workspaces and rebuild overlay edges.
|
||||||
// each rebuild so a long-lived session doesn't accumulate unbounded
|
const fetchAndUpdate = useCallback(async () => {
|
||||||
// history. The buffer's high-water mark is approximately:
|
|
||||||
// visibleIds.length × bootstrap-fetch-limit (500) + WS arrivals
|
|
||||||
// Real-world ceiling: ~3000 entries at the 60-min boundary, all of
|
|
||||||
// which buildA2AEdges aggregates into at most N² edges.
|
|
||||||
const bufferRef = useRef<ActivityEntry[]>([]);
|
|
||||||
// visibleIdsRef gives the WS handler the latest visible-ID set without
|
|
||||||
// re-subscribing on every render. The bus listener is registered
|
|
||||||
// exactly once per mount; subscriber-side filtering reads from this ref.
|
|
||||||
const visibleIdsRef = useRef(visibleIds);
|
|
||||||
visibleIdsRef.current = visibleIds;
|
|
||||||
|
|
||||||
// Re-derive overlay edges from the current buffer + push to store.
|
|
||||||
// Prunes by A2A_WINDOW_MS first so memory stays bounded across long
|
|
||||||
// sessions and the aggregation cost stays O(window-size).
|
|
||||||
const recomputeAndPush = useCallback(() => {
|
|
||||||
const cutoff = Date.now() - A2A_WINDOW_MS;
|
|
||||||
bufferRef.current = bufferRef.current.filter(
|
|
||||||
(r) => new Date(r.created_at).getTime() > cutoff
|
|
||||||
);
|
|
||||||
setA2AEdges(buildA2AEdges(bufferRef.current));
|
|
||||||
}, [setA2AEdges]);
|
|
||||||
|
|
||||||
// Bootstrap fan-out — one HTTP per visible workspace. Replaces the
|
|
||||||
// 60s polling loop entirely. Race-aware: any WS arrivals that landed
|
|
||||||
// in the buffer DURING the fetch (between the await and resume) are
|
|
||||||
// preserved by id-dedup-with-fetched-first ordering.
|
|
||||||
const bootstrap = useCallback(async () => {
|
|
||||||
if (visibleIds.length === 0) {
|
if (visibleIds.length === 0) {
|
||||||
bufferRef.current = [];
|
|
||||||
setA2AEdges([]);
|
setA2AEdges([]);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const fetchedRows = (
|
// Fan-out — one request per visible workspace.
|
||||||
|
// Per-request failures are swallowed so one broken workspace doesn't blank the overlay.
|
||||||
|
const allRows = (
|
||||||
await Promise.all(
|
await Promise.all(
|
||||||
visibleIds.map((id) =>
|
visibleIds.map((id) =>
|
||||||
api
|
api
|
||||||
@ -232,76 +192,24 @@ export function A2ATopologyOverlay() {
|
|||||||
)
|
)
|
||||||
).flat();
|
).flat();
|
||||||
|
|
||||||
// Merge: fetched rows first, then any in-flight WS arrivals that
|
setA2AEdges(buildA2AEdges(allRows));
|
||||||
// accumulated during the await. Dedup by id so rows that appear
|
|
||||||
// in both paths are not double-counted in the aggregation.
|
|
||||||
const merged = [...fetchedRows, ...bufferRef.current];
|
|
||||||
const seen = new Set<string>();
|
|
||||||
bufferRef.current = merged.filter((r) => {
|
|
||||||
if (seen.has(r.id)) return false;
|
|
||||||
seen.add(r.id);
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
recomputeAndPush();
|
|
||||||
} catch {
|
} catch {
|
||||||
// Overlay failure is non-critical — canvas remains functional
|
// Overlay failure is non-critical — canvas remains functional
|
||||||
}
|
}
|
||||||
}, [visibleIds, setA2AEdges, recomputeAndPush]);
|
}, [visibleIds, setA2AEdges]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!showA2AEdges) {
|
if (!showA2AEdges) {
|
||||||
// Clear edges + buffer immediately when toggled off
|
// Clear edges immediately when toggled off
|
||||||
bufferRef.current = [];
|
|
||||||
setA2AEdges([]);
|
setA2AEdges([]);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
void bootstrap();
|
|
||||||
}, [showA2AEdges, bootstrap, setA2AEdges]);
|
|
||||||
|
|
||||||
// Live-update path. Filters server-side ACTIVITY_LOGGED events down
|
// Initial fetch, then poll every 60 s
|
||||||
// to delegation initiations from visible workspaces and appends each
|
void fetchAndUpdate();
|
||||||
// into the rolling buffer, re-deriving edges via buildA2AEdges.
|
const timer = setInterval(() => void fetchAndUpdate(), A2A_POLL_MS);
|
||||||
//
|
return () => clearInterval(timer);
|
||||||
// Only `method === "delegate"` rows count — the same filter
|
}, [showA2AEdges, fetchAndUpdate, setA2AEdges]);
|
||||||
// buildA2AEdges applies — so delegate_result rows arriving over the
|
|
||||||
// wire don't double-count.
|
|
||||||
useSocketEvent((msg) => {
|
|
||||||
if (!showA2AEdges) return;
|
|
||||||
if (msg.event !== "ACTIVITY_LOGGED") return;
|
|
||||||
|
|
||||||
const p = (msg.payload || {}) as Record<string, unknown>;
|
|
||||||
if (p.activity_type !== "delegation") return;
|
|
||||||
if (p.method !== "delegate") return;
|
|
||||||
|
|
||||||
const wsId = msg.workspace_id;
|
|
||||||
if (!visibleIdsRef.current.includes(wsId)) return;
|
|
||||||
|
|
||||||
// Synthesise an ActivityEntry from the WS payload so buildA2AEdges
|
|
||||||
// (which the bootstrap path also feeds) handles it identically.
|
|
||||||
const entry: ActivityEntry = {
|
|
||||||
id:
|
|
||||||
(p.id as string) ||
|
|
||||||
`ws-push-${msg.timestamp || Date.now()}-${wsId}`,
|
|
||||||
workspace_id: wsId,
|
|
||||||
activity_type: "delegation",
|
|
||||||
source_id: (p.source_id as string | null) ?? null,
|
|
||||||
target_id: (p.target_id as string | null) ?? null,
|
|
||||||
method: "delegate",
|
|
||||||
summary: (p.summary as string | null) ?? null,
|
|
||||||
request_body: null,
|
|
||||||
response_body: null,
|
|
||||||
duration_ms: (p.duration_ms as number | null) ?? null,
|
|
||||||
status: (p.status as string) || "ok",
|
|
||||||
error_detail: null,
|
|
||||||
created_at:
|
|
||||||
(p.created_at as string) ||
|
|
||||||
msg.timestamp ||
|
|
||||||
new Date().toISOString(),
|
|
||||||
};
|
|
||||||
|
|
||||||
bufferRef.current = [...bufferRef.current, entry];
|
|
||||||
recomputeAndPush();
|
|
||||||
});
|
|
||||||
|
|
||||||
// Pure side-effect — renders nothing
|
// Pure side-effect — renders nothing
|
||||||
return null;
|
return null;
|
||||||
|
|||||||
@ -3,7 +3,6 @@
|
|||||||
import { useState, useEffect, useCallback, useRef } from "react";
|
import { useState, useEffect, useCallback, useRef } from "react";
|
||||||
import { useCanvasStore } from "@/store/canvas";
|
import { useCanvasStore } from "@/store/canvas";
|
||||||
import { api } from "@/lib/api";
|
import { api } from "@/lib/api";
|
||||||
import { useSocketEvent } from "@/hooks/useSocketEvent";
|
|
||||||
import { COMM_TYPE_LABELS } from "@/lib/design-tokens";
|
import { COMM_TYPE_LABELS } from "@/lib/design-tokens";
|
||||||
|
|
||||||
interface Communication {
|
interface Communication {
|
||||||
@ -19,71 +18,32 @@ interface Communication {
|
|||||||
durationMs: number | null;
|
durationMs: number | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Workspace-server `ACTIVITY_LOGGED` payload shape. Pulled out so the
|
|
||||||
* WS handler below has a typed view of the same fields the HTTP
|
|
||||||
* bootstrap consumes — drift between the two paths is a class of bug
|
|
||||||
* AgentCommsPanel hit historically. */
|
|
||||||
interface ActivityLoggedPayload {
|
|
||||||
id?: string;
|
|
||||||
activity_type?: string;
|
|
||||||
source_id?: string | null;
|
|
||||||
target_id?: string | null;
|
|
||||||
workspace_id?: string;
|
|
||||||
summary?: string | null;
|
|
||||||
status?: string;
|
|
||||||
duration_ms?: number | null;
|
|
||||||
created_at?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Fan-out cap for the bootstrap HTTP fetch on mount / on visibility
|
|
||||||
* re-open. Kept at 3 (carried over from the 2026-05-04 fix) so a
|
|
||||||
* freshly-mounted overlay on a 15-workspace tenant only spends 3
|
|
||||||
* round-trips bootstrapping. Live updates after that arrive via the
|
|
||||||
* WS subscription below — no polling, no fan-out to maintain. */
|
|
||||||
const BOOTSTRAP_FAN_OUT_CAP = 3;
|
|
||||||
|
|
||||||
/** Cap on the rendered list. Bootstrap + every WS push prepends, the
|
|
||||||
* list is sliced to this size after each update. Mirrors the prior
|
|
||||||
* polling-loop behaviour. */
|
|
||||||
const COMMS_RENDER_CAP = 20;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Overlay showing recent A2A communications between workspaces.
|
* Overlay showing recent A2A communications between workspaces.
|
||||||
*
|
* Renders as a floating log panel that auto-updates.
|
||||||
* Update shape (issue #61 Stage 1, replaces the 30s polling loop):
|
|
||||||
* - On mount (when visible): one HTTP bootstrap per online workspace,
|
|
||||||
* capped at BOOTSTRAP_FAN_OUT_CAP. Yields the initial recent-comms
|
|
||||||
* window without waiting for live events.
|
|
||||||
* - Steady state: subscribes to ACTIVITY_LOGGED via useSocketEvent.
|
|
||||||
* Each event with a matching activity_type from a visible online
|
|
||||||
* workspace gets synthesised into a Communication and prepended.
|
|
||||||
* - Visibility re-open: re-bootstraps so the user sees the freshest
|
|
||||||
* window even if WS was idle while collapsed.
|
|
||||||
*
|
|
||||||
* No interval poll. The singleton ReconnectingSocket in `store/socket.ts`
|
|
||||||
* already owns reconnect/backoff/health-check, and `useSocketEvent`
|
|
||||||
* inherits those guarantees. If WS is genuinely unhealthy, the overlay
|
|
||||||
* shows the bootstrap snapshot until the next visibility re-open or
|
|
||||||
* the next WS reconnect (which fires its own rehydrate burst).
|
|
||||||
*/
|
*/
|
||||||
export function CommunicationOverlay() {
|
export function CommunicationOverlay() {
|
||||||
const [comms, setComms] = useState<Communication[]>([]);
|
const [comms, setComms] = useState<Communication[]>([]);
|
||||||
const [visible, setVisible] = useState(true);
|
const [visible, setVisible] = useState(true);
|
||||||
const selectedNodeId = useCanvasStore((s) => s.selectedNodeId);
|
const selectedNodeId = useCanvasStore((s) => s.selectedNodeId);
|
||||||
const nodes = useCanvasStore((s) => s.nodes);
|
const nodes = useCanvasStore((s) => s.nodes);
|
||||||
// nodesRef gives the WS handler current node-name resolution without
|
|
||||||
// re-subscribing on every node-list change. The bus listener is
|
|
||||||
// registered exactly once per mount; subscriber-side filtering reads
|
|
||||||
// the latest value via this ref.
|
|
||||||
const nodesRef = useRef(nodes);
|
const nodesRef = useRef(nodes);
|
||||||
nodesRef.current = nodes;
|
nodesRef.current = nodes;
|
||||||
|
|
||||||
const bootstrapComms = useCallback(async () => {
|
const fetchComms = useCallback(async () => {
|
||||||
try {
|
try {
|
||||||
|
// Fan-out cap: each polled workspace = 1 round-trip. The platform
|
||||||
|
// rate limits at 600 req/min/IP; combined with heartbeats + other
|
||||||
|
// canvas polling, every workspace polled here costs ~6 req/min
|
||||||
|
// (1 every 30s × 1 per workspace). Capping at 3 keeps this
|
||||||
|
// overlay's footprint at 18 req/min worst case — well under
|
||||||
|
// budget even with 8+ workspaces visible. Caught 2026-05-04 when
|
||||||
|
// a user with 8+ workspaces (Design Director + 6 sub-agents +
|
||||||
|
// 3 standalones) saw sustained 429s in canvas console.
|
||||||
const onlineNodes = nodesRef.current.filter((n) => n.data.status === "online");
|
const onlineNodes = nodesRef.current.filter((n) => n.data.status === "online");
|
||||||
const allComms: Communication[] = [];
|
const allComms: Communication[] = [];
|
||||||
|
|
||||||
for (const node of onlineNodes.slice(0, BOOTSTRAP_FAN_OUT_CAP)) {
|
for (const node of onlineNodes.slice(0, 3)) {
|
||||||
try {
|
try {
|
||||||
const activities = await api.get<Array<{
|
const activities = await api.get<Array<{
|
||||||
id: string;
|
id: string;
|
||||||
@ -99,8 +59,8 @@ export function CommunicationOverlay() {
|
|||||||
|
|
||||||
for (const a of activities) {
|
for (const a of activities) {
|
||||||
if (a.activity_type === "a2a_send" || a.activity_type === "a2a_receive") {
|
if (a.activity_type === "a2a_send" || a.activity_type === "a2a_receive") {
|
||||||
const sourceNode = nodesRef.current.find((n) => n.id === (a.source_id || a.workspace_id));
|
const sourceNode = nodes.find((n) => n.id === (a.source_id || a.workspace_id));
|
||||||
const targetNode = nodesRef.current.find((n) => n.id === (a.target_id || ""));
|
const targetNode = nodes.find((n) => n.id === (a.target_id || ""));
|
||||||
allComms.push({
|
allComms.push({
|
||||||
id: a.id,
|
id: a.id,
|
||||||
sourceId: a.source_id || a.workspace_id,
|
sourceId: a.source_id || a.workspace_id,
|
||||||
@ -116,12 +76,11 @@ export function CommunicationOverlay() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// Per-workspace failures must not blank the panel — the same
|
// Skip workspaces that fail
|
||||||
// robustness the polling version had.
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Newest-first with id-dedup, capped at COMMS_RENDER_CAP.
|
// Sort by timestamp, newest first, dedupe
|
||||||
const seen = new Set<string>();
|
const seen = new Set<string>();
|
||||||
const sorted = allComms
|
const sorted = allComms
|
||||||
.sort((a, b) => b.timestamp.localeCompare(a.timestamp))
|
.sort((a, b) => b.timestamp.localeCompare(a.timestamp))
|
||||||
@ -130,78 +89,29 @@ export function CommunicationOverlay() {
|
|||||||
seen.add(c.id);
|
seen.add(c.id);
|
||||||
return true;
|
return true;
|
||||||
})
|
})
|
||||||
.slice(0, COMMS_RENDER_CAP);
|
.slice(0, 20);
|
||||||
|
|
||||||
setComms(sorted);
|
setComms(sorted);
|
||||||
} catch {
|
} catch {
|
||||||
// Bootstrap failure is non-blocking — the WS subscription below
|
// Silently handle API errors
|
||||||
// will populate the panel as live events arrive.
|
|
||||||
}
|
}
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
// Bootstrap once on mount + every time the user re-opens after a
|
|
||||||
// collapse. Closed-panel state intentionally drops live updates so
|
|
||||||
// the panel doesn't churn invisible state — the next open reloads.
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
|
// Gate polling on visibility — when the user collapses the overlay
|
||||||
|
// the data isn't being read, so the per-workspace fan-out becomes
|
||||||
|
// pure rate-limit overhead. Pre-fix this overlay polled regardless
|
||||||
|
// of whether the panel was shown, costing ~36 req/min from a
|
||||||
|
// hidden surface.
|
||||||
if (!visible) return;
|
if (!visible) return;
|
||||||
bootstrapComms();
|
fetchComms();
|
||||||
}, [bootstrapComms, visible]);
|
// 30s cadence (was 10s). At 3-workspace fan-out that's 6 req/min
|
||||||
|
// worst case from this overlay. Combined with heartbeats (~30/min)
|
||||||
// Live-update path. Filters server-side ACTIVITY_LOGGED events down
|
// and other canvas polling, leaves ample headroom under the 600/
|
||||||
// to the comm-overlay-relevant subset and prepends each into the
|
// min/IP server-side rate limit even at 8+ workspace tenants.
|
||||||
// rendered list with the same dedup the bootstrap path uses.
|
const interval = setInterval(fetchComms, 30000);
|
||||||
//
|
return () => clearInterval(interval);
|
||||||
// Scope guard: ignore events for workspaces not in the visible online
|
}, [fetchComms, visible]);
|
||||||
// set, so a user collapsing one workspace doesn't see its comms
|
|
||||||
// continue to scroll in. Same shape the bootstrap path applies.
|
|
||||||
useSocketEvent((msg) => {
|
|
||||||
if (!visible) return;
|
|
||||||
if (msg.event !== "ACTIVITY_LOGGED") return;
|
|
||||||
|
|
||||||
const p = (msg.payload || {}) as ActivityLoggedPayload;
|
|
||||||
const type = p.activity_type;
|
|
||||||
if (type !== "a2a_send" && type !== "a2a_receive" && type !== "task_update") return;
|
|
||||||
|
|
||||||
const wsId = msg.workspace_id;
|
|
||||||
const onlineSet = new Set(
|
|
||||||
nodesRef.current.filter((n) => n.data.status === "online").map((n) => n.id),
|
|
||||||
);
|
|
||||||
if (!onlineSet.has(wsId)) return;
|
|
||||||
|
|
||||||
const sourceId = p.source_id || wsId;
|
|
||||||
const targetId = p.target_id || "";
|
|
||||||
const sourceNode = nodesRef.current.find((n) => n.id === sourceId);
|
|
||||||
const targetNode = nodesRef.current.find((n) => n.id === targetId);
|
|
||||||
|
|
||||||
const incoming: Communication = {
|
|
||||||
id: p.id || `${msg.timestamp || Date.now()}:${sourceId}:${targetId}`,
|
|
||||||
sourceId,
|
|
||||||
targetId,
|
|
||||||
sourceName: sourceNode?.data.name || "Unknown",
|
|
||||||
targetName: targetNode?.data.name || "Unknown",
|
|
||||||
type: type as Communication["type"],
|
|
||||||
summary: p.summary || "",
|
|
||||||
status: p.status || "ok",
|
|
||||||
timestamp: p.created_at || msg.timestamp || new Date().toISOString(),
|
|
||||||
durationMs: p.duration_ms ?? null,
|
|
||||||
};
|
|
||||||
|
|
||||||
setComms((prev) => {
|
|
||||||
// Prepend, dedup by id, re-cap. Functional setState is necessary
|
|
||||||
// because two ACTIVITY_LOGGED events arriving in the same React
|
|
||||||
// batch would otherwise read a stale `comms` from the closure.
|
|
||||||
const seen = new Set<string>();
|
|
||||||
const merged = [incoming, ...prev]
|
|
||||||
.sort((a, b) => b.timestamp.localeCompare(a.timestamp))
|
|
||||||
.filter((c) => {
|
|
||||||
if (seen.has(c.id)) return false;
|
|
||||||
seen.add(c.id);
|
|
||||||
return true;
|
|
||||||
})
|
|
||||||
.slice(0, COMMS_RENDER_CAP);
|
|
||||||
return merged;
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!visible || comms.length === 0) {
|
if (!visible || comms.length === 0) {
|
||||||
return (
|
return (
|
||||||
|
|||||||
@ -1,175 +0,0 @@
|
|||||||
"use client";
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PurchaseSuccessModal — demo-only post-purchase confirmation.
|
|
||||||
*
|
|
||||||
* Mounted on the canvas root (`app/page.tsx`). On first paint it inspects
|
|
||||||
* `?purchase_success=1[&item=<name>]` on the current URL. If present, it
|
|
||||||
* renders a centred modal styled after `ConfirmDialog`, schedules a 5s
|
|
||||||
* auto-dismiss, and rewrites the URL via `history.replaceState` to drop
|
|
||||||
* the params so a refresh after dismiss does NOT re-show the modal.
|
|
||||||
*
|
|
||||||
* Mock for the funding demo — there is no real billing surface behind
|
|
||||||
* this. The marketplace "Purchase" button on the landing page redirects
|
|
||||||
* here with the params; this modal is the only thing the user sees of
|
|
||||||
* the "transaction".
|
|
||||||
*
|
|
||||||
* Styling matches the warm-paper @theme tokens (surface-sunken / line /
|
|
||||||
* ink / good) so it tracks light + dark without per-mode overrides.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { useEffect, useRef, useState } from "react";
|
|
||||||
import { createPortal } from "react-dom";
|
|
||||||
|
|
||||||
const AUTO_DISMISS_MS = 5000;
|
|
||||||
|
|
||||||
function readPurchaseParams(): { open: boolean; item: string | null } {
|
|
||||||
if (typeof window === "undefined") return { open: false, item: null };
|
|
||||||
const sp = new URLSearchParams(window.location.search);
|
|
||||||
const flag = sp.get("purchase_success");
|
|
||||||
if (flag !== "1" && flag !== "true") return { open: false, item: null };
|
|
||||||
return { open: true, item: sp.get("item") };
|
|
||||||
}
|
|
||||||
|
|
||||||
function stripPurchaseParams() {
|
|
||||||
if (typeof window === "undefined") return;
|
|
||||||
const url = new URL(window.location.href);
|
|
||||||
url.searchParams.delete("purchase_success");
|
|
||||||
url.searchParams.delete("item");
|
|
||||||
// replaceState (not pushState) so back-button doesn't return to the
|
|
||||||
// pre-strip URL and re-trigger the modal.
|
|
||||||
window.history.replaceState({}, "", url.toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
export function PurchaseSuccessModal() {
|
|
||||||
const [open, setOpen] = useState(false);
|
|
||||||
const [item, setItem] = useState<string | null>(null);
|
|
||||||
const [mounted, setMounted] = useState(false);
|
|
||||||
const dialogRef = useRef<HTMLDivElement>(null);
|
|
||||||
|
|
||||||
// Read the URL params once on mount. We don't subscribe to navigation —
|
|
||||||
// this modal is a one-shot for the demo redirect, not a persistent
|
|
||||||
// listener.
|
|
||||||
useEffect(() => {
|
|
||||||
setMounted(true);
|
|
||||||
const { open: shouldOpen, item: itemName } = readPurchaseParams();
|
|
||||||
if (shouldOpen) {
|
|
||||||
setOpen(true);
|
|
||||||
setItem(itemName);
|
|
||||||
// Clean the URL immediately so a refresh after the modal is closed
|
|
||||||
// (or even while it's still open) does NOT re-trigger it.
|
|
||||||
stripPurchaseParams();
|
|
||||||
}
|
|
||||||
}, []);
|
|
||||||
|
|
||||||
// Auto-dismiss timer + Escape handler.
|
|
||||||
useEffect(() => {
|
|
||||||
if (!open) return;
|
|
||||||
const t = window.setTimeout(() => setOpen(false), AUTO_DISMISS_MS);
|
|
||||||
const onKey = (e: KeyboardEvent) => {
|
|
||||||
if (e.key === "Escape") setOpen(false);
|
|
||||||
};
|
|
||||||
window.addEventListener("keydown", onKey);
|
|
||||||
// Focus the close button so keyboard users land on it after redirect.
|
|
||||||
const raf = requestAnimationFrame(() => {
|
|
||||||
dialogRef.current?.querySelector<HTMLButtonElement>("button")?.focus();
|
|
||||||
});
|
|
||||||
return () => {
|
|
||||||
window.clearTimeout(t);
|
|
||||||
window.removeEventListener("keydown", onKey);
|
|
||||||
cancelAnimationFrame(raf);
|
|
||||||
};
|
|
||||||
}, [open]);
|
|
||||||
|
|
||||||
if (!open || !mounted) return null;
|
|
||||||
|
|
||||||
const itemLabel = item ? decodeURIComponent(item) : "Your new agent";
|
|
||||||
|
|
||||||
return createPortal(
|
|
||||||
<div
|
|
||||||
className="fixed inset-0 z-[9999] flex items-center justify-center"
|
|
||||||
data-testid="purchase-success-modal"
|
|
||||||
>
|
|
||||||
{/* Backdrop — click closes, matches ConfirmDialog backdrop. */}
|
|
||||||
<div
|
|
||||||
className="absolute inset-0 bg-black/60 backdrop-blur-sm"
|
|
||||||
onClick={() => setOpen(false)}
|
|
||||||
aria-hidden="true"
|
|
||||||
/>
|
|
||||||
|
|
||||||
<div
|
|
||||||
ref={dialogRef}
|
|
||||||
role="dialog"
|
|
||||||
aria-modal="true"
|
|
||||||
aria-labelledby="purchase-success-title"
|
|
||||||
className="relative bg-surface-sunken border border-line rounded-xl shadow-2xl shadow-black/50 max-w-[420px] w-full mx-4 overflow-hidden"
|
|
||||||
>
|
|
||||||
<div className="px-6 pt-6 pb-4">
|
|
||||||
<div className="flex items-start gap-4">
|
|
||||||
{/* Success glyph — uses --color-good so it tracks the theme.
|
|
||||||
Inline SVG over an emoji so it stays readable + on-brand
|
|
||||||
in both light and dark. */}
|
|
||||||
<div
|
|
||||||
className="flex h-10 w-10 flex-shrink-0 items-center justify-center rounded-full"
|
|
||||||
style={{
|
|
||||||
background:
|
|
||||||
"color-mix(in srgb, var(--color-good) 15%, transparent)",
|
|
||||||
color: "var(--color-good)",
|
|
||||||
}}
|
|
||||||
>
|
|
||||||
<svg
|
|
||||||
width="22"
|
|
||||||
height="22"
|
|
||||||
viewBox="0 0 24 24"
|
|
||||||
fill="none"
|
|
||||||
aria-hidden="true"
|
|
||||||
>
|
|
||||||
<circle
|
|
||||||
cx="12"
|
|
||||||
cy="12"
|
|
||||||
r="10"
|
|
||||||
stroke="currentColor"
|
|
||||||
strokeWidth="1.5"
|
|
||||||
/>
|
|
||||||
<path
|
|
||||||
d="M7.5 12.5L10.5 15.5L16.5 9.5"
|
|
||||||
stroke="currentColor"
|
|
||||||
strokeWidth="1.8"
|
|
||||||
strokeLinecap="round"
|
|
||||||
strokeLinejoin="round"
|
|
||||||
/>
|
|
||||||
</svg>
|
|
||||||
</div>
|
|
||||||
<div className="flex-1">
|
|
||||||
<h3
|
|
||||||
id="purchase-success-title"
|
|
||||||
className="text-base font-semibold text-ink"
|
|
||||||
>
|
|
||||||
Purchase successful
|
|
||||||
</h3>
|
|
||||||
<p className="mt-1.5 text-[13px] leading-relaxed text-ink-mid">
|
|
||||||
<span className="font-medium text-ink">{itemLabel}</span> has
|
|
||||||
been added to your workspace. Provisioning starts in the
|
|
||||||
background — you can keep working while it spins up.
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div className="flex items-center justify-between gap-3 px-6 py-3 border-t border-line bg-surface/50">
|
|
||||||
<span className="font-mono text-[10.5px] uppercase tracking-[0.12em] text-ink-soft">
|
|
||||||
auto-dismiss · {AUTO_DISMISS_MS / 1000}s
|
|
||||||
</span>
|
|
||||||
<button
|
|
||||||
type="button"
|
|
||||||
onClick={() => setOpen(false)}
|
|
||||||
className="px-3.5 py-1.5 text-[13px] rounded-lg bg-accent hover:bg-accent-strong text-white transition-colors focus:outline-none focus-visible:ring-2 focus-visible:ring-offset-2 focus-visible:ring-offset-surface-sunken focus-visible:ring-accent/60"
|
|
||||||
>
|
|
||||||
Close
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>,
|
|
||||||
document.body,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
@ -41,10 +41,6 @@ vi.mock("@/store/canvas", () => ({
|
|||||||
// ── Imports (after mocks) ─────────────────────────────────────────────────────
|
// ── Imports (after mocks) ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
import { api } from "@/lib/api";
|
import { api } from "@/lib/api";
|
||||||
import {
|
|
||||||
emitSocketEvent,
|
|
||||||
_resetSocketEventListenersForTests,
|
|
||||||
} from "@/store/socket-events";
|
|
||||||
import {
|
import {
|
||||||
buildA2AEdges,
|
buildA2AEdges,
|
||||||
formatA2ARelativeTime,
|
formatA2ARelativeTime,
|
||||||
@ -346,151 +342,6 @@ describe("A2ATopologyOverlay component", () => {
|
|||||||
expect(mockGet.mock.calls.length).toBe(callsAfterMount);
|
expect(mockGet.mock.calls.length).toBe(callsAfterMount);
|
||||||
});
|
});
|
||||||
|
|
||||||
// ── #61 Stage 2: ACTIVITY_LOGGED subscription tests ────────────────────────
|
|
||||||
//
|
|
||||||
// Pin the post-#61 behaviour: WS push for delegation contributes to
|
|
||||||
// the overlay's edge buffer with NO additional HTTP fetch. Same shape
|
|
||||||
// as Stage 1 (CommunicationOverlay).
|
|
||||||
|
|
||||||
describe("#61 stage 2 — ACTIVITY_LOGGED subscription", () => {
|
|
||||||
beforeEach(() => {
|
|
||||||
_resetSocketEventListenersForTests();
|
|
||||||
});
|
|
||||||
afterEach(() => {
|
|
||||||
_resetSocketEventListenersForTests();
|
|
||||||
});
|
|
||||||
|
|
||||||
function emitDelegation(overrides: {
|
|
||||||
workspaceId?: string;
|
|
||||||
sourceId?: string;
|
|
||||||
targetId?: string;
|
|
||||||
method?: string;
|
|
||||||
activityType?: string;
|
|
||||||
} = {}) {
|
|
||||||
// Use Date.now() (real time, fake-timer-frozen) rather than the
|
|
||||||
// hardcoded NOW constant — buildA2AEdges prunes by Date.now() -
|
|
||||||
// A2A_WINDOW_MS, so a row dated against the wrong epoch silently
|
|
||||||
// falls outside the window and the test fails for a confusing
|
|
||||||
// reason ("edges array empty" vs "filter dropped my row").
|
|
||||||
const realNow = Date.now();
|
|
||||||
emitSocketEvent({
|
|
||||||
event: "ACTIVITY_LOGGED",
|
|
||||||
workspace_id: overrides.workspaceId ?? "ws-a",
|
|
||||||
timestamp: new Date(realNow).toISOString(),
|
|
||||||
payload: {
|
|
||||||
id: `act-${Math.random().toString(36).slice(2)}`,
|
|
||||||
activity_type: overrides.activityType ?? "delegation",
|
|
||||||
method: overrides.method ?? "delegate",
|
|
||||||
source_id: overrides.sourceId ?? "ws-a",
|
|
||||||
target_id: overrides.targetId ?? "ws-b",
|
|
||||||
status: "ok",
|
|
||||||
created_at: new Date(realNow - 30_000).toISOString(),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
it("does NOT poll on a 60s interval after bootstrap (post-#61)", async () => {
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
||||||
mockGet.mockResolvedValue([] as any);
|
|
||||||
render(<A2ATopologyOverlay />);
|
|
||||||
await act(async () => { await Promise.resolve(); });
|
|
||||||
const callsAfterBootstrap = mockGet.mock.calls.length;
|
|
||||||
expect(callsAfterBootstrap).toBe(2); // ws-a + ws-b
|
|
||||||
|
|
||||||
// Pre-#61: a 60s clock tick would fire a fresh fan-out (2 more
|
|
||||||
// calls). Post-#61: no interval, no extra calls.
|
|
||||||
await act(async () => {
|
|
||||||
vi.advanceTimersByTime(120_000);
|
|
||||||
});
|
|
||||||
expect(mockGet.mock.calls.length).toBe(callsAfterBootstrap);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push for a delegation event from a visible workspace updates edges with NO HTTP call", async () => {
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
||||||
mockGet.mockResolvedValue([] as any);
|
|
||||||
render(<A2ATopologyOverlay />);
|
|
||||||
await act(async () => { await Promise.resolve(); await Promise.resolve(); });
|
|
||||||
mockGet.mockClear();
|
|
||||||
mockStoreState.setA2AEdges.mockClear();
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitDelegation({ sourceId: "ws-a", targetId: "ws-b" });
|
|
||||||
});
|
|
||||||
|
|
||||||
// Edges-set called with at least one a2a edge for the new push.
|
|
||||||
const calls = mockStoreState.setA2AEdges.mock.calls;
|
|
||||||
expect(calls.length).toBeGreaterThanOrEqual(1);
|
|
||||||
const lastCall = calls[calls.length - 1][0] as Array<{ id: string }>;
|
|
||||||
expect(lastCall.some((e) => e.id === "a2a-ws-a-ws-b")).toBe(true);
|
|
||||||
|
|
||||||
// Critical: no HTTP fetch fired during the WS path.
|
|
||||||
expect(mockGet).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push for a non-delegation activity_type is ignored", async () => {
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
||||||
mockGet.mockResolvedValue([] as any);
|
|
||||||
render(<A2ATopologyOverlay />);
|
|
||||||
await act(async () => { await Promise.resolve(); });
|
|
||||||
mockStoreState.setA2AEdges.mockClear();
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitDelegation({ activityType: "a2a_send" });
|
|
||||||
});
|
|
||||||
|
|
||||||
// setA2AEdges must not be called by the WS handler — the only
|
|
||||||
// setA2AEdges calls in this test came from the initial bootstrap.
|
|
||||||
expect(mockStoreState.setA2AEdges).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push for a delegate_result row is ignored (mirrors buildA2AEdges filter)", async () => {
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
||||||
mockGet.mockResolvedValue([] as any);
|
|
||||||
render(<A2ATopologyOverlay />);
|
|
||||||
await act(async () => { await Promise.resolve(); });
|
|
||||||
mockStoreState.setA2AEdges.mockClear();
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitDelegation({ method: "delegate_result" });
|
|
||||||
});
|
|
||||||
|
|
||||||
// delegate_result rows do not contribute to the edge count — they
|
|
||||||
// are completion signals, not initiations.
|
|
||||||
expect(mockStoreState.setA2AEdges).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push from a hidden workspace is ignored", async () => {
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
||||||
mockGet.mockResolvedValue([] as any);
|
|
||||||
render(<A2ATopologyOverlay />);
|
|
||||||
await act(async () => { await Promise.resolve(); });
|
|
||||||
mockStoreState.setA2AEdges.mockClear();
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitDelegation({ workspaceId: "ws-hidden" });
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(mockStoreState.setA2AEdges).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push while showA2AEdges is false is ignored", async () => {
|
|
||||||
mockStoreState.showA2AEdges = false;
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
||||||
mockGet.mockResolvedValue([] as any);
|
|
||||||
render(<A2ATopologyOverlay />);
|
|
||||||
// The mount path with showA2AEdges=false calls setA2AEdges([])
|
|
||||||
// once — clear that to isolate the WS path.
|
|
||||||
mockStoreState.setA2AEdges.mockClear();
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitDelegation();
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(mockStoreState.setA2AEdges).not.toHaveBeenCalled();
|
|
||||||
expect(mockGet).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
it("re-fetches when the visible ID set actually changes", async () => {
|
it("re-fetches when the visible ID set actually changes", async () => {
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
mockGet.mockResolvedValue([] as any);
|
mockGet.mockResolvedValue([] as any);
|
||||||
|
|||||||
@ -36,10 +36,6 @@ vi.mock("@/hooks/useWorkspaceName", () => ({
|
|||||||
useWorkspaceName: () => () => "Test WS",
|
useWorkspaceName: () => () => "Test WS",
|
||||||
}));
|
}));
|
||||||
|
|
||||||
import {
|
|
||||||
emitSocketEvent,
|
|
||||||
_resetSocketEventListenersForTests,
|
|
||||||
} from "@/store/socket-events";
|
|
||||||
import { ActivityTab } from "../tabs/ActivityTab";
|
import { ActivityTab } from "../tabs/ActivityTab";
|
||||||
|
|
||||||
// ── Fixtures ──────────────────────────────────────────────────────────────────
|
// ── Fixtures ──────────────────────────────────────────────────────────────────
|
||||||
@ -362,191 +358,6 @@ describe("ActivityTab — refresh button", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
// ── Suite 6.5: ACTIVITY_LOGGED subscription (#61 stage 3) ─────────────────────
|
|
||||||
//
|
|
||||||
// Pin the post-#61 behaviour: WS push extends the rendered list with NO
|
|
||||||
// additional HTTP fetch. The 5s polling loop is gone; live updates
|
|
||||||
// arrive over the WebSocket bus.
|
|
||||||
|
|
||||||
describe("ActivityTab — #61 stage 3: ACTIVITY_LOGGED subscription", () => {
|
|
||||||
beforeEach(() => {
|
|
||||||
vi.clearAllMocks();
|
|
||||||
mockGet.mockResolvedValue([]);
|
|
||||||
_resetSocketEventListenersForTests();
|
|
||||||
});
|
|
||||||
afterEach(() => {
|
|
||||||
cleanup();
|
|
||||||
_resetSocketEventListenersForTests();
|
|
||||||
});
|
|
||||||
|
|
||||||
function emitActivity(overrides: {
|
|
||||||
workspaceId?: string;
|
|
||||||
activityType?: string;
|
|
||||||
summary?: string;
|
|
||||||
id?: string;
|
|
||||||
} = {}) {
|
|
||||||
const realNow = Date.now();
|
|
||||||
emitSocketEvent({
|
|
||||||
event: "ACTIVITY_LOGGED",
|
|
||||||
workspace_id: overrides.workspaceId ?? "ws-1",
|
|
||||||
timestamp: new Date(realNow).toISOString(),
|
|
||||||
payload: {
|
|
||||||
id: overrides.id ?? `act-${Math.random().toString(36).slice(2)}`,
|
|
||||||
activity_type: overrides.activityType ?? "agent_log",
|
|
||||||
source_id: null,
|
|
||||||
target_id: null,
|
|
||||||
method: null,
|
|
||||||
summary: overrides.summary ?? "live-pushed",
|
|
||||||
status: "ok",
|
|
||||||
created_at: new Date(realNow - 5_000).toISOString(),
|
|
||||||
},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
it("WS push for matching workspace prepends to the list with NO HTTP call", async () => {
|
|
||||||
render(<ActivityTab workspaceId="ws-1" />);
|
|
||||||
await waitFor(() => {
|
|
||||||
expect(screen.getByText(/0 activities|no activity/i)).toBeTruthy();
|
|
||||||
});
|
|
||||||
mockGet.mockClear();
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitActivity({ summary: "live-row-from-bus" });
|
|
||||||
});
|
|
||||||
|
|
||||||
await waitFor(() => {
|
|
||||||
expect(screen.getByText(/live-row-from-bus/)).toBeTruthy();
|
|
||||||
});
|
|
||||||
expect(mockGet).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push for a different workspace is ignored", async () => {
|
|
||||||
render(<ActivityTab workspaceId="ws-1" />);
|
|
||||||
await waitFor(() => screen.getByText(/no activity/i));
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitActivity({
|
|
||||||
workspaceId: "ws-other",
|
|
||||||
summary: "should-not-render-other-ws",
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(screen.queryByText(/should-not-render-other-ws/)).toBeNull();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push respects the active filter — non-matching activity_type is ignored", async () => {
|
|
||||||
render(<ActivityTab workspaceId="ws-1" />);
|
|
||||||
await waitFor(() => screen.getByText(/no activity/i));
|
|
||||||
|
|
||||||
// Apply "Tasks" filter.
|
|
||||||
clickButton(/tasks/i);
|
|
||||||
await waitFor(() => {
|
|
||||||
expect(
|
|
||||||
screen.getByRole("button", { name: /tasks/i }).getAttribute("aria-pressed"),
|
|
||||||
).toBe("true");
|
|
||||||
});
|
|
||||||
|
|
||||||
// Push an a2a_send (does NOT match task_update filter).
|
|
||||||
await act(async () => {
|
|
||||||
emitActivity({
|
|
||||||
activityType: "a2a_send",
|
|
||||||
summary: "should-not-render-filter-mismatch",
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(
|
|
||||||
screen.queryByText(/should-not-render-filter-mismatch/),
|
|
||||||
).toBeNull();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push respects the active filter — matching activity_type is rendered", async () => {
|
|
||||||
render(<ActivityTab workspaceId="ws-1" />);
|
|
||||||
await waitFor(() => screen.getByText(/no activity/i));
|
|
||||||
|
|
||||||
clickButton(/tasks/i);
|
|
||||||
await waitFor(() => {
|
|
||||||
expect(
|
|
||||||
screen.getByRole("button", { name: /tasks/i }).getAttribute("aria-pressed"),
|
|
||||||
).toBe("true");
|
|
||||||
});
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitActivity({
|
|
||||||
activityType: "task_update",
|
|
||||||
summary: "task-filter-match",
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
await waitFor(() => {
|
|
||||||
expect(screen.getByText(/task-filter-match/)).toBeTruthy();
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push while autoRefresh is paused is ignored", async () => {
|
|
||||||
render(<ActivityTab workspaceId="ws-1" />);
|
|
||||||
await waitFor(() => screen.getByText(/no activity/i));
|
|
||||||
|
|
||||||
// Toggle Live → Paused.
|
|
||||||
clickButton(/live/i);
|
|
||||||
await waitFor(() => {
|
|
||||||
expect(screen.getByText(/Paused/)).toBeTruthy();
|
|
||||||
});
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitActivity({ summary: "should-not-render-paused" });
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(screen.queryByText(/should-not-render-paused/)).toBeNull();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push for a row already in the list is deduped (no double-render)", async () => {
|
|
||||||
// Bootstrap with one row — same id as the WS push to trigger dedup.
|
|
||||||
mockGet.mockResolvedValueOnce([
|
|
||||||
makeEntry({ id: "shared-id", summary: "bootstrap-summary" }),
|
|
||||||
]);
|
|
||||||
render(<ActivityTab workspaceId="ws-1" />);
|
|
||||||
await waitFor(() => {
|
|
||||||
expect(screen.getByText(/bootstrap-summary/)).toBeTruthy();
|
|
||||||
});
|
|
||||||
mockGet.mockClear();
|
|
||||||
|
|
||||||
// Push a row with the SAME id but a different summary — must not
|
|
||||||
// render the new summary; original row stays.
|
|
||||||
await act(async () => {
|
|
||||||
emitActivity({
|
|
||||||
id: "shared-id",
|
|
||||||
summary: "should-not-replace-existing",
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(screen.queryByText(/should-not-replace-existing/)).toBeNull();
|
|
||||||
// Also verify count didn't grow.
|
|
||||||
expect(screen.getByText(/1 activities/)).toBeTruthy();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("does NOT poll on a 5s interval after mount (post-#61)", async () => {
|
|
||||||
vi.useFakeTimers();
|
|
||||||
try {
|
|
||||||
render(<ActivityTab workspaceId="ws-1" />);
|
|
||||||
// Drain the mount-time bootstrap promise.
|
|
||||||
await act(async () => {
|
|
||||||
await Promise.resolve();
|
|
||||||
await Promise.resolve();
|
|
||||||
});
|
|
||||||
const callsAfterBootstrap = mockGet.mock.calls.length;
|
|
||||||
expect(callsAfterBootstrap).toBeGreaterThanOrEqual(1);
|
|
||||||
|
|
||||||
// Pre-#61: a 30s clock advance fires 6 more polls. Post-#61: 0.
|
|
||||||
await act(async () => {
|
|
||||||
vi.advanceTimersByTime(30_000);
|
|
||||||
});
|
|
||||||
expect(mockGet.mock.calls.length).toBe(callsAfterBootstrap);
|
|
||||||
} finally {
|
|
||||||
vi.useRealTimers();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
// ── Suite 7: Activity count ───────────────────────────────────────────────────
|
// ── Suite 7: Activity count ───────────────────────────────────────────────────
|
||||||
|
|
||||||
describe("ActivityTab — activity count", () => {
|
describe("ActivityTab — activity count", () => {
|
||||||
|
|||||||
@ -1,28 +1,18 @@
|
|||||||
// @vitest-environment jsdom
|
// @vitest-environment jsdom
|
||||||
/**
|
/**
|
||||||
* CommunicationOverlay tests — pin both the 2026-05-04 fan-out cap fix
|
* CommunicationOverlay tests — pin the rate-limit fix shipped 2026-05-04.
|
||||||
* AND the 2026-05-07 polling → ACTIVITY_LOGGED-subscriber refactor
|
|
||||||
* (issue #61 stage 1).
|
|
||||||
*
|
*
|
||||||
* The overlay used to poll /workspaces/:id/activity?limit=5 on a 30s
|
* The overlay polls /workspaces/:id/activity?limit=5 for each online
|
||||||
* interval per online workspace (capped at 3). Post-#61: it bootstraps
|
* workspace. Pre-fix it (a) polled regardless of visibility and (b)
|
||||||
* once on mount via the same HTTP path (cap of 3 retained), then
|
* fanned out to 6 workspaces every 10s. With 8+ workspaces a user
|
||||||
* subscribes to ACTIVITY_LOGGED via the global socket bus for live
|
* triggered sustained 429s (server-side rate limit is 600 req/min/IP).
|
||||||
* updates. No interval poll.
|
|
||||||
*
|
*
|
||||||
* These tests pin:
|
* These tests pin:
|
||||||
* 1. Bootstrap fan-out cap of 3 — even with 6 online nodes, only 3
|
* 1. Fan-out cap of 3 — even with 6 online nodes, only 3 fetches
|
||||||
* HTTP fetches on mount.
|
* 2. Visibility gate — when collapsed, no polling
|
||||||
* 2. Visibility gate — when collapsed, no HTTP fetches; re-open
|
|
||||||
* re-bootstraps.
|
|
||||||
* 3. NO interval polling — advancing the clock past 30s does not fire
|
|
||||||
* additional HTTP calls.
|
|
||||||
* 4. WS push extends the rendered list without firing any HTTP call.
|
|
||||||
* 5. WS push for an offline workspace is ignored.
|
|
||||||
* 6. WS push for a non-comm activity_type is ignored.
|
|
||||||
*
|
*
|
||||||
* If a future refactor regresses any of these, CI fails before the
|
* If a future refactor pushes either dial back up, CI fails before
|
||||||
* regression hits a paying tenant.
|
* the regression hits a paying tenant.
|
||||||
*/
|
*/
|
||||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||||
import { render, cleanup, act, fireEvent } from "@testing-library/react";
|
import { render, cleanup, act, fireEvent } from "@testing-library/react";
|
||||||
@ -33,7 +23,7 @@ vi.mock("@/lib/api", () => ({
|
|||||||
api: { get: vi.fn() },
|
api: { get: vi.fn() },
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Six online nodes — enough to verify the bootstrap cap of 3.
|
// Six online nodes — enough to verify the cap of 3.
|
||||||
const mockStoreState = {
|
const mockStoreState = {
|
||||||
selectedNodeId: null as string | null,
|
selectedNodeId: null as string | null,
|
||||||
nodes: [
|
nodes: [
|
||||||
@ -66,10 +56,6 @@ vi.mock("@/lib/design-tokens", () => ({
|
|||||||
// ── Imports (after mocks) ─────────────────────────────────────────────────────
|
// ── Imports (after mocks) ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
import { api } from "@/lib/api";
|
import { api } from "@/lib/api";
|
||||||
import {
|
|
||||||
emitSocketEvent,
|
|
||||||
_resetSocketEventListenersForTests,
|
|
||||||
} from "@/store/socket-events";
|
|
||||||
import { CommunicationOverlay } from "../CommunicationOverlay";
|
import { CommunicationOverlay } from "../CommunicationOverlay";
|
||||||
|
|
||||||
const mockGet = vi.mocked(api.get);
|
const mockGet = vi.mocked(api.get);
|
||||||
@ -80,34 +66,30 @@ beforeEach(() => {
|
|||||||
vi.useFakeTimers();
|
vi.useFakeTimers();
|
||||||
mockGet.mockReset();
|
mockGet.mockReset();
|
||||||
mockGet.mockResolvedValue([]);
|
mockGet.mockResolvedValue([]);
|
||||||
// Drop any subscribers the previous test left on the singleton bus —
|
|
||||||
// each render adds one via useSocketEvent.
|
|
||||||
_resetSocketEventListenersForTests();
|
|
||||||
});
|
});
|
||||||
|
|
||||||
afterEach(() => {
|
afterEach(() => {
|
||||||
cleanup();
|
cleanup();
|
||||||
vi.useRealTimers();
|
vi.useRealTimers();
|
||||||
_resetSocketEventListenersForTests();
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// ── Tests ─────────────────────────────────────────────────────────────────────
|
// ── Tests ─────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
describe("CommunicationOverlay — bootstrap fan-out cap", () => {
|
describe("CommunicationOverlay — fan-out cap", () => {
|
||||||
it("bootstraps at most 3 of 6 online workspaces (rate-limit floor preserved post-#61)", async () => {
|
it("polls at most 3 of 6 online workspaces (rate-limit floor)", async () => {
|
||||||
await act(async () => {
|
await act(async () => {
|
||||||
render(<CommunicationOverlay />);
|
render(<CommunicationOverlay />);
|
||||||
});
|
});
|
||||||
// Mount fires the bootstrap synchronously — pre-#61 this was the
|
// Mount fires the first poll synchronously (no interval tick yet).
|
||||||
// first poll cycle; post-#61 it's the only HTTP fetch (live updates
|
// Pre-fix: 6 calls. Post-fix: 3.
|
||||||
// arrive via WS push). 6 nodes → 3 fetches.
|
|
||||||
expect(mockGet).toHaveBeenCalledTimes(3);
|
expect(mockGet).toHaveBeenCalledTimes(3);
|
||||||
|
// Verify the calls are for the FIRST 3 online nodes (slice order).
|
||||||
expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-1/activity?limit=5");
|
expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-1/activity?limit=5");
|
||||||
expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-2/activity?limit=5");
|
expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-2/activity?limit=5");
|
||||||
expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-3/activity?limit=5");
|
expect(mockGet).toHaveBeenCalledWith("/workspaces/ws-3/activity?limit=5");
|
||||||
});
|
});
|
||||||
|
|
||||||
it("never bootstraps offline workspaces", async () => {
|
it("never polls offline workspaces", async () => {
|
||||||
await act(async () => {
|
await act(async () => {
|
||||||
render(<CommunicationOverlay />);
|
render(<CommunicationOverlay />);
|
||||||
});
|
});
|
||||||
@ -117,39 +99,40 @@ describe("CommunicationOverlay — bootstrap fan-out cap", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("CommunicationOverlay — no interval polling (post-#61)", () => {
|
describe("CommunicationOverlay — cadence", () => {
|
||||||
// The pre-#61 implementation re-fetched every 30s per workspace.
|
it("uses 30s interval cadence (was 10s pre-fix)", async () => {
|
||||||
// Post-#61 the only HTTP path is the bootstrap on mount + on
|
|
||||||
// visibility-toggle. This test pins the absence of any interval
|
|
||||||
// poll: a 60s clock advance must not produce a second round of
|
|
||||||
// fetches.
|
|
||||||
it("does NOT poll on a 30s interval after bootstrap", async () => {
|
|
||||||
await act(async () => {
|
await act(async () => {
|
||||||
render(<CommunicationOverlay />);
|
render(<CommunicationOverlay />);
|
||||||
});
|
});
|
||||||
expect(mockGet).toHaveBeenCalledTimes(3); // initial bootstrap
|
expect(mockGet).toHaveBeenCalledTimes(3); // initial mount poll
|
||||||
mockGet.mockClear();
|
|
||||||
|
|
||||||
// Advance 60s — well past any plausible cadence the prior version
|
// Advance 10s — pre-fix this would fire another poll. Post-fix: silent.
|
||||||
// could have used.
|
|
||||||
await act(async () => {
|
await act(async () => {
|
||||||
vi.advanceTimersByTime(60_000);
|
vi.advanceTimersByTime(10_000);
|
||||||
});
|
});
|
||||||
expect(mockGet).not.toHaveBeenCalled();
|
expect(mockGet).toHaveBeenCalledTimes(3);
|
||||||
|
|
||||||
|
// Advance to 30s — interval fires.
|
||||||
|
await act(async () => {
|
||||||
|
vi.advanceTimersByTime(20_000);
|
||||||
|
});
|
||||||
|
expect(mockGet).toHaveBeenCalledTimes(6); // +3 from second tick
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("CommunicationOverlay — visibility gate", () => {
|
describe("CommunicationOverlay — visibility gate", () => {
|
||||||
// The visibility gate now does two things post-#61:
|
// The visibility gate is the dial that drops collapsed-panel polling
|
||||||
// - while closed, the WS handler short-circuits (no setComms churn)
|
// to ZERO. The cadence test above can't catch its removal — if a
|
||||||
// - re-opening triggers a fresh bootstrap so the list reflects
|
// refactor dropped `if (!visible) return`, the cadence test would
|
||||||
// anything that happened while the panel was collapsed
|
// still pass because the effect would still fire every 30s.
|
||||||
//
|
//
|
||||||
// Direct probe: render with comms-returning mock so the panel
|
// Direct probe: render with comms-returning mock so the panel
|
||||||
// actually renders (close button only exists in the expanded panel,
|
// actually renders (close button only exists in the expanded panel,
|
||||||
// not the collapsed button-state). Click close, advance the clock,
|
// not the collapsed button-state). Click close, advance the clock,
|
||||||
// assert no further fetches.
|
// assert no further fetches.
|
||||||
it("stops fetching while collapsed and re-bootstraps on re-open", async () => {
|
it("stops polling after the user collapses the panel", async () => {
|
||||||
|
// Mock returns one a2a_send so comms.length > 0 → panel renders →
|
||||||
|
// close button accessible.
|
||||||
mockGet.mockResolvedValue([
|
mockGet.mockResolvedValue([
|
||||||
{
|
{
|
||||||
id: "act-1",
|
id: "act-1",
|
||||||
@ -167,202 +150,29 @@ describe("CommunicationOverlay — visibility gate", () => {
|
|||||||
const { getByLabelText } = await act(async () => {
|
const { getByLabelText } = await act(async () => {
|
||||||
return render(<CommunicationOverlay />);
|
return render(<CommunicationOverlay />);
|
||||||
});
|
});
|
||||||
// Drain pending microtasks (resolves the await in bootstrap) so
|
// Drain pending microtasks (resolves the await in fetchComms) so
|
||||||
// setComms lands and the panel renders. Don't advance time — it's
|
// setComms lands and the panel renders. Don't advance time — that
|
||||||
// not load-bearing for the gate test, but matches the pattern used
|
// would fire the next interval tick and pollute the assertion.
|
||||||
// pre-#61 for stability.
|
|
||||||
await act(async () => {
|
await act(async () => {
|
||||||
await Promise.resolve();
|
await Promise.resolve();
|
||||||
await Promise.resolve();
|
await Promise.resolve();
|
||||||
await Promise.resolve();
|
await Promise.resolve();
|
||||||
});
|
});
|
||||||
expect(mockGet).toHaveBeenCalledTimes(3); // initial bootstrap
|
// Initial mount polled 3 workspaces.
|
||||||
|
expect(mockGet).toHaveBeenCalledTimes(3);
|
||||||
mockGet.mockClear();
|
mockGet.mockClear();
|
||||||
|
|
||||||
// Click close. While closed, no fetches and no WS-driven updates.
|
// Click the close button. Synchronous getByLabelText avoids
|
||||||
|
// findBy's internal setTimeout (deadlocks under useFakeTimers).
|
||||||
const closeBtn = getByLabelText("Close communications panel");
|
const closeBtn = getByLabelText("Close communications panel");
|
||||||
await act(async () => {
|
await act(async () => {
|
||||||
fireEvent.click(closeBtn);
|
fireEvent.click(closeBtn);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Advance well past the 30s cadence — gate should suppress the tick.
|
||||||
await act(async () => {
|
await act(async () => {
|
||||||
vi.advanceTimersByTime(60_000);
|
vi.advanceTimersByTime(60_000);
|
||||||
});
|
});
|
||||||
expect(mockGet).not.toHaveBeenCalled();
|
expect(mockGet).not.toHaveBeenCalled();
|
||||||
|
|
||||||
// Re-open via the collapsed button. Must trigger a fresh bootstrap.
|
|
||||||
const openBtn = getByLabelText("Show communications panel");
|
|
||||||
await act(async () => {
|
|
||||||
fireEvent.click(openBtn);
|
|
||||||
});
|
|
||||||
await act(async () => {
|
|
||||||
await Promise.resolve();
|
|
||||||
await Promise.resolve();
|
|
||||||
});
|
|
||||||
expect(mockGet).toHaveBeenCalledTimes(3); // re-bootstrap on re-open
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
describe("CommunicationOverlay — WS subscription (#61 stage 1 core)", () => {
|
|
||||||
// The load-bearing post-#61 behaviour. Every test in this block must
|
|
||||||
// verify (a) the WS push DID update the rendered comms list, and
|
|
||||||
// (b) NO additional HTTP call was fired — the whole point of the
|
|
||||||
// refactor is to remove the polling-driven HTTP traffic.
|
|
||||||
function emitActivityLogged(overrides: Partial<{
|
|
||||||
workspaceId: string;
|
|
||||||
payload: Record<string, unknown>;
|
|
||||||
}> = {}) {
|
|
||||||
emitSocketEvent({
|
|
||||||
event: "ACTIVITY_LOGGED",
|
|
||||||
workspace_id: overrides.workspaceId ?? "ws-1",
|
|
||||||
timestamp: new Date().toISOString(),
|
|
||||||
payload: {
|
|
||||||
id: `act-${Math.random().toString(36).slice(2)}`,
|
|
||||||
activity_type: "a2a_send",
|
|
||||||
source_id: "ws-1",
|
|
||||||
target_id: "ws-2",
|
|
||||||
summary: "live push",
|
|
||||||
status: "ok",
|
|
||||||
duration_ms: 42,
|
|
||||||
created_at: new Date().toISOString(),
|
|
||||||
...overrides.payload,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
it("WS push for a comm activity_type extends the rendered list with NO additional HTTP call", async () => {
|
|
||||||
const { container } = await act(async () => {
|
|
||||||
return render(<CommunicationOverlay />);
|
|
||||||
});
|
|
||||||
expect(mockGet).toHaveBeenCalledTimes(3); // bootstrap
|
|
||||||
mockGet.mockClear();
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitActivityLogged({ payload: { summary: "hello" } });
|
|
||||||
});
|
|
||||||
await act(async () => {
|
|
||||||
await Promise.resolve();
|
|
||||||
});
|
|
||||||
|
|
||||||
// Two pins:
|
|
||||||
// 1. comms list reflects the live push (look for the summary text)
|
|
||||||
// 2. zero HTTP fetches fired during the WS path
|
|
||||||
expect(container.textContent).toContain("hello");
|
|
||||||
expect(mockGet).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push for an offline workspace is ignored", async () => {
|
|
||||||
const { container } = await act(async () => {
|
|
||||||
return render(<CommunicationOverlay />);
|
|
||||||
});
|
|
||||||
mockGet.mockClear();
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitActivityLogged({
|
|
||||||
workspaceId: "ws-offline",
|
|
||||||
payload: { source_id: "ws-offline", summary: "should-not-render" },
|
|
||||||
});
|
|
||||||
});
|
|
||||||
await act(async () => {
|
|
||||||
await Promise.resolve();
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(container.textContent).not.toContain("should-not-render");
|
|
||||||
expect(mockGet).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push for a non-comm activity_type is ignored (e.g. delegation)", async () => {
|
|
||||||
const { container } = await act(async () => {
|
|
||||||
return render(<CommunicationOverlay />);
|
|
||||||
});
|
|
||||||
mockGet.mockClear();
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitActivityLogged({
|
|
||||||
payload: {
|
|
||||||
activity_type: "delegation",
|
|
||||||
summary: "should-not-render-delegation",
|
|
||||||
},
|
|
||||||
});
|
|
||||||
});
|
|
||||||
await act(async () => {
|
|
||||||
await Promise.resolve();
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(container.textContent).not.toContain("should-not-render-delegation");
|
|
||||||
expect(mockGet).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("WS push while the panel is collapsed is ignored (no churn on hidden state)", async () => {
|
|
||||||
// Bootstrap with one comm so the panel renders → close button
|
|
||||||
// accessible. Then collapse, emit a WS push, re-open: the rendered
|
|
||||||
// list must come from the re-bootstrap, NOT from the WS-push that
|
|
||||||
// arrived during the closed state. Also: nothing visible while
|
|
||||||
// closed (the collapsed button shows only the count, not summaries).
|
|
||||||
mockGet.mockResolvedValue([
|
|
||||||
{
|
|
||||||
id: "act-bootstrap",
|
|
||||||
workspace_id: "ws-1",
|
|
||||||
activity_type: "a2a_send",
|
|
||||||
source_id: "ws-1",
|
|
||||||
target_id: "ws-2",
|
|
||||||
summary: "bootstrap-summary",
|
|
||||||
status: "ok",
|
|
||||||
duration_ms: 1,
|
|
||||||
created_at: new Date().toISOString(),
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
const { getByLabelText, container } = await act(async () => {
|
|
||||||
return render(<CommunicationOverlay />);
|
|
||||||
});
|
|
||||||
await act(async () => {
|
|
||||||
await Promise.resolve();
|
|
||||||
await Promise.resolve();
|
|
||||||
});
|
|
||||||
|
|
||||||
// Collapse.
|
|
||||||
const closeBtn = getByLabelText("Close communications panel");
|
|
||||||
await act(async () => {
|
|
||||||
fireEvent.click(closeBtn);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Bootstrap mock returns nothing on the re-open path so we can
|
|
||||||
// distinguish "WS push leaked through the gate" from "re-bootstrap
|
|
||||||
// refilled the list."
|
|
||||||
mockGet.mockReset();
|
|
||||||
mockGet.mockResolvedValue([]);
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitActivityLogged({
|
|
||||||
payload: { summary: "leaked-while-closed" },
|
|
||||||
});
|
|
||||||
});
|
|
||||||
await act(async () => {
|
|
||||||
await Promise.resolve();
|
|
||||||
});
|
|
||||||
|
|
||||||
// Closed state: rendered DOM must not show any push-derived text.
|
|
||||||
expect(container.textContent).not.toContain("leaked-while-closed");
|
|
||||||
});
|
|
||||||
|
|
||||||
it("non-ACTIVITY_LOGGED events are ignored (e.g. WORKSPACE_OFFLINE)", async () => {
|
|
||||||
const { container } = await act(async () => {
|
|
||||||
return render(<CommunicationOverlay />);
|
|
||||||
});
|
|
||||||
mockGet.mockClear();
|
|
||||||
|
|
||||||
await act(async () => {
|
|
||||||
emitSocketEvent({
|
|
||||||
event: "WORKSPACE_OFFLINE",
|
|
||||||
workspace_id: "ws-1",
|
|
||||||
timestamp: new Date().toISOString(),
|
|
||||||
payload: { summary: "should-not-render-event" },
|
|
||||||
});
|
|
||||||
});
|
|
||||||
await act(async () => {
|
|
||||||
await Promise.resolve();
|
|
||||||
});
|
|
||||||
|
|
||||||
expect(container.textContent).not.toContain("should-not-render-event");
|
|
||||||
expect(mockGet).not.toHaveBeenCalled();
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -1,9 +1,8 @@
|
|||||||
"use client";
|
"use client";
|
||||||
|
|
||||||
import { useState, useEffect, useCallback, useRef } from "react";
|
import { useState, useEffect, useCallback } from "react";
|
||||||
import { api } from "@/lib/api";
|
import { api } from "@/lib/api";
|
||||||
import { ConversationTraceModal } from "@/components/ConversationTraceModal";
|
import { ConversationTraceModal } from "@/components/ConversationTraceModal";
|
||||||
import { useSocketEvent } from "@/hooks/useSocketEvent";
|
|
||||||
import { type ActivityEntry } from "@/types/activity";
|
import { type ActivityEntry } from "@/types/activity";
|
||||||
import { useWorkspaceName } from "@/hooks/useWorkspaceName";
|
import { useWorkspaceName } from "@/hooks/useWorkspaceName";
|
||||||
import { inferA2AErrorHint } from "./chat/a2aErrorHint";
|
import { inferA2AErrorHint } from "./chat/a2aErrorHint";
|
||||||
@ -49,15 +48,6 @@ export function ActivityTab({ workspaceId }: Props) {
|
|||||||
const [traceOpen, setTraceOpen] = useState(false);
|
const [traceOpen, setTraceOpen] = useState(false);
|
||||||
const resolveName = useWorkspaceName();
|
const resolveName = useWorkspaceName();
|
||||||
|
|
||||||
// Refs let the WS handler read the latest filter / autoRefresh
|
|
||||||
// selection without re-subscribing on every state change. The bus
|
|
||||||
// listener is registered exactly once per mount via useSocketEvent's
|
|
||||||
// ref-internal pattern; subscriber-side filtering reads from these.
|
|
||||||
const filterRef = useRef(filter);
|
|
||||||
filterRef.current = filter;
|
|
||||||
const autoRefreshRef = useRef(autoRefresh);
|
|
||||||
autoRefreshRef.current = autoRefresh;
|
|
||||||
|
|
||||||
const loadActivities = useCallback(async () => {
|
const loadActivities = useCallback(async () => {
|
||||||
try {
|
try {
|
||||||
const typeParam = filter !== "all" ? `?type=${filter}` : "";
|
const typeParam = filter !== "all" ? `?type=${filter}` : "";
|
||||||
@ -76,58 +66,11 @@ export function ActivityTab({ workspaceId }: Props) {
|
|||||||
loadActivities();
|
loadActivities();
|
||||||
}, [loadActivities]);
|
}, [loadActivities]);
|
||||||
|
|
||||||
// Live-update path (issue #61 stage 3, replaces the 5s setInterval).
|
useEffect(() => {
|
||||||
// ACTIVITY_LOGGED events from this workspace prepend to the rendered
|
if (!autoRefresh) return;
|
||||||
// list — dedup by id so a server-side update + a poll reply don't
|
const interval = setInterval(loadActivities, 5000);
|
||||||
// double-render the same row.
|
return () => clearInterval(interval);
|
||||||
//
|
}, [loadActivities, autoRefresh]);
|
||||||
// Honours the user's autoRefresh toggle: when paused, live updates
|
|
||||||
// are dropped until the user re-enables Live (or hits Refresh, which
|
|
||||||
// re-bootstraps via loadActivities).
|
|
||||||
//
|
|
||||||
// Filter awareness: matches the server-side `?type=<filter>`
|
|
||||||
// semantics so the panel doesn't show rows the user excluded.
|
|
||||||
useSocketEvent((msg) => {
|
|
||||||
if (!autoRefreshRef.current) return;
|
|
||||||
if (msg.event !== "ACTIVITY_LOGGED") return;
|
|
||||||
if (msg.workspace_id !== workspaceId) return;
|
|
||||||
|
|
||||||
const p = (msg.payload || {}) as Record<string, unknown>;
|
|
||||||
const activityType = (p.activity_type as string) || "";
|
|
||||||
|
|
||||||
const f = filterRef.current;
|
|
||||||
if (f !== "all" && activityType !== f) return;
|
|
||||||
|
|
||||||
const entry: ActivityEntry = {
|
|
||||||
id:
|
|
||||||
(p.id as string) ||
|
|
||||||
`ws-push-${msg.timestamp || Date.now()}-${msg.workspace_id}`,
|
|
||||||
workspace_id: msg.workspace_id,
|
|
||||||
activity_type: activityType,
|
|
||||||
source_id: (p.source_id as string | null) ?? null,
|
|
||||||
target_id: (p.target_id as string | null) ?? null,
|
|
||||||
method: (p.method as string | null) ?? null,
|
|
||||||
summary: (p.summary as string | null) ?? null,
|
|
||||||
request_body: (p.request_body as Record<string, unknown> | null) ?? null,
|
|
||||||
response_body:
|
|
||||||
(p.response_body as Record<string, unknown> | null) ?? null,
|
|
||||||
duration_ms: (p.duration_ms as number | null) ?? null,
|
|
||||||
status: (p.status as string) || "ok",
|
|
||||||
error_detail: (p.error_detail as string | null) ?? null,
|
|
||||||
created_at:
|
|
||||||
(p.created_at as string) ||
|
|
||||||
msg.timestamp ||
|
|
||||||
new Date().toISOString(),
|
|
||||||
};
|
|
||||||
|
|
||||||
setActivities((prev) => {
|
|
||||||
// Dedup by id — a row that arrived via the bootstrap fetch and
|
|
||||||
// also fires ACTIVITY_LOGGED from a delayed server-side hook
|
|
||||||
// must render exactly once.
|
|
||||||
if (prev.some((e) => e.id === entry.id)) return prev;
|
|
||||||
return [entry, ...prev];
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div className="flex flex-col h-full">
|
<div className="flex flex-col h-full">
|
||||||
|
|||||||
@ -9,7 +9,6 @@
|
|||||||
// AttachmentLightbox).
|
// AttachmentLightbox).
|
||||||
|
|
||||||
import { useState, useEffect, useRef } from "react";
|
import { useState, useEffect, useRef } from "react";
|
||||||
import { platformAuthHeaders } from "@/lib/api";
|
|
||||||
import type { ChatAttachment } from "./types";
|
import type { ChatAttachment } from "./types";
|
||||||
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
||||||
import { AttachmentChip } from "./AttachmentViews";
|
import { AttachmentChip } from "./AttachmentViews";
|
||||||
@ -44,8 +43,13 @@ export function AttachmentAudio({ workspaceId, attachment, onDownload, tone }: P
|
|||||||
void (async () => {
|
void (async () => {
|
||||||
try {
|
try {
|
||||||
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
||||||
|
const headers: Record<string, string> = {};
|
||||||
|
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||||
|
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||||
|
const slug = getTenantSlug();
|
||||||
|
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||||
const res = await fetch(href, {
|
const res = await fetch(href, {
|
||||||
headers: platformAuthHeaders(),
|
headers,
|
||||||
credentials: "include",
|
credentials: "include",
|
||||||
signal: AbortSignal.timeout(60_000),
|
signal: AbortSignal.timeout(60_000),
|
||||||
});
|
});
|
||||||
@ -112,5 +116,9 @@ export function AttachmentAudio({ workspaceId, attachment, onDownload, tone }: P
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Local getTenantSlug() removed — auth-header construction now goes
|
function getTenantSlug(): string | null {
|
||||||
// through platformAuthHeaders() from @/lib/api (#178).
|
if (typeof window === "undefined") return null;
|
||||||
|
const host = window.location.hostname;
|
||||||
|
const m = host.match(/^([^.]+)\.moleculesai\.app$/);
|
||||||
|
return m ? m[1] : null;
|
||||||
|
}
|
||||||
|
|||||||
@ -35,7 +35,6 @@
|
|||||||
// downscale via canvas, but defer that to v2.
|
// downscale via canvas, but defer that to v2.
|
||||||
|
|
||||||
import { useState, useEffect, useRef } from "react";
|
import { useState, useEffect, useRef } from "react";
|
||||||
import { platformAuthHeaders } from "@/lib/api";
|
|
||||||
import type { ChatAttachment } from "./types";
|
import type { ChatAttachment } from "./types";
|
||||||
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
||||||
import { AttachmentLightbox } from "./AttachmentLightbox";
|
import { AttachmentLightbox } from "./AttachmentLightbox";
|
||||||
@ -76,14 +75,22 @@ export function AttachmentImage({ workspaceId, attachment, onDownload, tone }: P
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Platform-auth path: identical to downloadChatFile but we keep
|
// Platform-auth path: identical to downloadChatFile but we keep
|
||||||
// the blob (don't trigger a Save-As). Auth headers come from the
|
// the blob (don't trigger a Save-As). Use the same headers it does
|
||||||
// shared `platformAuthHeaders()` helper — one source of truth for
|
// by going through it indirectly — no, downloadChatFile triggers a
|
||||||
// every authenticated raw fetch in the canvas (#178).
|
// Save-As. Need a separate fetch.
|
||||||
void (async () => {
|
void (async () => {
|
||||||
try {
|
try {
|
||||||
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
||||||
|
const headers: Record<string, string> = {};
|
||||||
|
// Read the same env var downloadChatFile reads — single source
|
||||||
|
// of truth would be cleaner; refactor opportunity for PR-2 if
|
||||||
|
// we add the same path to AttachmentVideo.
|
||||||
|
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||||
|
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||||
|
const slug = getTenantSlug();
|
||||||
|
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||||
const res = await fetch(href, {
|
const res = await fetch(href, {
|
||||||
headers: platformAuthHeaders(),
|
headers,
|
||||||
credentials: "include",
|
credentials: "include",
|
||||||
signal: AbortSignal.timeout(30_000),
|
signal: AbortSignal.timeout(30_000),
|
||||||
});
|
});
|
||||||
@ -177,7 +184,15 @@ export function AttachmentImage({ workspaceId, attachment, onDownload, tone }: P
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Local getTenantSlug() removed — auth-header construction now goes
|
// Internal helper — duplicated from uploads.ts (it's not exported
|
||||||
// through platformAuthHeaders() from @/lib/api which uses the canonical
|
// there). Kept local so this component doesn't reach into private
|
||||||
// getTenantSlug() from @/lib/tenant. This eliminates the duplicate
|
// surface; if AttachmentVideo / AttachmentPDF in PR-2/PR-3 also need
|
||||||
// hostname-regex + the duplicate bearer-token-attach pattern (#178).
|
// it, lift to an exported helper at that point (the third-caller
|
||||||
|
// rule).
|
||||||
|
function getTenantSlug(): string | null {
|
||||||
|
if (typeof window === "undefined") return null;
|
||||||
|
const host = window.location.hostname;
|
||||||
|
// Tenant subdomain shape: <slug>.moleculesai.app
|
||||||
|
const m = host.match(/^([^.]+)\.moleculesai\.app$/);
|
||||||
|
return m ? m[1] : null;
|
||||||
|
}
|
||||||
|
|||||||
@ -33,7 +33,6 @@
|
|||||||
// timeout, swap to chip. Implemented as a 3-second watchdog.
|
// timeout, swap to chip. Implemented as a 3-second watchdog.
|
||||||
|
|
||||||
import { useState, useEffect, useRef } from "react";
|
import { useState, useEffect, useRef } from "react";
|
||||||
import { platformAuthHeaders } from "@/lib/api";
|
|
||||||
import type { ChatAttachment } from "./types";
|
import type { ChatAttachment } from "./types";
|
||||||
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
||||||
import { AttachmentLightbox } from "./AttachmentLightbox";
|
import { AttachmentLightbox } from "./AttachmentLightbox";
|
||||||
@ -70,8 +69,13 @@ export function AttachmentPDF({ workspaceId, attachment, onDownload, tone }: Pro
|
|||||||
void (async () => {
|
void (async () => {
|
||||||
try {
|
try {
|
||||||
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
||||||
|
const headers: Record<string, string> = {};
|
||||||
|
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||||
|
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||||
|
const slug = getTenantSlug();
|
||||||
|
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||||
const res = await fetch(href, {
|
const res = await fetch(href, {
|
||||||
headers: platformAuthHeaders(),
|
headers,
|
||||||
credentials: "include",
|
credentials: "include",
|
||||||
signal: AbortSignal.timeout(60_000),
|
signal: AbortSignal.timeout(60_000),
|
||||||
});
|
});
|
||||||
@ -185,5 +189,9 @@ function PdfGlyph() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Local getTenantSlug() removed — auth-header construction now goes
|
function getTenantSlug(): string | null {
|
||||||
// through platformAuthHeaders() from @/lib/api (#178).
|
if (typeof window === "undefined") return null;
|
||||||
|
const host = window.location.hostname;
|
||||||
|
const m = host.match(/^([^.]+)\.moleculesai\.app$/);
|
||||||
|
return m ? m[1] : null;
|
||||||
|
}
|
||||||
|
|||||||
@ -26,7 +26,6 @@
|
|||||||
// to download the full file.
|
// to download the full file.
|
||||||
|
|
||||||
import { useState, useEffect } from "react";
|
import { useState, useEffect } from "react";
|
||||||
import { platformAuthHeaders } from "@/lib/api";
|
|
||||||
import type { ChatAttachment } from "./types";
|
import type { ChatAttachment } from "./types";
|
||||||
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
||||||
import { AttachmentChip } from "./AttachmentViews";
|
import { AttachmentChip } from "./AttachmentViews";
|
||||||
@ -58,13 +57,13 @@ export function AttachmentTextPreview({ workspaceId, attachment, onDownload, ton
|
|||||||
void (async () => {
|
void (async () => {
|
||||||
try {
|
try {
|
||||||
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
||||||
// Only attach platform auth headers for in-platform URIs —
|
const headers: Record<string, string> = {};
|
||||||
// off-platform URLs (HTTP/HTTPS attachments) MUST NOT receive
|
if (isPlatformAttachment(attachment.uri)) {
|
||||||
// our bearer token (it would leak the admin token to a third
|
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||||
// party). The branch is preserved with the new shared helper.
|
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||||
const headers: Record<string, string> = isPlatformAttachment(attachment.uri)
|
const slug = getTenantSlug();
|
||||||
? platformAuthHeaders()
|
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||||
: {};
|
}
|
||||||
const res = await fetch(href, {
|
const res = await fetch(href, {
|
||||||
headers,
|
headers,
|
||||||
credentials: "include",
|
credentials: "include",
|
||||||
@ -183,5 +182,9 @@ export function AttachmentTextPreview({ workspaceId, attachment, onDownload, ton
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Local getTenantSlug() removed — auth-header construction now goes
|
function getTenantSlug(): string | null {
|
||||||
// through platformAuthHeaders() from @/lib/api (#178).
|
if (typeof window === "undefined") return null;
|
||||||
|
const host = window.location.hostname;
|
||||||
|
const m = host.match(/^([^.]+)\.moleculesai\.app$/);
|
||||||
|
return m ? m[1] : null;
|
||||||
|
}
|
||||||
|
|||||||
@ -25,7 +25,6 @@
|
|||||||
// fetch via service worker. v2 if measured-needed.
|
// fetch via service worker. v2 if measured-needed.
|
||||||
|
|
||||||
import { useState, useEffect, useRef } from "react";
|
import { useState, useEffect, useRef } from "react";
|
||||||
import { platformAuthHeaders } from "@/lib/api";
|
|
||||||
import type { ChatAttachment } from "./types";
|
import type { ChatAttachment } from "./types";
|
||||||
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
import { isPlatformAttachment, resolveAttachmentHref } from "./uploads";
|
||||||
import { AttachmentChip } from "./AttachmentViews";
|
import { AttachmentChip } from "./AttachmentViews";
|
||||||
@ -62,8 +61,13 @@ export function AttachmentVideo({ workspaceId, attachment, onDownload, tone }: P
|
|||||||
void (async () => {
|
void (async () => {
|
||||||
try {
|
try {
|
||||||
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
const href = resolveAttachmentHref(workspaceId, attachment.uri);
|
||||||
|
const headers: Record<string, string> = {};
|
||||||
|
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||||
|
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||||
|
const slug = getTenantSlug();
|
||||||
|
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||||
const res = await fetch(href, {
|
const res = await fetch(href, {
|
||||||
headers: platformAuthHeaders(),
|
headers,
|
||||||
credentials: "include",
|
credentials: "include",
|
||||||
// Videos are larger than images on average; give the request
|
// Videos are larger than images on average; give the request
|
||||||
// more headroom. The server's per-request body cap (50MB) is
|
// more headroom. The server's per-request body cap (50MB) is
|
||||||
@ -143,5 +147,11 @@ export function AttachmentVideo({ workspaceId, attachment, onDownload, tone }: P
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Local getTenantSlug() removed — auth-header construction now goes
|
// Internal helper — same shape as AttachmentImage's. Lifted to a
|
||||||
// through platformAuthHeaders() from @/lib/api (#178).
|
// shared util in PR-2.5 if a third caller needs it (PDF, audio).
|
||||||
|
function getTenantSlug(): string | null {
|
||||||
|
if (typeof window === "undefined") return null;
|
||||||
|
const host = window.location.hostname;
|
||||||
|
const m = host.match(/^([^.]+)\.moleculesai\.app$/);
|
||||||
|
return m ? m[1] : null;
|
||||||
|
}
|
||||||
|
|||||||
@ -1,16 +1,12 @@
|
|||||||
import { PLATFORM_URL, platformAuthHeaders } from "@/lib/api";
|
import { PLATFORM_URL } from "@/lib/api";
|
||||||
|
import { getTenantSlug } from "@/lib/tenant";
|
||||||
import type { ChatAttachment } from "./types";
|
import type { ChatAttachment } from "./types";
|
||||||
|
|
||||||
/** Chat attachments are intentionally uploaded via a direct fetch()
|
/** Chat attachments are intentionally uploaded via a direct fetch()
|
||||||
* instead of the `api.post` helper — `api.post` JSON-stringifies the
|
* instead of the `api.post` helper — `api.post` JSON-stringifies the
|
||||||
* body, which would 500 on a Blob. Auth headers (tenant slug, admin
|
* body, which would 500 on a Blob. Mirrors the header plumbing
|
||||||
* token, credentials) come from `platformAuthHeaders()` — the same
|
* (tenant slug, admin token, credentials) so SaaS + self-hosted
|
||||||
* helper `request()` uses, so a missing bearer surfaces as a single
|
* callers work the same way. */
|
||||||
* fix site instead of N copies. We deliberately do NOT set
|
|
||||||
* Content-Type so the browser writes the multipart boundary into the
|
|
||||||
* header; setting it manually would yield a multipart body the server
|
|
||||||
* can't parse. See lib/api.ts platformAuthHeaders() for the full
|
|
||||||
* rationale on why this pair must stay matched. */
|
|
||||||
export async function uploadChatFiles(
|
export async function uploadChatFiles(
|
||||||
workspaceId: string,
|
workspaceId: string,
|
||||||
files: File[],
|
files: File[],
|
||||||
@ -20,12 +16,18 @@ export async function uploadChatFiles(
|
|||||||
const form = new FormData();
|
const form = new FormData();
|
||||||
for (const f of files) form.append("files", f, f.name);
|
for (const f of files) form.append("files", f, f.name);
|
||||||
|
|
||||||
|
const headers: Record<string, string> = {};
|
||||||
|
const slug = getTenantSlug();
|
||||||
|
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||||
|
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||||
|
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||||
|
|
||||||
// Uploads legitimately take a while on cold cache (tar write +
|
// Uploads legitimately take a while on cold cache (tar write +
|
||||||
// docker cp into the container). 60s is comfortable for the 25MB/
|
// docker cp into the container). 60s is comfortable for the 25MB/
|
||||||
// 50MB caps the server enforces.
|
// 50MB caps the server enforces.
|
||||||
const res = await fetch(`${PLATFORM_URL}/workspaces/${workspaceId}/chat/uploads`, {
|
const res = await fetch(`${PLATFORM_URL}/workspaces/${workspaceId}/chat/uploads`, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: platformAuthHeaders(),
|
headers,
|
||||||
body: form,
|
body: form,
|
||||||
credentials: "include",
|
credentials: "include",
|
||||||
signal: AbortSignal.timeout(60_000),
|
signal: AbortSignal.timeout(60_000),
|
||||||
@ -141,8 +143,14 @@ export async function downloadChatFile(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const headers: Record<string, string> = {};
|
||||||
|
const slug = getTenantSlug();
|
||||||
|
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||||
|
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||||
|
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||||
|
|
||||||
const res = await fetch(href, {
|
const res = await fetch(href, {
|
||||||
headers: platformAuthHeaders(),
|
headers,
|
||||||
credentials: "include",
|
credentials: "include",
|
||||||
signal: AbortSignal.timeout(60_000),
|
signal: AbortSignal.timeout(60_000),
|
||||||
});
|
});
|
||||||
|
|||||||
@ -1,130 +0,0 @@
|
|||||||
// @vitest-environment node
|
|
||||||
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
|
||||||
|
|
||||||
// Tests for the boot-time matched-pair guard added to next.config.ts.
|
|
||||||
//
|
|
||||||
// Why this lives in src/lib/__tests__ even though the function is in
|
|
||||||
// canvas/next.config.ts:
|
|
||||||
// - next.config.ts runs as ESM-but-also-CJS depending on which
|
|
||||||
// consumer loads it (Next.js dev server vs Next.js build); we
|
|
||||||
// want the test to be a plain ESM module Vitest already handles.
|
|
||||||
// - Importing from "../../../next.config" pulls in the rest of the
|
|
||||||
// file (loadMonorepoEnv, the default export, etc.) which has
|
|
||||||
// side effects on module load (it runs loadMonorepoEnv()
|
|
||||||
// immediately). To keep the test hermetic we don't import — we
|
|
||||||
// duplicate the function under test.
|
|
||||||
//
|
|
||||||
// Sourcing the function from a shared module would be cleaner, but
|
|
||||||
// next.config.ts is required to be a single self-contained file by
|
|
||||||
// Next.js's loader on some host configurations. Pin invariant: the
|
|
||||||
// duplicated function below MUST stay byte-identical to the one in
|
|
||||||
// next.config.ts. If you change one, change the other and bump this
|
|
||||||
// comment.
|
|
||||||
|
|
||||||
function checkAdminTokenPair(): void {
|
|
||||||
const serverSet = !!process.env.ADMIN_TOKEN;
|
|
||||||
const clientSet = !!process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
|
||||||
if (serverSet === clientSet) return;
|
|
||||||
if (serverSet && !clientSet) {
|
|
||||||
// eslint-disable-next-line no-console
|
|
||||||
console.error(
|
|
||||||
"[next.config] ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not — " +
|
|
||||||
"canvas will 401 against workspace-server because the bearer header " +
|
|
||||||
"is never attached. Set both to the same value, or unset both.",
|
|
||||||
);
|
|
||||||
} else {
|
|
||||||
// eslint-disable-next-line no-console
|
|
||||||
console.error(
|
|
||||||
"[next.config] NEXT_PUBLIC_ADMIN_TOKEN is set but ADMIN_TOKEN is not — " +
|
|
||||||
"workspace-server will reject the bearer because no AdminAuth gate " +
|
|
||||||
"is configured. Set both to the same value, or unset both.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
describe("checkAdminTokenPair", () => {
|
|
||||||
// Snapshot env so individual tests can stomp on it without leaking.
|
|
||||||
// Rebuild from snapshot in afterEach so the next test sees a known
|
|
||||||
// baseline regardless of mutation pattern.
|
|
||||||
let originalEnv: Record<string, string | undefined>;
|
|
||||||
let errorSpy: ReturnType<typeof vi.spyOn>;
|
|
||||||
|
|
||||||
beforeEach(() => {
|
|
||||||
originalEnv = {
|
|
||||||
ADMIN_TOKEN: process.env.ADMIN_TOKEN,
|
|
||||||
NEXT_PUBLIC_ADMIN_TOKEN: process.env.NEXT_PUBLIC_ADMIN_TOKEN,
|
|
||||||
};
|
|
||||||
delete process.env.ADMIN_TOKEN;
|
|
||||||
delete process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
|
||||||
errorSpy = vi.spyOn(console, "error").mockImplementation(() => {});
|
|
||||||
});
|
|
||||||
|
|
||||||
afterEach(() => {
|
|
||||||
if (originalEnv.ADMIN_TOKEN === undefined) delete process.env.ADMIN_TOKEN;
|
|
||||||
else process.env.ADMIN_TOKEN = originalEnv.ADMIN_TOKEN;
|
|
||||||
if (originalEnv.NEXT_PUBLIC_ADMIN_TOKEN === undefined) delete process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
|
||||||
else process.env.NEXT_PUBLIC_ADMIN_TOKEN = originalEnv.NEXT_PUBLIC_ADMIN_TOKEN;
|
|
||||||
errorSpy.mockRestore();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("emits no warning when both are unset", () => {
|
|
||||||
checkAdminTokenPair();
|
|
||||||
expect(errorSpy).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("emits no warning when both are set (matched pair, the happy path)", () => {
|
|
||||||
process.env.ADMIN_TOKEN = "local-dev-admin";
|
|
||||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "local-dev-admin";
|
|
||||||
checkAdminTokenPair();
|
|
||||||
expect(errorSpy).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("warns when ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not", () => {
|
|
||||||
process.env.ADMIN_TOKEN = "local-dev-admin";
|
|
||||||
checkAdminTokenPair();
|
|
||||||
expect(errorSpy).toHaveBeenCalledTimes(1);
|
|
||||||
// Exact-string assertion — substring would also pass when the
|
|
||||||
// function's branch logic is broken (e.g. emits both messages, or
|
|
||||||
// emits the wrong one). Pin the exact message that operators will
|
|
||||||
// see in their dev console so regressions are visible.
|
|
||||||
expect(errorSpy).toHaveBeenCalledWith(
|
|
||||||
"[next.config] ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not — " +
|
|
||||||
"canvas will 401 against workspace-server because the bearer header " +
|
|
||||||
"is never attached. Set both to the same value, or unset both.",
|
|
||||||
);
|
|
||||||
});
|
|
||||||
|
|
||||||
it("warns when NEXT_PUBLIC_ADMIN_TOKEN is set but ADMIN_TOKEN is not", () => {
|
|
||||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "local-dev-admin";
|
|
||||||
checkAdminTokenPair();
|
|
||||||
expect(errorSpy).toHaveBeenCalledTimes(1);
|
|
||||||
expect(errorSpy).toHaveBeenCalledWith(
|
|
||||||
"[next.config] NEXT_PUBLIC_ADMIN_TOKEN is set but ADMIN_TOKEN is not — " +
|
|
||||||
"workspace-server will reject the bearer because no AdminAuth gate " +
|
|
||||||
"is configured. Set both to the same value, or unset both.",
|
|
||||||
);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Empty string in process.env is the JS-side representation of `KEY=`
|
|
||||||
// (no value) in a .env file. Treating "" as unset makes the pair
|
|
||||||
// invariant symmetric: `KEY=` and `unset KEY` produce the same
|
|
||||||
// verdict. Without this branch, an operator who comments out the
|
|
||||||
// value but leaves the line would get a false-positive warning.
|
|
||||||
it("treats empty string as unset (so KEY= and unset KEY are equivalent)", () => {
|
|
||||||
process.env.ADMIN_TOKEN = "";
|
|
||||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "";
|
|
||||||
checkAdminTokenPair();
|
|
||||||
expect(errorSpy).not.toHaveBeenCalled();
|
|
||||||
});
|
|
||||||
|
|
||||||
it("warns when ADMIN_TOKEN is set and NEXT_PUBLIC_ADMIN_TOKEN is empty string", () => {
|
|
||||||
process.env.ADMIN_TOKEN = "local-dev-admin";
|
|
||||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "";
|
|
||||||
checkAdminTokenPair();
|
|
||||||
expect(errorSpy).toHaveBeenCalledTimes(1);
|
|
||||||
// First branch — server set, client unset.
|
|
||||||
expect(errorSpy).toHaveBeenCalledWith(
|
|
||||||
expect.stringContaining("ADMIN_TOKEN is set but NEXT_PUBLIC_ADMIN_TOKEN is not"),
|
|
||||||
);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
@ -1,97 +0,0 @@
|
|||||||
// @vitest-environment jsdom
|
|
||||||
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
|
|
||||||
|
|
||||||
// Tests for platformAuthHeaders — the shared helper extracted in #178
|
|
||||||
// to consolidate the bearer-token-attach + tenant-slug-attach pattern
|
|
||||||
// that was previously duplicated across 7 raw-fetch callsites in the
|
|
||||||
// canvas (uploads + 5 Attachment* components + the api.ts request()
|
|
||||||
// function).
|
|
||||||
//
|
|
||||||
// What we pin here:
|
|
||||||
// - Returns a fresh object each call (so callers can mutate without
|
|
||||||
// leaking into each other).
|
|
||||||
// - Empty result on a non-tenant host with no admin token (the
|
|
||||||
// localhost / self-hosted shape).
|
|
||||||
// - Bearer attached when NEXT_PUBLIC_ADMIN_TOKEN is set.
|
|
||||||
// - X-Molecule-Org-Slug attached when window.location.hostname is a
|
|
||||||
// tenant subdomain (<slug>.moleculesai.app).
|
|
||||||
// - Both attached when both apply (the production SaaS shape).
|
|
||||||
//
|
|
||||||
// Why jsdom: getTenantSlug() reads window.location.hostname. Node-only
|
|
||||||
// environment yields no window and getTenantSlug returns null
|
|
||||||
// unconditionally — wouldn't exercise the slug branch.
|
|
||||||
|
|
||||||
import { platformAuthHeaders } from "../api";
|
|
||||||
|
|
||||||
describe("platformAuthHeaders", () => {
|
|
||||||
let originalAdminToken: string | undefined;
|
|
||||||
|
|
||||||
beforeEach(() => {
|
|
||||||
originalAdminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
|
||||||
delete process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
|
||||||
});
|
|
||||||
|
|
||||||
afterEach(() => {
|
|
||||||
if (originalAdminToken === undefined) delete process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
|
||||||
else process.env.NEXT_PUBLIC_ADMIN_TOKEN = originalAdminToken;
|
|
||||||
// jsdom resets hostname between tests via the @vitest-environment
|
|
||||||
// pragma's per-test isolation. No explicit reset needed.
|
|
||||||
});
|
|
||||||
|
|
||||||
it("returns an empty object on a non-tenant host with no admin token", () => {
|
|
||||||
// jsdom default hostname is "localhost" — not a tenant slug, so
|
|
||||||
// getTenantSlug() returns null and no X-Molecule-Org-Slug is added.
|
|
||||||
const headers = platformAuthHeaders();
|
|
||||||
expect(headers).toEqual({});
|
|
||||||
});
|
|
||||||
|
|
||||||
it("attaches Authorization when NEXT_PUBLIC_ADMIN_TOKEN is set", () => {
|
|
||||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "local-dev-admin";
|
|
||||||
const headers = platformAuthHeaders();
|
|
||||||
expect(headers).toEqual({ Authorization: "Bearer local-dev-admin" });
|
|
||||||
});
|
|
||||||
|
|
||||||
it("does NOT attach Authorization when NEXT_PUBLIC_ADMIN_TOKEN is empty string", () => {
|
|
||||||
// Empty-string env is the JS-side shape of `KEY=` in .env.
|
|
||||||
// Treating it as unset matches the matched-pair guard in
|
|
||||||
// next.config.ts (admin-token-pair.test.ts) — symmetric semantics.
|
|
||||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "";
|
|
||||||
const headers = platformAuthHeaders();
|
|
||||||
expect(headers).toEqual({});
|
|
||||||
});
|
|
||||||
|
|
||||||
it("attaches X-Molecule-Org-Slug on a tenant subdomain", () => {
|
|
||||||
Object.defineProperty(window, "location", {
|
|
||||||
value: { hostname: "reno-stars.moleculesai.app" },
|
|
||||||
writable: true,
|
|
||||||
});
|
|
||||||
const headers = platformAuthHeaders();
|
|
||||||
expect(headers).toEqual({ "X-Molecule-Org-Slug": "reno-stars" });
|
|
||||||
});
|
|
||||||
|
|
||||||
it("attaches both when both apply (production SaaS shape)", () => {
|
|
||||||
Object.defineProperty(window, "location", {
|
|
||||||
value: { hostname: "reno-stars.moleculesai.app" },
|
|
||||||
writable: true,
|
|
||||||
});
|
|
||||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "tenant-bearer";
|
|
||||||
const headers = platformAuthHeaders();
|
|
||||||
// Pin exact-equality on the full shape — substring/contains
|
|
||||||
// assertions would also pass for an extra-header bug.
|
|
||||||
expect(headers).toEqual({
|
|
||||||
"X-Molecule-Org-Slug": "reno-stars",
|
|
||||||
Authorization: "Bearer tenant-bearer",
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
it("returns a fresh object each call (callers can mutate safely)", () => {
|
|
||||||
process.env.NEXT_PUBLIC_ADMIN_TOKEN = "tok";
|
|
||||||
const a = platformAuthHeaders();
|
|
||||||
const b = platformAuthHeaders();
|
|
||||||
expect(a).not.toBe(b); // distinct refs
|
|
||||||
expect(a).toEqual(b); // same content
|
|
||||||
a["Content-Type"] = "application/json";
|
|
||||||
// Mutation on `a` does not leak into `b`.
|
|
||||||
expect(b["Content-Type"]).toBeUndefined();
|
|
||||||
});
|
|
||||||
});
|
|
||||||
@ -21,45 +21,6 @@ export interface RequestOptions {
|
|||||||
timeoutMs?: number;
|
timeoutMs?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Build the platform auth header set used by every authenticated fetch
|
|
||||||
* from the canvas. Returns a fresh object so callers can mutate (e.g.
|
|
||||||
* append `Content-Type` for JSON requests, omit it for FormData).
|
|
||||||
*
|
|
||||||
* SaaS cross-origin shape:
|
|
||||||
* - `X-Molecule-Org-Slug` — derived from `window.location.hostname`
|
|
||||||
* by `getTenantSlug()`. Control plane uses it for fly-replay
|
|
||||||
* routing. Empty on localhost / non-tenant hosts — safe to omit.
|
|
||||||
* - `Authorization: Bearer <token>` — `NEXT_PUBLIC_ADMIN_TOKEN` baked
|
|
||||||
* into the canvas build (see canvas/Dockerfile L8/L11). Required by
|
|
||||||
* the workspace-server when `ADMIN_TOKEN` is set on the server side
|
|
||||||
* (Tier-2b AdminAuth gate, wsauth_middleware.go ~L245). Empty when
|
|
||||||
* no admin token was provisioned — the Tier-1 session-cookie path
|
|
||||||
* handles that case via `credentials:"include"`.
|
|
||||||
*
|
|
||||||
* Why a shared helper: the two-line "read env, attach bearer; read
|
|
||||||
* slug, attach header" pattern was duplicated across `request()` and
|
|
||||||
* 7 raw-fetch callsites (chat uploads/download + 5 Attachment*
|
|
||||||
* components) before this consolidation. A new poller or raw fetch
|
|
||||||
* that forgets one of the two headers silently 401s against
|
|
||||||
* workspace-server when ADMIN_TOKEN is set — the exact bug shape
|
|
||||||
* called out in #178 / closes the post-#176 self-review gap.
|
|
||||||
*
|
|
||||||
* Callers that want JSON Content-Type should spread this and add it
|
|
||||||
* themselves; FormData callers should NOT add Content-Type (the
|
|
||||||
* browser sets the multipart boundary). Centralizing the auth pair
|
|
||||||
* but leaving Content-Type up to the caller is the minimum viable
|
|
||||||
* shared shape.
|
|
||||||
*/
|
|
||||||
export function platformAuthHeaders(): Record<string, string> {
|
|
||||||
const headers: Record<string, string> = {};
|
|
||||||
const slug = getTenantSlug();
|
|
||||||
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
|
||||||
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
|
||||||
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
|
||||||
return headers;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function request<T>(
|
async function request<T>(
|
||||||
method: string,
|
method: string,
|
||||||
path: string,
|
path: string,
|
||||||
@ -67,16 +28,17 @@ async function request<T>(
|
|||||||
retryCount = 0,
|
retryCount = 0,
|
||||||
options?: RequestOptions,
|
options?: RequestOptions,
|
||||||
): Promise<T> {
|
): Promise<T> {
|
||||||
// JSON-bodied request — Content-Type is JSON. Auth pair comes from
|
// SaaS cross-origin shape:
|
||||||
// the shared helper; see its doc comment for the SaaS-shape rationale.
|
// - X-Molecule-Org-Slug: derived from window.location.hostname by
|
||||||
const headers: Record<string, string> = {
|
// getTenantSlug(). Control plane uses it for fly-replay routing.
|
||||||
"Content-Type": "application/json",
|
// Empty on localhost / non-tenant hosts — safe to omit.
|
||||||
...platformAuthHeaders(),
|
// - credentials:"include": sends the session cookie cross-origin.
|
||||||
};
|
// Cookie's Domain=.moleculesai.app attribute + cp's CORS allow this.
|
||||||
// Re-read slug locally for the 401 handler below — `headers` already
|
const headers: Record<string, string> = { "Content-Type": "application/json" };
|
||||||
// has it, but the 401 branch needs the bare value to gate the
|
|
||||||
// session-probe + redirect logic on tenant context.
|
|
||||||
const slug = getTenantSlug();
|
const slug = getTenantSlug();
|
||||||
|
if (slug) headers["X-Molecule-Org-Slug"] = slug;
|
||||||
|
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
|
||||||
|
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
|
||||||
|
|
||||||
const res = await fetch(`${PLATFORM_URL}${path}`, {
|
const res = await fetch(`${PLATFORM_URL}${path}`, {
|
||||||
method,
|
method,
|
||||||
|
|||||||
@ -7,32 +7,6 @@ export default defineConfig({
|
|||||||
test: {
|
test: {
|
||||||
environment: 'node',
|
environment: 'node',
|
||||||
exclude: ['e2e/**', 'node_modules/**', '**/dist/**'],
|
exclude: ['e2e/**', 'node_modules/**', '**/dist/**'],
|
||||||
// CI-conditional test timeout (issue #96).
|
|
||||||
//
|
|
||||||
// Vitest's 5000ms default is too tight for the first test in any
|
|
||||||
// file under our CI shape: `npx vitest run --coverage` on the
|
|
||||||
// self-hosted Gitea Actions Docker runner. The cold-start cost
|
|
||||||
// (v8 coverage instrumentation init + JSDOM bootstrap + module-
|
|
||||||
// graph import for @/components/* and @/lib/* + first React
|
|
||||||
// render) consistently consumes 5-7 seconds for the first
|
|
||||||
// synchronous test in heavyweight component files
|
|
||||||
// (ActivityTab.test.tsx, CreateWorkspaceDialog.test.tsx,
|
|
||||||
// ConfigTab.provider.test.tsx) — even though every subsequent
|
|
||||||
// test in the same file completes in 100-1500ms.
|
|
||||||
//
|
|
||||||
// Empirically the worst observed first-test was 6453ms in a
|
|
||||||
// single file (CreateWorkspaceDialog). 30000ms gives ~5x
|
|
||||||
// headroom over that on CI; we still keep 5000ms locally so
|
|
||||||
// genuine waitFor races / hung promises stay sensitive in dev.
|
|
||||||
//
|
|
||||||
// Same vitest pattern documented at:
|
|
||||||
// https://vitest.dev/config/testtimeout
|
|
||||||
// https://vitest.dev/guide/coverage#profiling-test-performance
|
|
||||||
//
|
|
||||||
// Per-test duration is still emitted to the CI log; if a test
|
|
||||||
// ever silently approaches 25-30s under this raised ceiling that
|
|
||||||
// will surface as a duration regression and we revisit.
|
|
||||||
testTimeout: process.env.CI ? 30000 : 5000,
|
|
||||||
// Coverage is instrumented but NOT yet a CI gate — first land
|
// Coverage is instrumented but NOT yet a CI gate — first land
|
||||||
// observability so we can see the baseline, then dial in
|
// observability so we can see the baseline, then dial in
|
||||||
// thresholds + a hard gate in a follow-up PR (#1815). Today's
|
// thresholds + a hard gate in a follow-up PR (#1815). Today's
|
||||||
|
|||||||
@ -1,43 +0,0 @@
|
|||||||
# docker-compose.dev.yml — overlay over docker-compose.yml for local dev
|
|
||||||
# with air-driven live reload of the platform (workspace-server) service.
|
|
||||||
#
|
|
||||||
# Usage:
|
|
||||||
# docker compose -f docker-compose.yml -f docker-compose.dev.yml up
|
|
||||||
# (or `make dev` shorthand from repo root)
|
|
||||||
#
|
|
||||||
# What this overlay changes vs docker-compose.yml alone:
|
|
||||||
# - Platform service uses workspace-server/Dockerfile.dev (air on top of
|
|
||||||
# golang:1.25-alpine) instead of the multi-stage prod Dockerfile.
|
|
||||||
# - Platform service bind-mounts the host's workspace-server/ source
|
|
||||||
# into /app/workspace-server so air sees source edits live.
|
|
||||||
# - Other services (postgres, redis, langfuse, etc.) inherit unchanged
|
|
||||||
# from docker-compose.yml.
|
|
||||||
#
|
|
||||||
# What stays the same:
|
|
||||||
# - All env vars, volumes, depends_on, healthchecks from docker-compose.yml.
|
|
||||||
# - Network topology + ports.
|
|
||||||
# - Postgres/Redis as service containers (no in-process replacements).
|
|
||||||
|
|
||||||
services:
|
|
||||||
platform:
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: workspace-server/Dockerfile.dev
|
|
||||||
# Rebind source: edits under host's workspace-server/ propagate live.
|
|
||||||
# The named volume on go-build-cache speeds up first build per container.
|
|
||||||
volumes:
|
|
||||||
- ./workspace-server:/app/workspace-server
|
|
||||||
- go-build-cache:/root/.cache/go-build
|
|
||||||
- go-mod-cache:/go/pkg/mod
|
|
||||||
# Air signals the running binary on rebuild; ensure shell stops cleanly.
|
|
||||||
init: true
|
|
||||||
# Mark the service as dev-mode so the platform can short-circuit any
|
|
||||||
# behavior that's incompatible with hot-reload (e.g. background
|
|
||||||
# cron-style watchers that don't survive process restart). No-op
|
|
||||||
# today; reserved for future flag use.
|
|
||||||
environment:
|
|
||||||
MOLECULE_DEV_HOT_RELOAD: "1"
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
go-build-cache:
|
|
||||||
go-mod-cache:
|
|
||||||
@ -13,7 +13,6 @@ services:
|
|||||||
- pgdata:/var/lib/postgresql/data
|
- pgdata:/var/lib/postgresql/data
|
||||||
networks:
|
networks:
|
||||||
- molecule-monorepo-net
|
- molecule-monorepo-net
|
||||||
restart: unless-stopped
|
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-dev}"]
|
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-dev}"]
|
||||||
interval: 2s
|
interval: 2s
|
||||||
@ -51,7 +50,6 @@ services:
|
|||||||
- redisdata:/data
|
- redisdata:/data
|
||||||
networks:
|
networks:
|
||||||
- molecule-monorepo-net
|
- molecule-monorepo-net
|
||||||
restart: unless-stopped
|
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "redis-cli", "ping"]
|
test: ["CMD", "redis-cli", "ping"]
|
||||||
interval: 2s
|
interval: 2s
|
||||||
@ -128,10 +126,6 @@ services:
|
|||||||
REDIS_URL: redis://redis:6379
|
REDIS_URL: redis://redis:6379
|
||||||
PORT: "${PLATFORM_PORT:-8080}"
|
PORT: "${PLATFORM_PORT:-8080}"
|
||||||
PLATFORM_URL: "http://platform:${PLATFORM_PORT:-8080}"
|
PLATFORM_URL: "http://platform:${PLATFORM_PORT:-8080}"
|
||||||
# Container network namespace is already isolated; "all interfaces"
|
|
||||||
# inside the container = the bridge interface only. The fail-open
|
|
||||||
# default (127.0.0.1) would block host-to-container access.
|
|
||||||
BIND_ADDR: "${BIND_ADDR:-0.0.0.0}"
|
|
||||||
# Default MOLECULE_ENV=development so the WorkspaceAuth / AdminAuth
|
# Default MOLECULE_ENV=development so the WorkspaceAuth / AdminAuth
|
||||||
# middleware fail-open path activates when ADMIN_TOKEN is unset —
|
# middleware fail-open path activates when ADMIN_TOKEN is unset —
|
||||||
# otherwise the canvas (which runs without a bearer in pure local
|
# otherwise the canvas (which runs without a bearer in pure local
|
||||||
@ -201,28 +195,12 @@ services:
|
|||||||
# App private key — read-only bind-mount. The host-side path is
|
# App private key — read-only bind-mount. The host-side path is
|
||||||
# gitignored per .gitignore rules (/.secrets/ + *.pem).
|
# gitignored per .gitignore rules (/.secrets/ + *.pem).
|
||||||
- ./.secrets/github-app.pem:/secrets/github-app.pem:ro
|
- ./.secrets/github-app.pem:/secrets/github-app.pem:ro
|
||||||
# Per-role persona credentials (molecule-core#242 local surface).
|
|
||||||
# Sourced at workspace creation time by org_import.go::loadPersonaEnvFile
|
|
||||||
# when a workspace.yaml carries `role: <name>`. The host-side dir is
|
|
||||||
# populated by the operator-host bootstrap kit (28 dev-tree personas);
|
|
||||||
# /etc/molecule-bootstrap/personas is the in-container path the
|
|
||||||
# platform expects (matches the prod tenant-EC2 path so the same code
|
|
||||||
# works in both modes).
|
|
||||||
#
|
|
||||||
# Read-only mount — workspace-server only reads, never writes here.
|
|
||||||
# If the host dir is empty/missing the platform's loadPersonaEnvFile
|
|
||||||
# silently no-ops per its existing semantics, so this mount is safe
|
|
||||||
# even on a fresh machine that hasn't run the bootstrap kit yet.
|
|
||||||
- ${MOLECULE_PERSONA_ROOT_HOST:-${HOME}/.molecule-ai/personas}:/etc/molecule-bootstrap/personas:ro
|
|
||||||
ports:
|
ports:
|
||||||
- "${PLATFORM_PUBLISH_PORT:-8080}:${PLATFORM_PORT:-8080}"
|
- "${PLATFORM_PUBLISH_PORT:-8080}:${PLATFORM_PORT:-8080}"
|
||||||
networks:
|
networks:
|
||||||
- molecule-monorepo-net
|
- molecule-monorepo-net
|
||||||
restart: unless-stopped
|
|
||||||
healthcheck:
|
healthcheck:
|
||||||
# Plain GET — `--spider` would issue HEAD, which returns 404 because
|
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:${PLATFORM_PORT:-8080}/health || exit 1"]
|
||||||
# /health is registered as GET only.
|
|
||||||
test: ["CMD-SHELL", "wget -qO /dev/null --tries=1 http://localhost:${PLATFORM_PORT:-8080}/health || exit 1"]
|
|
||||||
interval: 5s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 10
|
retries: 10
|
||||||
@ -260,7 +238,7 @@ services:
|
|||||||
networks:
|
networks:
|
||||||
- molecule-monorepo-net
|
- molecule-monorepo-net
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "wget -qO /dev/null --tries=1 http://127.0.0.1:${CANVAS_PORT:-3000} || exit 1"]
|
test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://127.0.0.1:${CANVAS_PORT:-3000} || exit 1"]
|
||||||
interval: 10s
|
interval: 10s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 10
|
retries: 10
|
||||||
|
|||||||
@ -1,74 +0,0 @@
|
|||||||
# ADR-002: Local-build mode signalled by `MOLECULE_IMAGE_REGISTRY` presence
|
|
||||||
|
|
||||||
* Status: Accepted (2026-05-07)
|
|
||||||
* Issue: #63 (closes Task #194)
|
|
||||||
* Decision: Hongming (CTO) + Claude Opus 4.7 (implementation)
|
|
||||||
|
|
||||||
## Context
|
|
||||||
|
|
||||||
Pre-2026-05-06, every Molecule deployment — both production tenants and OSS contributor laptops — pulled workspace-template-* container images from `ghcr.io/molecule-ai/`. Production tenants additionally set `MOLECULE_IMAGE_REGISTRY` to an AWS ECR mirror via Railway env / EC2 user-data, but the OSS default was the upstream GHCR org.
|
|
||||||
|
|
||||||
On 2026-05-06 the `Molecule-AI` GitHub org was suspended (saved memory: `feedback_github_botring_fingerprint`). GHCR now returns **403 Forbidden** for every `molecule-ai/workspace-template-*` manifest. OSS contributors who clone `molecule-core` and run `go run ./workspace-server/cmd/server` cannot provision a workspace — every first provision fails with:
|
|
||||||
|
|
||||||
```
|
|
||||||
docker image "ghcr.io/molecule-ai/workspace-template-claude-code:latest" not found after pull attempt
|
|
||||||
```
|
|
||||||
|
|
||||||
Production tenants are unaffected (their `MOLECULE_IMAGE_REGISTRY` points at ECR, which we still control), but OSS onboarding is broken. Workspace template repos are intentionally separate from `molecule-core` (each runtime is OSS-shape and forkable), and they are mirrored to Gitea (`https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-<runtime>`) — but the provisioner has no path that consumes Gitea source directly.
|
|
||||||
|
|
||||||
## Decision
|
|
||||||
|
|
||||||
When `MOLECULE_IMAGE_REGISTRY` is **unset** (or empty), the provisioner switches to a **local-build mode** that:
|
|
||||||
|
|
||||||
1. Looks up the workspace-template repo's HEAD sha on Gitea via a single API call.
|
|
||||||
2. Checks whether a SHA-pinned local image (`molecule-local/workspace-template-<runtime>:<sha12>`) already exists; if so, reuses it.
|
|
||||||
3. Otherwise shallow-clones the repo into `~/.cache/molecule/workspace-template-build/<runtime>/<sha12>/` and runs `docker build --platform=linux/amd64 -t <tag> .`.
|
|
||||||
4. Hands the SHA-pinned tag to Docker for ContainerCreate, bypassing the registry-pull path entirely.
|
|
||||||
|
|
||||||
When `MOLECULE_IMAGE_REGISTRY` is **set**, behavior is unchanged: pull the image from that registry. Existing prod tenants and self-hosters who mirror to a private registry are not affected.
|
|
||||||
|
|
||||||
## Consequences
|
|
||||||
|
|
||||||
### Positive
|
|
||||||
|
|
||||||
* **Zero-config OSS onboarding** — `git clone molecule-core && go run ./workspace-server/cmd/server` boots end-to-end without any registry credentials.
|
|
||||||
* **Production tenants protected** — same env var, same semantics in SaaS-mode. Migration is a no-op.
|
|
||||||
* **No new env var** — extending an existing var's semantics ("where to pull, OR build locally if absent") rather than introducing `MOLECULE_LOCAL_BUILD=1` keeps the surface small.
|
|
||||||
* **SHA-pinned cache** — repeat builds are O(API-call); only template-repo HEAD changes invalidate.
|
|
||||||
* **Production-parity image** — amd64 emulation on Apple Silicon honours `feedback_local_must_mimic_production`. The provisioner's existing `defaultImagePlatform()` already forces amd64 for parity; building amd64 locally lets that decision stay consistent.
|
|
||||||
|
|
||||||
### Negative
|
|
||||||
|
|
||||||
* **Conflates two concerns** — `MOLECULE_IMAGE_REGISTRY` now signals BOTH "where to pull" AND "build locally if absent." A future operator who unsets it expecting a hard error will instead get a slow first-provision. Documented in the runbook.
|
|
||||||
* **First-provision is slow on Apple Silicon** — 5–10 min via QEMU emulation on the cold path. Mitigated by SHA-cache (subsequent runs are <1s lookup + 0s build).
|
|
||||||
* **Coverage gap** — only 4 of 9 runtimes are mirrored to Gitea today (`claude-code`, `hermes`, `langgraph`, `autogen`). The other 5 fail with an actionable "not mirrored" error. Mirroring those repos is a separate task.
|
|
||||||
* **Implicit trust boundary** — operator running `go run` implicitly trusts `molecule-ai/molecule-ai-workspace-template-*` repos on Gitea. This is the same trust they would extend to the GHCR images today; not a new attack surface.
|
|
||||||
|
|
||||||
## Alternatives considered
|
|
||||||
|
|
||||||
1. **New env var `MOLECULE_LOCAL_BUILD=1`** — explicit, but requires OSS contributors to know it exists. Violates the zero-config goal.
|
|
||||||
2. **Push pre-built images to a Gitea container registry, mirror tag from upstream** — operationally cleaner but: (a) Gitea's container-registry add-on isn't deployed on the operator host, (b) defeats the OSS-contributor goal of "hack on the source, see your changes," since they'd still pull a stale image.
|
|
||||||
3. **Embed Dockerfiles in molecule-core itself, drop the standalone template repos** — would work but breaks the OSS-shape principle; templates are intentionally separable, anyone-can-fork artifacts.
|
|
||||||
4. **Build native arch on Apple Silicon (arm64) and drop the platform pin in local-mode** — fast, but creates `linux/arm64` images that diverge from the amd64-only prod runtime. Local-vs-prod debug behavior would diverge. Rejected per `feedback_local_must_mimic_production`.
|
|
||||||
|
|
||||||
## Security review
|
|
||||||
|
|
||||||
* **Gitea repo URL allowlist** — runtime name must be in the `knownRuntimes` allowlist (defence-in-depth against a future code path that lets cfg.Runtime carry untrusted input). Repo prefix is hardcoded to `https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-`; forks can override via `MOLECULE_LOCAL_TEMPLATE_REPO_PREFIX` (opt-in, default off).
|
|
||||||
* **Token handling** — clones are anonymous over HTTPS by default (templates are public). `MOLECULE_GITEA_TOKEN`, if set, is passed via URL userinfo for the clone and as `Authorization: token` for the API call. The token is **masked in every log line** via `maskTokenInURL` / `maskTokenInString` and never appears in the cache dir path.
|
|
||||||
* **No silent fallback** — if Gitea is unreachable or the runtime isn't mirrored, we return a clear error mentioning the repo URL and the missing runtime. We **never** fall back to GHCR/ECR (that would be a confusing bug for an OSS contributor who happened to have stale ECR creds in their docker config).
|
|
||||||
* **Build-arg injection** — `docker build` is invoked with NO `--build-arg` from external input. Dockerfile is consumed as-is.
|
|
||||||
* **Cache poisoning** — cache key is the Gitea HEAD sha + Dockerfile content; a force-push to the template repo's main branch regenerates the key on next run. Cache dir is per-user (`$HOME/.cache`), so cross-user attacks aren't relevant in single-user dev mode.
|
|
||||||
|
|
||||||
## Versioning + back-compat
|
|
||||||
|
|
||||||
* Existing prod tenants set `MOLECULE_IMAGE_REGISTRY=<ECR url>` → unchanged behavior.
|
|
||||||
* Existing local installs that set the var → unchanged behavior.
|
|
||||||
* Existing local installs that don't set it → switch to local-build path. Migration: none required (additive); first provision will take 5–10 min instead of failing.
|
|
||||||
* No deprecations.
|
|
||||||
|
|
||||||
## References
|
|
||||||
|
|
||||||
* Issue #63 — feat(workspace-server): local-dev provisioner builds from Gitea source
|
|
||||||
* Saved memory `feedback_local_must_mimic_production` — local docker must mimic prod, no bypasses
|
|
||||||
* Saved memory `reference_post_suspension_pipeline` — full post-2026-05-06 stack shape
|
|
||||||
* Saved memory `feedback_github_botring_fingerprint` — what got the org suspended
|
|
||||||
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
**Status:** living document — update when you ship a feature that touches one backend.
|
**Status:** living document — update when you ship a feature that touches one backend.
|
||||||
**Owner:** workspace-server + controlplane teams.
|
**Owner:** workspace-server + controlplane teams.
|
||||||
**Last audit:** 2026-05-07 (plugin install/uninstall closed for EC2 backend via EIC SSH push to the bind-mounted `/configs/plugins/<name>/`, mirroring the Files API PR #1702 pattern).
|
**Last audit:** 2026-05-05 (Claude agent — `provisionWorkspaceAuto` / `StopWorkspaceAuto` / `HasProvisioner` SoT pattern landed in PRs #2811 + #2824).
|
||||||
|
|
||||||
## Why this exists
|
## Why this exists
|
||||||
|
|
||||||
@ -54,7 +54,7 @@ For "do we have any backend?", use `HasProvisioner()`, never bare `h.provisioner
|
|||||||
| **Files API** | | | | |
|
| **Files API** | | | | |
|
||||||
| List / Read / Write / Replace / Delete | `container_files.go`, `template_import.go` | `docker exec` + tar `CopyToContainer` | SSH via EIC tunnel (PR #1702) | ✅ parity as of 2026-04-22 (previously docker-only) |
|
| List / Read / Write / Replace / Delete | `container_files.go`, `template_import.go` | `docker exec` + tar `CopyToContainer` | SSH via EIC tunnel (PR #1702) | ✅ parity as of 2026-04-22 (previously docker-only) |
|
||||||
| **Plugins** | | | | |
|
| **Plugins** | | | | |
|
||||||
| Install / uninstall / list | `plugins_install.go` + `plugins_install_eic.go` | `deliverToContainer()` → exec+`CopyToContainer` on local container | `instance_id` set → EIC SSH push of the staged tarball into the EC2's bind-mounted `/configs/plugins/<name>/` (per `workspaceFilePathPrefix`), `chown 1000:1000`, restart | ✅ parity |
|
| Install / uninstall / list | `plugins_install.go` | `deliverToContainer()` + volume rm | **gap — no live plugin delivery** | 🔴 **docker-only** |
|
||||||
| **Terminal (WebSocket)** | | | | |
|
| **Terminal (WebSocket)** | | | | |
|
||||||
| Dispatch | `terminal.go:90-105` | `instance_id=""` → `handleLocalConnect` → `docker attach` | `instance_id` set → `handleRemoteConnect` → EIC SSH + `docker exec` | ✅ parity (different implementations, same UX) |
|
| Dispatch | `terminal.go:90-105` | `instance_id=""` → `handleLocalConnect` → `docker attach` | `instance_id` set → `handleRemoteConnect` → EIC SSH + `docker exec` | ✅ parity (different implementations, same UX) |
|
||||||
| **A2A proxy** | | | | |
|
| **A2A proxy** | | | | |
|
||||||
|
|||||||
@ -10,7 +10,7 @@ tags: [platform, fly.io, deployment, infrastructure]
|
|||||||
|
|
||||||
Your infrastructure choice just got decoupled from your agent platform choice. Molecule AI now ships three production-ready workspace backends — `docker`, `flyio`, and `controlplane` — and switching between them takes a single environment variable. Your agent code, model choices, and workspace topology stay exactly the same.
|
Your infrastructure choice just got decoupled from your agent platform choice. Molecule AI now ships three production-ready workspace backends — `docker`, `flyio`, and `controlplane` — and switching between them takes a single environment variable. Your agent code, model choices, and workspace topology stay exactly the same.
|
||||||
|
|
||||||
This post covers what shipped in [PR #501](https://git.moleculesai.app/molecule-ai/molecule-core/pull/501) (Fly Machines provisioner) and [PR #503](https://git.moleculesai.app/molecule-ai/molecule-core/pull/503) (control plane provisioner), and which backend fits your situation.
|
This post covers what shipped in [PR #501](https://github.com/Molecule-AI/molecule-core/pull/501) (Fly Machines provisioner) and [PR #503](https://github.com/Molecule-AI/molecule-core/pull/503) (control plane provisioner), and which backend fits your situation.
|
||||||
|
|
||||||
## Before: One Deployment Model for Every Use Case
|
## Before: One Deployment Model for Every Use Case
|
||||||
|
|
||||||
@ -107,4 +107,4 @@ No changes to agent code, tool definitions, or orchestration logic. Swap `CONTAI
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
*[PR #501](https://git.moleculesai.app/molecule-ai/molecule-core/pull/501) (Fly Machines provisioner) and [PR #503](https://git.moleculesai.app/molecule-ai/molecule-core/pull/503) (control plane provisioner) are both merged to `main`. Molecule AI is open source — contributions welcome.*
|
*[PR #501](https://github.com/Molecule-AI/molecule-core/pull/501) (Fly Machines provisioner) and [PR #503](https://github.com/Molecule-AI/molecule-core/pull/503) (control plane provisioner) are both merged to `main`. Molecule AI is open source — contributions welcome.*
|
||||||
|
|||||||
@ -27,7 +27,7 @@ The biggest user-facing change: every Molecule AI org can now mint named, revoca
|
|||||||
|
|
||||||
→ [User guide: Organization API Keys](/docs/guides/org-api-keys.md)
|
→ [User guide: Organization API Keys](/docs/guides/org-api-keys.md)
|
||||||
→ [Architecture: Org API Keys](/docs/architecture/org-api-keys.md)
|
→ [Architecture: Org API Keys](/docs/architecture/org-api-keys.md)
|
||||||
→ PRs: [#1105](https://git.moleculesai.app/molecule-ai/molecule-core/pull/1105), [#1107](https://git.moleculesai.app/molecule-ai/molecule-core/pull/1107), [#1109](https://git.moleculesai.app/molecule-ai/molecule-core/pull/1109), [#1110](https://git.moleculesai.app/molecule-ai/molecule-core/pull/1110)
|
→ PRs: [#1105](https://github.com/Molecule-AI/molecule-core/pull/1105), [#1107](https://github.com/Molecule-AI/molecule-core/pull/1107), [#1109](https://github.com/Molecule-AI/molecule-core/pull/1109), [#1110](https://github.com/Molecule-AI/molecule-core/pull/1110)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@ -48,7 +48,7 @@ AdminAuth now accepts a session-verification tier that runs **before** the beare
|
|||||||
**Self-hosted / local dev:** `CP_UPSTREAM_URL` is unset → this feature is disabled, behaviour is unchanged.
|
**Self-hosted / local dev:** `CP_UPSTREAM_URL` is unset → this feature is disabled, behaviour is unchanged.
|
||||||
|
|
||||||
→ [Guide: Same-Origin Canvas Fetches & Session Auth](/docs/guides/same-origin-canvas-fetches.md)
|
→ [Guide: Same-Origin Canvas Fetches & Session Auth](/docs/guides/same-origin-canvas-fetches.md)
|
||||||
→ PRs: [#1099](https://git.moleculesai.app/molecule-ai/molecule-core/pull/1099), [#1100](https://git.moleculesai.app/molecule-ai/molecule-core/pull/1100)
|
→ PRs: [#1099](https://github.com/Molecule-AI/molecule-core/pull/1099), [#1100](https://github.com/Molecule-AI/molecule-core/pull/1100)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@ -87,7 +87,7 @@ The proxy is **fail-closed**: only an explicit allowlist of paths (`/cp/auth/`,
|
|||||||
This is also the structural fix for the lateral-movement risk that session auth introduced: without the allowlist, a tenant-authed browser user could have proxied `/cp/admin/*` requests upstream and exploited the fact that those endpoints accept WorkOS session cookies. The allowlist makes that impossible by construction.
|
This is also the structural fix for the lateral-movement risk that session auth introduced: without the allowlist, a tenant-authed browser user could have proxied `/cp/admin/*` requests upstream and exploited the fact that those endpoints accept WorkOS session cookies. The allowlist makes that impossible by construction.
|
||||||
|
|
||||||
→ [Guide: Same-Origin Canvas Fetches & Session Auth](/docs/guides/same-origin-canvas-fetches.md)
|
→ [Guide: Same-Origin Canvas Fetches & Session Auth](/docs/guides/same-origin-canvas-fetches.md)
|
||||||
→ PR: [#1095](https://git.moleculesai.app/molecule-ai/molecule-core/pull/1095)
|
→ PR: [#1095](https://github.com/Molecule-AI/molecule-core/pull/1095)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@ -99,7 +99,7 @@ The waitlist itself is a Canvas-administered list with email hashing in audit lo
|
|||||||
|
|
||||||
This is the operational surface that makes the above security work matter: the beta is invitation-only, credentials are scoped, and every admin action is auditable.
|
This is the operational surface that makes the above security work matter: the beta is invitation-only, credentials are scoped, and every admin action is auditable.
|
||||||
|
|
||||||
→ Control plane PRs [#145](https://git.moleculesai.app/molecule-ai/molecule-controlplane/pull/145), [#148](https://git.moleculesai.app/molecule-ai/molecule-controlplane/pull/148), [#150](https://git.moleculesai.app/molecule-ai/molecule-controlplane/pull/150)
|
→ Control plane PRs [#145](https://github.com/Molecule-AI/molecule-controlplane/pull/145), [#148](https://github.com/Molecule-AI/molecule-controlplane/pull/148), [#150](https://github.com/Molecule-AI/molecule-controlplane/pull/150)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@ -12,7 +12,7 @@ Your team is in Discord. Your AI agents are in Molecule AI. Until today, those t
|
|||||||
|
|
||||||
That's now one webhook URL.
|
That's now one webhook URL.
|
||||||
|
|
||||||
Molecule AI workspaces can now connect to Discord. Here's what shipped in [PR #656](https://git.moleculesai.app/molecule-ai/molecule-core/pull/656).
|
Molecule AI workspaces can now connect to Discord. Here's what shipped in [PR #656](https://github.com/Molecule-AI/molecule-core/pull/656).
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@ -70,7 +70,7 @@ For inbound slash commands, point your Discord app's **Interactions Endpoint URL
|
|||||||
|
|
||||||
## Security: Webhook Tokens Don't Appear in Logs
|
## Security: Webhook Tokens Don't Appear in Logs
|
||||||
|
|
||||||
Webhook URLs contain a token (`/webhooks/{id}/{token}`). If that token leaks into server logs, it's a rotation event. The Discord adapter is explicit about this: HTTP request errors are logged without the URL, and the adapter returns a generic error message. This was hardened in [PR #659](https://git.moleculesai.app/molecule-ai/molecule-core/pull/659).
|
Webhook URLs contain a token (`/webhooks/{id}/{token}`). If that token leaks into server logs, it's a rotation event. The Discord adapter is explicit about this: HTTP request errors are logged without the URL, and the adapter returns a generic error message. This was hardened in [PR #659](https://github.com/Molecule-AI/molecule-core/pull/659).
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@ -97,4 +97,4 @@ Documentation: [Social Channels guide](/docs/agent-runtime/social-channels#disco
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
*Discord adapter shipped in [PR #656](https://git.moleculesai.app/molecule-ai/molecule-core/pull/656). Security hardening in [PR #659](https://git.moleculesai.app/molecule-ai/molecule-core/pull/659). Molecule AI is open source — contributions welcome.*
|
*Discord adapter shipped in [PR #656](https://github.com/Molecule-AI/molecule-core/pull/656). Security hardening in [PR #659](https://github.com/Molecule-AI/molecule-core/pull/659). Molecule AI is open source — contributions welcome.*
|
||||||
|
|||||||
@ -1,41 +1,5 @@
|
|||||||
# Local Development
|
# Local Development
|
||||||
|
|
||||||
## Workspace Template Images: Local-Build Mode (Issue #63)
|
|
||||||
|
|
||||||
OSS contributors who run `molecule-core` locally do **not** need to authenticate to GHCR or AWS ECR. When the `MOLECULE_IMAGE_REGISTRY` env var is **unset**, the platform automatically:
|
|
||||||
|
|
||||||
1. Looks up the HEAD sha of `https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-<runtime>` (single API call, no clone).
|
|
||||||
2. If a local image tagged `molecule-local/workspace-template-<runtime>:<sha12>` already exists, reuses it (cache hit).
|
|
||||||
3. Otherwise, shallow-clones the repo into `~/.cache/molecule/workspace-template-build/<runtime>/<sha12>/` and runs `docker build --platform=linux/amd64 -t <tag> .`.
|
|
||||||
4. Hands the SHA-pinned tag to Docker for `ContainerCreate`.
|
|
||||||
|
|
||||||
**First-provision build time:** 5–10 min on Apple Silicon (amd64 emulation). Subsequent provisions hit the cache and start in seconds. Cache is invalidated automatically when the template repo's HEAD moves.
|
|
||||||
|
|
||||||
**Currently mirrored on Gitea:** `claude-code`, `hermes`, `langgraph`, `autogen`. Other runtimes (`crewai`, `deepagents`, `codex`, `gemini-cli`, `openclaw`) fail with an actionable "not mirrored to Gitea" error pointing at the missing repo.
|
|
||||||
|
|
||||||
**Production tenants are unaffected** — every prod tenant sets `MOLECULE_IMAGE_REGISTRY` to its private ECR mirror via Railway env / EC2 user-data, so the SaaS pull path stays identical.
|
|
||||||
|
|
||||||
### Environment overrides
|
|
||||||
|
|
||||||
| Var | Default | Use case |
|
|
||||||
|-----|---------|----------|
|
|
||||||
| `MOLECULE_IMAGE_REGISTRY` | (unset) | Set to a real registry URL to switch from local-build to SaaS-pull mode. |
|
|
||||||
| `MOLECULE_LOCAL_BUILD_CACHE` | `~/.cache/molecule/workspace-template-build` | Override cache directory. |
|
|
||||||
| `MOLECULE_LOCAL_TEMPLATE_REPO_PREFIX` | `https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-` | Point at a fork. |
|
|
||||||
| `MOLECULE_GITEA_TOKEN` | (unset) | Required only if your fork has private template repos. |
|
|
||||||
|
|
||||||
### Verifying a switch from the GHCR-retag stopgap
|
|
||||||
|
|
||||||
Pre-fix, OSS contributors worked around the suspended GHCR org by manually retagging an `:latest` image. After this change, that workaround is **redundant**: simply unset `MOLECULE_IMAGE_REGISTRY` (or leave it unset), boot the platform, and provision a workspace. Logs will show:
|
|
||||||
|
|
||||||
```
|
|
||||||
Provisioner: local-build mode → using locally-built image molecule-local/workspace-template-claude-code:<sha12> for runtime claude-code
|
|
||||||
local-build: cloning https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-claude-code → ...
|
|
||||||
local-build: docker build done in <duration>
|
|
||||||
```
|
|
||||||
|
|
||||||
If you still see `ghcr.io/molecule-ai/...` in the boot log, double-check `env | grep MOLECULE_IMAGE_REGISTRY` — a stale shell export from the pre-fix workaround could keep SaaS-mode active.
|
|
||||||
|
|
||||||
## Starting the Stack
|
## Starting the Stack
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@ -3,8 +3,8 @@
|
|||||||
**Date:** 2026-04-23
|
**Date:** 2026-04-23
|
||||||
**Severity:** High — every new SaaS tenant blocked
|
**Severity:** High — every new SaaS tenant blocked
|
||||||
**Detection path:** E2E Staging SaaS run 24848425822 failed at "tenant provisioning"; investigation of CP Railway logs surfaced the auth mismatch.
|
**Detection path:** E2E Staging SaaS run 24848425822 failed at "tenant provisioning"; investigation of CP Railway logs surfaced the auth mismatch.
|
||||||
**Status:** Fix pushed on [molecule-controlplane#238](https://git.moleculesai.app/molecule-ai/molecule-controlplane/pull/238).
|
**Status:** Fix pushed on [molecule-controlplane#238](https://github.com/Molecule-AI/molecule-controlplane/pull/238).
|
||||||
**Related:** [issue #239](https://git.moleculesai.app/molecule-ai/molecule-controlplane/issues/239) (Cloudflare DNS record quota), [testing-strategy.md](../engineering/testing-strategy.md)
|
**Related:** [issue #239](https://github.com/Molecule-AI/molecule-controlplane/issues/239) (Cloudflare DNS record quota), [testing-strategy.md](../engineering/testing-strategy.md)
|
||||||
|
|
||||||
## Summary
|
## Summary
|
||||||
|
|
||||||
@ -35,7 +35,7 @@ The flow was:
|
|||||||
|
|
||||||
### The commit that introduced the bug
|
### The commit that introduced the bug
|
||||||
|
|
||||||
[molecule-controlplane#235](https://git.moleculesai.app/molecule-ai/molecule-controlplane/pull/235) — "fix(provision): wait for tenant boot-event before falling back to canary". Merged 2026-04-22.
|
[molecule-controlplane#235](https://github.com/Molecule-AI/molecule-controlplane/pull/235) — "fix(provision): wait for tenant boot-event before falling back to canary". Merged 2026-04-22.
|
||||||
|
|
||||||
Before #235, readiness was determined via a canary probe through Cloudflare's edge — which didn't need CP-side auth, so the INSERT ordering didn't matter. #235 made boot-events the primary readiness signal but didn't move the INSERT earlier. The race was latent before but became load-bearing after.
|
Before #235, readiness was determined via a canary probe through Cloudflare's edge — which didn't need CP-side auth, so the INSERT ordering didn't matter. #235 made boot-events the primary readiness signal but didn't move the INSERT earlier. The race was latent before but became load-bearing after.
|
||||||
|
|
||||||
@ -90,7 +90,7 @@ bootReady, _ := provisioner.WaitForTenantReady(ctx, h.db, org.ID, 4*time.Minute)
|
|||||||
h.db.ExecContext(ctx, `UPDATE org_instances SET status = 'running' WHERE org_id = $1`, org.ID)
|
h.db.ExecContext(ctx, `UPDATE org_instances SET status = 'running' WHERE org_id = $1`, org.ID)
|
||||||
```
|
```
|
||||||
|
|
||||||
See [molecule-controlplane#238](https://git.moleculesai.app/molecule-ai/molecule-controlplane/pull/238) for the full diff.
|
See [molecule-controlplane#238](https://github.com/Molecule-AI/molecule-controlplane/pull/238) for the full diff.
|
||||||
|
|
||||||
## Lessons
|
## Lessons
|
||||||
|
|
||||||
@ -122,9 +122,9 @@ Early investigation blamed the hermes provider 401 bug (a separate, known issue
|
|||||||
|
|
||||||
## Follow-ups
|
## Follow-ups
|
||||||
|
|
||||||
- [ ] Land [molecule-controlplane#238](https://git.moleculesai.app/molecule-ai/molecule-controlplane/pull/238)
|
- [ ] Land [molecule-controlplane#238](https://github.com/Molecule-AI/molecule-controlplane/pull/238)
|
||||||
- [ ] Redeploy staging-api, verify E2E goes green
|
- [ ] Redeploy staging-api, verify E2E goes green
|
||||||
- [ ] Add CP integration test suite (see lesson #2)
|
- [ ] Add CP integration test suite (see lesson #2)
|
||||||
- [ ] Wire E2E failure → notification (see lesson #3)
|
- [ ] Wire E2E failure → notification (see lesson #3)
|
||||||
- [ ] Add invariant comment in `provisionTenant` (see lesson #4)
|
- [ ] Add invariant comment in `provisionTenant` (see lesson #4)
|
||||||
- [ ] Cloudflare DNS quota cleanup — [molecule-controlplane#239](https://git.moleculesai.app/molecule-ai/molecule-controlplane/issues/239)
|
- [ ] Cloudflare DNS quota cleanup — [molecule-controlplane#239](https://github.com/Molecule-AI/molecule-controlplane/issues/239)
|
||||||
|
|||||||
@ -138,5 +138,5 @@ If you see any of these, don't try to "clean it up in place" — **cherry-pick o
|
|||||||
|
|
||||||
## Related
|
## Related
|
||||||
|
|
||||||
- [Issue #1822](https://git.moleculesai.app/molecule-ai/molecule-core/issues/1822) — backend parity drift tracker (example of docs that have to stay current)
|
- [Issue #1822](https://github.com/Molecule-AI/molecule-core/issues/1822) — backend parity drift tracker (example of docs that have to stay current)
|
||||||
- [Postmortem: CP boot-event 401](./postmortem-2026-04-23-boot-event-401.md) — caught before shipping because a reviewer could read the diff
|
- [Postmortem: CP boot-event 401](./postmortem-2026-04-23-boot-event-401.md) — caught before shipping because a reviewer could read the diff
|
||||||
|
|||||||
@ -1,147 +0,0 @@
|
|||||||
# Rate-limit observability runbook
|
|
||||||
|
|
||||||
> Companion to issue #64 ("RATE_LIMIT default re-tune analysis"). After
|
|
||||||
> #60 deployed the per-tenant `keyFor` keying, the right RATE_LIMIT
|
|
||||||
> default became data-dependent. This runbook documents the metrics +
|
|
||||||
> queries an operator should run to confirm whether the current 600
|
|
||||||
> req/min/key default is correct, too tight, or too loose.
|
|
||||||
|
|
||||||
## What's already exposed
|
|
||||||
|
|
||||||
The workspace-server's existing Prometheus middleware
|
|
||||||
(`workspace-server/internal/metrics/metrics.go`) tracks every request
|
|
||||||
on every path:
|
|
||||||
|
|
||||||
```
|
|
||||||
molecule_http_requests_total{method, path, status} counter
|
|
||||||
molecule_http_request_duration_seconds_total{method,path,status} counter
|
|
||||||
```
|
|
||||||
|
|
||||||
Path is the matched route pattern (`/workspaces/:id/activity` etc), so
|
|
||||||
high-cardinality workspace UUIDs do not explode the label space.
|
|
||||||
|
|
||||||
The rate limiter middleware (#60, `workspace-server/internal/middleware/ratelimit.go`)
|
|
||||||
also stamps every response with `X-RateLimit-Limit`, `X-RateLimit-Remaining`,
|
|
||||||
and `X-RateLimit-Reset`. Operators with browser-side or proxy-side
|
|
||||||
header capture can read per-request bucket state directly.
|
|
||||||
|
|
||||||
No new instrumentation is needed for #64's acceptance criteria. The
|
|
||||||
metric surface is sufficient — this runbook just collects the queries.
|
|
||||||
|
|
||||||
## Queries to run after #60 deploys
|
|
||||||
|
|
||||||
### 1. Is the bucket actually firing 429s?
|
|
||||||
|
|
||||||
```promql
|
|
||||||
sum(rate(molecule_http_requests_total{status="429"}[5m]))
|
|
||||||
```
|
|
||||||
|
|
||||||
If this is zero on a given tenant, the bucket isn't being hit. If it's
|
|
||||||
sustained > 1/min, dig in.
|
|
||||||
|
|
||||||
### 2. Which routes attract 429s?
|
|
||||||
|
|
||||||
```promql
|
|
||||||
topk(
|
|
||||||
10,
|
|
||||||
sum by (path) (
|
|
||||||
rate(molecule_http_requests_total{status="429"}[5m])
|
|
||||||
)
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Expected shape post-#60:
|
|
||||||
- `/workspaces/:id/activity` should be near zero — the canvas no longer
|
|
||||||
polls it on a 30s/60s/5s cadence (PRs #69 / #71 / #76).
|
|
||||||
- Probe / health / heartbeat paths should be ~0 (those routes have a
|
|
||||||
separate IP-fallback bucket).
|
|
||||||
|
|
||||||
If `/workspaces/:id/activity` 429s persist after PRs #69/#71/#76 deploy, the
|
|
||||||
canvas isn't running the WS-subscriber path — investigate WS health
|
|
||||||
on that tenant.
|
|
||||||
|
|
||||||
### 3. Per-bucket-key inference (no direct exposure today)
|
|
||||||
|
|
||||||
The bucket map itself is in-memory only; we deliberately do **not**
|
|
||||||
expose `org:<uuid>` ↔ remaining-tokens because that map can include
|
|
||||||
SHA-256 hashes of bearer tokens. A tenant that wants per-key visibility
|
|
||||||
should rely on response headers (`X-RateLimit-Remaining` on every
|
|
||||||
response from a given session is the bucket's view of that session).
|
|
||||||
|
|
||||||
If you genuinely need server-side per-bucket counts for triage,
|
|
||||||
file a follow-up — the proper shape is a `/internal/ratelimit-stats`
|
|
||||||
endpoint that emits **counts per key prefix only** (e.g. `org:`, `tok:`,
|
|
||||||
`ip:`), never the key payloads. Don't roll that ad-hoc; it's a security
|
|
||||||
review surface.
|
|
||||||
|
|
||||||
## Decision tree for the re-tune
|
|
||||||
|
|
||||||
After 14 days of production traffic on a tenant, look at the queries
|
|
||||||
above and walk this tree:
|
|
||||||
|
|
||||||
```
|
|
||||||
Q1: Is the 429 rate sustained > 0.1/sec on any tenant?
|
|
||||||
├─ NO → The 600 default has comfortable headroom. Either keep it,
|
|
||||||
│ or lower it carefully (300) ONLY if you have a documented
|
|
||||||
│ reason (e.g. a misbehaving client we want to throttle harder).
|
|
||||||
│ Default to "no change" — see #64 for the math.
|
|
||||||
└─ YES → Q2.
|
|
||||||
|
|
||||||
Q2: Is the 429 rate concentrated on ONE tenant or spread across many?
|
|
||||||
├─ ONE tenant → Operator override: set RATE_LIMIT=1200 or 1800 on that
|
|
||||||
│ tenant's box. Document in the tenant's ops note. The
|
|
||||||
│ default does not need to change.
|
|
||||||
└─ MANY tenants → Q3.
|
|
||||||
|
|
||||||
Q3: Are the 429s on a route that polls (e.g. /activity / /peers)?
|
|
||||||
├─ YES → Confirm PRs #69, #71, #76 have actually deployed to those
|
|
||||||
│ tenants. If they have and 429s persist, the canvas may have
|
|
||||||
│ a regression — do not raise RATE_LIMIT. File a canvas issue.
|
|
||||||
└─ NO → 429s on mutating routes mean genuine load. Raise the default
|
|
||||||
to 1200 in `workspace-server/internal/router/router.go:54`.
|
|
||||||
Same PR should attach: the metric chart, the time window,
|
|
||||||
and a paragraph explaining what changed in our traffic shape.
|
|
||||||
```
|
|
||||||
|
|
||||||
## Alert rule template (drop-in for Prometheus)
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
# Sustained 429s — this is the SLO trip-wire. If this fires, walk the
|
|
||||||
# decision tree above. NB: the issue #64 acceptance criterion is "two
|
|
||||||
# weeks of metrics"; this alert is the inverse — it tells you something
|
|
||||||
# changed before the two weeks are up.
|
|
||||||
groups:
|
|
||||||
- name: workspace-server-ratelimit
|
|
||||||
rules:
|
|
||||||
- alert: WorkspaceServerRateLimit429Sustained
|
|
||||||
expr: |
|
|
||||||
sum by (instance) (
|
|
||||||
rate(molecule_http_requests_total{status="429"}[10m])
|
|
||||||
) > 0.1
|
|
||||||
for: 30m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
owner: workspace-server
|
|
||||||
annotations:
|
|
||||||
summary: "{{ $labels.instance }} sustained 429s — see ratelimit-observability runbook"
|
|
||||||
runbook: "https://git.moleculesai.app/molecule-ai/molecule-core/blob/main/docs/engineering/ratelimit-observability.md"
|
|
||||||
```
|
|
||||||
|
|
||||||
Threshold rationale: 0.1 req/s = 6/min sustained over 10min. Below
|
|
||||||
that, a 429 is almost certainly a transient burst that the canvas's
|
|
||||||
retry-once handler at `canvas/src/lib/api.ts:55` already absorbs. The
|
|
||||||
30m `for:` keeps the alert from chattering on a brief blip.
|
|
||||||
|
|
||||||
## Companion probe script
|
|
||||||
|
|
||||||
For one-off triage when an operator can reproduce the problem in their
|
|
||||||
own browser, `scripts/edge-429-probe.sh` (#62) reproduces a canvas-
|
|
||||||
sized burst against a tenant subdomain and dumps each 429's response
|
|
||||||
shape so the operator can distinguish workspace-server bucket overflow
|
|
||||||
from CF/Vercel edge rate-limiting without dashboard access.
|
|
||||||
|
|
||||||
```sh
|
|
||||||
./scripts/edge-429-probe.sh hongming.moleculesai.app --burst 80 --out /tmp/edge.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
The script's report header explains how to read the output.
|
|
||||||
@ -103,9 +103,9 @@ A bad test:
|
|||||||
|
|
||||||
## Related
|
## Related
|
||||||
|
|
||||||
- [Issue #1821](https://git.moleculesai.app/molecule-ai/molecule-core/issues/1821) — policy tracking issue
|
- [Issue #1821](https://github.com/Molecule-AI/molecule-core/issues/1821) — policy tracking issue
|
||||||
- [Issue #1815](https://git.moleculesai.app/molecule-ai/molecule-core/issues/1815) — Canvas coverage instrumentation
|
- [Issue #1815](https://github.com/Molecule-AI/molecule-core/issues/1815) — Canvas coverage instrumentation
|
||||||
- [Issue #1818](https://git.moleculesai.app/molecule-ai/molecule-core/issues/1818) — Python pytest-cov
|
- [Issue #1818](https://github.com/Molecule-AI/molecule-core/issues/1818) — Python pytest-cov
|
||||||
- [Issue #1814](https://git.moleculesai.app/molecule-ai/molecule-core/issues/1814) — workspace_provision_test.go unblock
|
- [Issue #1814](https://github.com/Molecule-AI/molecule-core/issues/1814) — workspace_provision_test.go unblock
|
||||||
- [Issue #1816](https://git.moleculesai.app/molecule-ai/molecule-core/issues/1816) — tokens.go coverage
|
- [Issue #1816](https://github.com/Molecule-AI/molecule-core/issues/1816) — tokens.go coverage
|
||||||
- [Issue #1819](https://git.moleculesai.app/molecule-ai/molecule-core/issues/1819) — wsauth_middleware coverage
|
- [Issue #1819](https://github.com/Molecule-AI/molecule-core/issues/1819) — wsauth_middleware coverage
|
||||||
|
|||||||
@ -153,7 +153,7 @@ The `id` field is your workspace ID — remember it.
|
|||||||
|---|---|
|
|---|---|
|
||||||
| "Failed to send message — agent may be unreachable" | The tenant couldn't POST to your URL. Verify `curl https://<your-tunnel>/health` returns 200 from another machine. |
|
| "Failed to send message — agent may be unreachable" | The tenant couldn't POST to your URL. Verify `curl https://<your-tunnel>/health` returns 200 from another machine. |
|
||||||
| Response takes > 30s | Canvas times out around 30s. Keep initial implementations simple. For long-running work, return a placeholder and use [polling mode](#next-step-polling-mode-preview) (once available). |
|
| Response takes > 30s | Canvas times out around 30s. Keep initial implementations simple. For long-running work, return a placeholder and use [polling mode](#next-step-polling-mode-preview) (once available). |
|
||||||
| Agent duplicated in chat | Known canvas bug where WebSocket + HTTP responses both render. Fixed in [PR #1517](https://git.moleculesai.app/molecule-ai/molecule-core/pull/1517). |
|
| Agent duplicated in chat | Known canvas bug where WebSocket + HTTP responses both render. Fixed in [PR #1517](https://github.com/Molecule-AI/molecule-core/pull/1517). |
|
||||||
| Agent replies but canvas shows "Agent unreachable" | Check the tenant can reach your URL. Cloudflare quick tunnels rotate — the URL in your canvas may point at a dead tunnel after restart. |
|
| Agent replies but canvas shows "Agent unreachable" | Check the tenant can reach your URL. Cloudflare quick tunnels rotate — the URL in your canvas may point at a dead tunnel after restart. |
|
||||||
| Getting 404 when POSTing to tenant | Add `X-Molecule-Org-Id` header. The tenant's security layer 404s unmatched origin requests by design. |
|
| Getting 404 when POSTing to tenant | Add `X-Molecule-Org-Id` header. The tenant's security layer 404s unmatched origin requests by design. |
|
||||||
|
|
||||||
@ -255,7 +255,7 @@ If all four pass and canvas still shows your agent as unreachable, see the [remo
|
|||||||
## Feedback
|
## Feedback
|
||||||
|
|
||||||
This is a new path. Tell us what broke:
|
This is a new path. Tell us what broke:
|
||||||
- Open an issue: https://git.moleculesai.app/molecule-ai/molecule-core/issues/new?labels=external-workspace
|
- Open an issue: https://github.com/Molecule-AI/molecule-core/issues/new?labels=external-workspace
|
||||||
- Join #external-workspaces on our Slack
|
- Join #external-workspaces on our Slack
|
||||||
- Submit a PR improving this doc if something tripped you up — the faster we can make the quickstart, the more developers we bring in
|
- Submit a PR improving this doc if something tripped you up — the faster we can make the quickstart, the more developers we bring in
|
||||||
|
|
||||||
|
|||||||
@ -58,11 +58,8 @@ green — proves wire shape end-to-end against a real `hermes gateway run`
|
|||||||
subprocess + stub OpenAI-compat LLM. Caught + fixed a real `KeyError`
|
subprocess + stub OpenAI-compat LLM. Caught + fixed a real `KeyError`
|
||||||
in upstream `hermes_cli/tools_config.py` (PLATFORMS dict lookup
|
in upstream `hermes_cli/tools_config.py` (PLATFORMS dict lookup
|
||||||
crashed on plugin platforms) — fix on the patched fork branch
|
crashed on plugin platforms) — fix on the patched fork branch
|
||||||
(`molecule-ai/hermes-agent` `feat/platform-adapter-plugins`, commit
|
(`HongmingWang-Rabbit/hermes-agent` `feat/platform-adapter-plugins`,
|
||||||
`18e4849e`, hosted on Gitea at
|
commit `18e4849e`). Upstream PR #18775 OPEN; CONFLICTING with main.
|
||||||
`https://git.moleculesai.app/molecule-ai/hermes-agent` — moved from the
|
|
||||||
suspended `github.com/HongmingWang-Rabbit/hermes-agent`, see
|
|
||||||
`molecule-ai/internal#72`). Upstream PR #18775 OPEN; CONFLICTING with main.
|
|
||||||
Not on critical path for our platform — patched fork is what the
|
Not on critical path for our platform — patched fork is what the
|
||||||
workspace image installs.
|
workspace image installs.
|
||||||
|
|
||||||
@ -102,7 +99,7 @@ fork needed in production.
|
|||||||
- **Plugin package**: [Molecule-AI/hermes-platform-molecule-a2a](https://git.moleculesai.app/molecule-ai/hermes-platform-molecule-a2a)
|
- **Plugin package**: [Molecule-AI/hermes-platform-molecule-a2a](https://git.moleculesai.app/molecule-ai/hermes-platform-molecule-a2a)
|
||||||
v0.1.0 — public, MIT-licensed. 11 unit tests + 8 in-process E2E
|
v0.1.0 — public, MIT-licensed. 11 unit tests + 8 in-process E2E
|
||||||
+ 4 real-subprocess E2E checkpoints all green.
|
+ 4 real-subprocess E2E checkpoints all green.
|
||||||
- **Workspace template patch**: [Molecule-AI/molecule-ai-workspace-template-hermes#32](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-hermes/pull/32)
|
- **Workspace template patch**: [Molecule-AI/molecule-ai-workspace-template-hermes#32](https://github.com/Molecule-AI/molecule-ai-workspace-template-hermes/pull/32)
|
||||||
— Dockerfile installs the patched fork + plugin into the hermes
|
— Dockerfile installs the patched fork + plugin into the hermes
|
||||||
installer's venv; start.sh seeds `platforms.molecule-a2a` config
|
installer's venv; start.sh seeds `platforms.molecule-a2a` config
|
||||||
stanza. Pre-demo deliberately install-only; adapter.py rewrite to
|
stanza. Pre-demo deliberately install-only; adapter.py rewrite to
|
||||||
@ -159,7 +156,7 @@ intermediate shim earns its complexity.
|
|||||||
**Status:** Template SHIPPED. Repo live at
|
**Status:** Template SHIPPED. Repo live at
|
||||||
[`Molecule-AI/molecule-ai-workspace-template-codex`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-codex)
|
[`Molecule-AI/molecule-ai-workspace-template-codex`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-codex)
|
||||||
(14 files, 1411 LOC, 12/12 tests). molecule-core registration in
|
(14 files, 1411 LOC, 12/12 tests). molecule-core registration in
|
||||||
[PR #2512](https://git.moleculesai.app/molecule-ai/molecule-core/pull/2512).
|
[PR #2512](https://github.com/Molecule-AI/molecule-core/pull/2512).
|
||||||
E2E with real A2A traffic remains.
|
E2E with real A2A traffic remains.
|
||||||
|
|
||||||
**Path:** Persistent `codex app-server` stdio JSON-RPC client
|
**Path:** Persistent `codex app-server` stdio JSON-RPC client
|
||||||
|
|||||||
@ -101,7 +101,7 @@ incident-shaped.
|
|||||||
## [v1.0.0] — initial release (RFC #2728, PRs #2729-#2742)
|
## [v1.0.0] — initial release (RFC #2728, PRs #2729-#2742)
|
||||||
|
|
||||||
Initial plugin contract + 11-PR rollout. See
|
Initial plugin contract + 11-PR rollout. See
|
||||||
[issue #2728](https://git.moleculesai.app/molecule-ai/molecule-core/issues/2728)
|
[issue #2728](https://github.com/Molecule-AI/molecule-core/issues/2728)
|
||||||
for the full RFC.
|
for the full RFC.
|
||||||
|
|
||||||
Endpoints: `/v1/health`, `/v1/namespaces/{name}` (PUT/PATCH/DELETE),
|
Endpoints: `/v1/health`, `/v1/namespaces/{name}` (PUT/PATCH/DELETE),
|
||||||
|
|||||||
@ -160,11 +160,11 @@ not expose.
|
|||||||
| `molecule-skill-update-docs` | `[claude_code]` | `[claude_code, hermes]` |
|
| `molecule-skill-update-docs` | `[claude_code]` | `[claude_code, hermes]` |
|
||||||
|
|
||||||
Companion PRs:
|
Companion PRs:
|
||||||
- [molecule-ai-plugin-ecc#2](https://git.moleculesai.app/molecule-ai/molecule-ai-plugin-ecc/pull/2)
|
- [molecule-ai-plugin-ecc#2](https://github.com/Molecule-AI/molecule-ai-plugin-ecc/pull/2)
|
||||||
- [molecule-ai-plugin-superpowers#2](https://git.moleculesai.app/molecule-ai/molecule-ai-plugin-superpowers/pull/2)
|
- [molecule-ai-plugin-superpowers#2](https://github.com/Molecule-AI/molecule-ai-plugin-superpowers/pull/2)
|
||||||
- [molecule-ai-plugin-molecule-dev#2](https://git.moleculesai.app/molecule-ai/molecule-ai-plugin-molecule-dev/pull/2)
|
- [molecule-ai-plugin-molecule-dev#2](https://github.com/Molecule-AI/molecule-ai-plugin-molecule-dev/pull/2)
|
||||||
- [molecule-ai-plugin-molecule-skill-cron-learnings#2](https://git.moleculesai.app/molecule-ai/molecule-ai-plugin-molecule-skill-cron-learnings/pull/2)
|
- [molecule-ai-plugin-molecule-skill-cron-learnings#2](https://github.com/Molecule-AI/molecule-ai-plugin-molecule-skill-cron-learnings/pull/2)
|
||||||
- [molecule-ai-plugin-molecule-skill-update-docs#2](https://git.moleculesai.app/molecule-ai/molecule-ai-plugin-molecule-skill-update-docs/pull/2)
|
- [molecule-ai-plugin-molecule-skill-update-docs#2](https://github.com/Molecule-AI/molecule-ai-plugin-molecule-skill-update-docs/pull/2)
|
||||||
|
|
||||||
Security note: Security Auditor was offline at time of change. Self-assessed
|
Security note: Security Auditor was offline at time of change. Self-assessed
|
||||||
as non-security-impacting — adding `hermes` to a string list in `plugin.yaml`
|
as non-security-impacting — adding `hermes` to a string list in `plugin.yaml`
|
||||||
|
|||||||
@ -1,137 +0,0 @@
|
|||||||
# Runbook — Handlers Postgres Integration port-collision substrate
|
|
||||||
|
|
||||||
**Status:** Resolved 2026-05-08 (PR for class B Hongming-owned CICD red sweep).
|
|
||||||
|
|
||||||
## Symptom
|
|
||||||
|
|
||||||
`Handlers Postgres Integration` workflow fails on staging push and PRs.
|
|
||||||
Step `Apply migrations to Postgres service` shows:
|
|
||||||
|
|
||||||
```
|
|
||||||
psql: error: connection to server at "127.0.0.1", port 5432 failed: Connection refused
|
|
||||||
```
|
|
||||||
|
|
||||||
Job-cleanup step further down logs:
|
|
||||||
|
|
||||||
```
|
|
||||||
Cleaning up services for job Handlers Postgres Integration
|
|
||||||
failed to remove container: Error response from daemon: No such container: <id>
|
|
||||||
```
|
|
||||||
|
|
||||||
…confirming the postgres service container was already gone before
|
|
||||||
cleanup ran.
|
|
||||||
|
|
||||||
## Root cause
|
|
||||||
|
|
||||||
Our Gitea act_runner (operator host `5.78.80.188`,
|
|
||||||
`/opt/molecule/runners/config.yaml`) sets:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
container:
|
|
||||||
network: host
|
|
||||||
```
|
|
||||||
|
|
||||||
…which act_runner applies to BOTH the job container AND every
|
|
||||||
`services:` container in a workflow. Multiple workflow instances
|
|
||||||
running concurrently across the 16 parallel runners each try to bind
|
|
||||||
postgres on `0.0.0.0:5432`. The first wins; subsequent instances exit
|
|
||||||
immediately with:
|
|
||||||
|
|
||||||
```
|
|
||||||
LOG: could not bind IPv4 address "0.0.0.0": Address in use
|
|
||||||
HINT: Is another postmaster already running on port 5432?
|
|
||||||
FATAL: could not create any TCP/IP sockets
|
|
||||||
```
|
|
||||||
|
|
||||||
act_runner sets `AutoRemove:true` on service containers, so Docker
|
|
||||||
garbage-collects them as soon as they exit. By the time the migrations
|
|
||||||
step runs `pg_isready` / `psql`, the container is gone and connection
|
|
||||||
refused.
|
|
||||||
|
|
||||||
Reproduction (operator host):
|
|
||||||
|
|
||||||
```bash
|
|
||||||
docker run --rm -d --name pg-A --network host \
|
|
||||||
-e POSTGRES_PASSWORD=test postgres:15-alpine
|
|
||||||
docker run -d --name pg-B --network host \
|
|
||||||
-e POSTGRES_PASSWORD=test postgres:15-alpine
|
|
||||||
docker logs pg-B # FATAL: could not create any TCP/IP sockets
|
|
||||||
```
|
|
||||||
|
|
||||||
## Why per-job override doesn't work
|
|
||||||
|
|
||||||
The natural fix — per-job `container.network` override — is silently
|
|
||||||
ignored by act_runner. The runner log emits:
|
|
||||||
|
|
||||||
```
|
|
||||||
--network and --net in the options will be ignored.
|
|
||||||
```
|
|
||||||
|
|
||||||
This is a documented act_runner constraint: container network is a
|
|
||||||
runner-wide setting, not per-job. Source: gitea/act_runner config docs
|
|
||||||
+ vegardit/docker-gitea-act-runner issue #7.
|
|
||||||
|
|
||||||
Flipping the global `container.network` to `bridge` would break every
|
|
||||||
other workflow in the repo (cache server discovery,
|
|
||||||
`molecule-monorepo-net` peer access during integration tests, etc.) —
|
|
||||||
unacceptable blast radius for a per-test bug.
|
|
||||||
|
|
||||||
## Fix shape
|
|
||||||
|
|
||||||
`handlers-postgres-integration.yml` no longer uses `services: postgres:`.
|
|
||||||
It launches a sibling postgres container manually on the existing
|
|
||||||
`molecule-monorepo-net` bridge network with a per-run unique name:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
env:
|
|
||||||
PG_NAME: pg-handlers-${{ github.run_id }}-${{ github.run_attempt }}
|
|
||||||
PG_NETWORK: molecule-monorepo-net
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Start sibling Postgres on bridge network
|
|
||||||
run: |
|
|
||||||
docker run -d --name "${PG_NAME}" --network "${PG_NETWORK}" \
|
|
||||||
...
|
|
||||||
postgres:15-alpine
|
|
||||||
PG_HOST=$(docker inspect "${PG_NAME}" \
|
|
||||||
--format "{{(index .NetworkSettings.Networks \"${PG_NETWORK}\").IPAddress}}")
|
|
||||||
echo "PG_HOST=${PG_HOST}" >> "$GITHUB_ENV"
|
|
||||||
|
|
||||||
# … migrations + tests use ${PG_HOST}, not 127.0.0.1 …
|
|
||||||
|
|
||||||
- if: always() && …
|
|
||||||
name: Stop sibling Postgres
|
|
||||||
run: docker rm -f "${PG_NAME}" || true
|
|
||||||
```
|
|
||||||
|
|
||||||
The host-net job container can reach a bridge-net container via the
|
|
||||||
bridge IP directly (verified manually, 2026-05-08). Two parallel runs
|
|
||||||
use different names + different bridge IPs — no collision.
|
|
||||||
|
|
||||||
## Future-proofing
|
|
||||||
|
|
||||||
Other workflows that hit the same shape (any `services:` with a
|
|
||||||
fixed-port image) will exhibit the same failure mode under
|
|
||||||
host-network runner config. Translate using this same pattern:
|
|
||||||
|
|
||||||
1. Drop the `services:` block.
|
|
||||||
2. Use `${{ github.run_id }}-${{ github.run_attempt }}` for unique
|
|
||||||
container name.
|
|
||||||
3. Launch on `molecule-monorepo-net` (already trusted bridge in
|
|
||||||
`docker-compose.infra.yml`).
|
|
||||||
4. Read back the bridge IP via `docker inspect` and export as a step env.
|
|
||||||
5. `if: always()` cleanup step at the end.
|
|
||||||
|
|
||||||
If the count of such workflows grows, factor into a composite action
|
|
||||||
(`./.github/actions/sibling-postgres`) so the substrate logic lives
|
|
||||||
in one place.
|
|
||||||
|
|
||||||
## Related
|
|
||||||
|
|
||||||
- Issue #88 (closed by #92): localhost → 127.0.0.1 fix that unmasked
|
|
||||||
this collision; the IPv6 fix is correct, port collision is the new
|
|
||||||
layer.
|
|
||||||
- Issue #94 created `molecule-monorepo-net` + `alpine:latest` as
|
|
||||||
prereqs.
|
|
||||||
- Saved memory `feedback_act_runner_github_server_url` documents
|
|
||||||
another act_runner-vs-GHA divergence (server URL).
|
|
||||||
@ -198,7 +198,7 @@ Lighthouse audit against staging.yourapp.com:
|
|||||||
FCP: 2.4s | LCP: 5.2s | CLS: 0.18 | TBT: 620ms
|
FCP: 2.4s | LCP: 5.2s | CLS: 0.18 | TBT: 620ms
|
||||||
|
|
||||||
Performance regression detected — opening GitHub issue.
|
Performance regression detected — opening GitHub issue.
|
||||||
Issue: https://git.moleculesai.app/molecule-ai/molecule-core/issues/1527
|
Issue: https://github.com/Molecule-AI/molecule-core/issues/1527
|
||||||
Label: performance-regression | Assignees: @your-team
|
Label: performance-regression | Assignees: @your-team
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@ -85,8 +85,8 @@ Fly Machines start in milliseconds and run in 35+ regions. Provisioning agent wo
|
|||||||
|
|
||||||
## Related
|
## Related
|
||||||
|
|
||||||
- PR #501: [feat(platform): Fly Machines provisioner](https://git.moleculesai.app/molecule-ai/molecule-core/pull/501)
|
- PR #501: [feat(platform): Fly Machines provisioner](https://github.com/Molecule-AI/molecule-core/pull/501)
|
||||||
- PR #481: [feat(ci): deploy to Fly after image push](https://git.moleculesai.app/molecule-ai/molecule-core/pull/481)
|
- PR #481: [feat(ci): deploy to Fly after image push](https://github.com/Molecule-AI/molecule-core/pull/481)
|
||||||
- [Fly Machines API docs](https://fly.io/docs/machines/api/)
|
- [Fly Machines API docs](https://fly.io/docs/machines/api/)
|
||||||
- [Platform API reference](../api-reference.md)
|
- [Platform API reference](../api-reference.md)
|
||||||
- Issue [#525](https://git.moleculesai.app/molecule-ai/molecule-core/issues/525)
|
- Issue [#525](https://github.com/Molecule-AI/molecule-core/issues/525)
|
||||||
|
|||||||
@ -61,6 +61,6 @@ The real power surfaces when you mix runtimes on the same Molecule AI tenant. Yo
|
|||||||
|
|
||||||
## Related
|
## Related
|
||||||
|
|
||||||
- PR #379: [feat(adapters): add gemini-cli runtime adapter](https://git.moleculesai.app/molecule-ai/molecule-core/pull/379)
|
- PR #379: [feat(adapters): add gemini-cli runtime adapter](https://github.com/Molecule-AI/molecule-core/pull/379)
|
||||||
- [Multi-provider Hermes docs](../architecture/hermes.md)
|
- [Multi-provider Hermes docs](../architecture/hermes.md)
|
||||||
- [Workspace runtimes reference](../reference/runtimes.md)
|
- [Workspace runtimes reference](../reference/runtimes.md)
|
||||||
|
|||||||
@ -68,7 +68,7 @@ ADK workspaces participate in the same A2A network as Claude Code, Gemini CLI, H
|
|||||||
|
|
||||||
## Related
|
## Related
|
||||||
|
|
||||||
- PR #550: [feat(adapters): add google-adk runtime adapter](https://git.moleculesai.app/molecule-ai/molecule-core/pull/550)
|
- PR #550: [feat(adapters): add google-adk runtime adapter](https://github.com/Molecule-AI/molecule-core/pull/550)
|
||||||
- [Google ADK (adk-python)](https://github.com/google/adk-python)
|
- [Google ADK (adk-python)](https://github.com/google/adk-python)
|
||||||
- [Gemini CLI runtime tutorial](./gemini-cli-runtime.md)
|
- [Gemini CLI runtime tutorial](./gemini-cli-runtime.md)
|
||||||
- [Platform API reference](../api-reference.md)
|
- [Platform API reference](../api-reference.md)
|
||||||
|
|||||||
@ -176,9 +176,9 @@ What is on the roadmap for Phase 2d (not yet shipped):
|
|||||||
|
|
||||||
## Related
|
## Related
|
||||||
|
|
||||||
- PR #240: [Phase 2a — native Anthropic dispatch](https://git.moleculesai.app/molecule-ai/molecule-core/pull/240)
|
- PR #240: [Phase 2a — native Anthropic dispatch](https://github.com/Molecule-AI/molecule-core/pull/240)
|
||||||
- PR #255: [Phase 2b — native Gemini dispatch](https://git.moleculesai.app/molecule-ai/molecule-core/pull/255)
|
- PR #255: [Phase 2b — native Gemini dispatch](https://github.com/Molecule-AI/molecule-core/pull/255)
|
||||||
- PR #267: [Phase 2c — multi-turn history on all paths](https://git.moleculesai.app/molecule-ai/molecule-core/pull/267)
|
- PR #267: [Phase 2c — multi-turn history on all paths](https://github.com/Molecule-AI/molecule-core/pull/267)
|
||||||
- [Hermes adapter design](../adapters/hermes-adapter-design.md)
|
- [Hermes adapter design](../adapters/hermes-adapter-design.md)
|
||||||
- [Platform API reference](../api-reference.md)
|
- [Platform API reference](../api-reference.md)
|
||||||
- Issue [#513](https://git.moleculesai.app/molecule-ai/molecule-core/issues/513)
|
- Issue [#513](https://github.com/Molecule-AI/molecule-core/issues/513)
|
||||||
|
|||||||
@ -90,6 +90,6 @@ Molecule AI canvas without code changes.
|
|||||||
|
|
||||||
## Related
|
## Related
|
||||||
|
|
||||||
- PR #480: [feat(channels): Lark / Feishu channel adapter](https://git.moleculesai.app/molecule-ai/molecule-core/pull/480)
|
- PR #480: [feat(channels): Lark / Feishu channel adapter](https://github.com/Molecule-AI/molecule-core/pull/480)
|
||||||
- [Social channels architecture](../agent-runtime/social-channels.md)
|
- [Social channels architecture](../agent-runtime/social-channels.md)
|
||||||
- [Channel adapter reference](../api-reference.md#channels)
|
- [Channel adapter reference](../api-reference.md#channels)
|
||||||
@ -1,46 +1,46 @@
|
|||||||
{
|
{
|
||||||
"_comment": "OSS surface registry — every repo listed here MUST be public on git.moleculesai.app. Layer-3 customer/private templates are NOT registered here; they are handled at provision-time via the per-tenant credential resolver (see internal#102 RFC). 'main' refs are pinned to tags before broad rollout.",
|
"_comment": "Pin refs to release tags for reproducible builds. 'main' is OK while all repos are internal.",
|
||||||
"version": 1,
|
"version": 1,
|
||||||
"plugins": [
|
"plugins": [
|
||||||
{"name": "browser-automation", "repo": "molecule-ai/molecule-ai-plugin-browser-automation", "ref": "main"},
|
{"name": "browser-automation", "repo": "Molecule-AI/molecule-ai-plugin-browser-automation", "ref": "main"},
|
||||||
{"name": "ecc", "repo": "molecule-ai/molecule-ai-plugin-ecc", "ref": "main"},
|
{"name": "ecc", "repo": "Molecule-AI/molecule-ai-plugin-ecc", "ref": "main"},
|
||||||
{"name": "gh-identity", "repo": "molecule-ai/molecule-ai-plugin-gh-identity", "ref": "main"},
|
{"name": "gh-identity", "repo": "Molecule-AI/molecule-ai-plugin-gh-identity", "ref": "main"},
|
||||||
{"name": "molecule-audit", "repo": "molecule-ai/molecule-ai-plugin-molecule-audit", "ref": "main"},
|
{"name": "molecule-audit", "repo": "Molecule-AI/molecule-ai-plugin-molecule-audit", "ref": "main"},
|
||||||
{"name": "molecule-audit-trail", "repo": "molecule-ai/molecule-ai-plugin-molecule-audit-trail", "ref": "main"},
|
{"name": "molecule-audit-trail", "repo": "Molecule-AI/molecule-ai-plugin-molecule-audit-trail", "ref": "main"},
|
||||||
{"name": "molecule-careful-bash", "repo": "molecule-ai/molecule-ai-plugin-molecule-careful-bash", "ref": "main"},
|
{"name": "molecule-careful-bash", "repo": "Molecule-AI/molecule-ai-plugin-molecule-careful-bash", "ref": "main"},
|
||||||
{"name": "molecule-compliance", "repo": "molecule-ai/molecule-ai-plugin-molecule-compliance", "ref": "main"},
|
{"name": "molecule-compliance", "repo": "Molecule-AI/molecule-ai-plugin-molecule-compliance", "ref": "main"},
|
||||||
{"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-plugin-molecule-dev", "ref": "main"},
|
{"name": "molecule-dev", "repo": "Molecule-AI/molecule-ai-plugin-molecule-dev", "ref": "main"},
|
||||||
{"name": "molecule-freeze-scope", "repo": "molecule-ai/molecule-ai-plugin-molecule-freeze-scope", "ref": "main"},
|
{"name": "molecule-freeze-scope", "repo": "Molecule-AI/molecule-ai-plugin-molecule-freeze-scope", "ref": "main"},
|
||||||
{"name": "molecule-hitl", "repo": "molecule-ai/molecule-ai-plugin-molecule-hitl", "ref": "main"},
|
{"name": "molecule-hitl", "repo": "Molecule-AI/molecule-ai-plugin-molecule-hitl", "ref": "main"},
|
||||||
{"name": "molecule-prompt-watchdog", "repo": "molecule-ai/molecule-ai-plugin-molecule-prompt-watchdog", "ref": "main"},
|
{"name": "molecule-prompt-watchdog", "repo": "Molecule-AI/molecule-ai-plugin-molecule-prompt-watchdog", "ref": "main"},
|
||||||
{"name": "molecule-security-scan", "repo": "molecule-ai/molecule-ai-plugin-molecule-security-scan", "ref": "main"},
|
{"name": "molecule-security-scan", "repo": "Molecule-AI/molecule-ai-plugin-molecule-security-scan", "ref": "main"},
|
||||||
{"name": "molecule-session-context", "repo": "molecule-ai/molecule-ai-plugin-molecule-session-context", "ref": "main"},
|
{"name": "molecule-session-context", "repo": "Molecule-AI/molecule-ai-plugin-molecule-session-context", "ref": "main"},
|
||||||
{"name": "molecule-skill-code-review", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-code-review", "ref": "main"},
|
{"name": "molecule-skill-code-review", "repo": "Molecule-AI/molecule-ai-plugin-molecule-skill-code-review", "ref": "main"},
|
||||||
{"name": "molecule-skill-cron-learnings", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-cron-learnings", "ref": "main"},
|
{"name": "molecule-skill-cron-learnings", "repo": "Molecule-AI/molecule-ai-plugin-molecule-skill-cron-learnings", "ref": "main"},
|
||||||
{"name": "molecule-skill-cross-vendor-review", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-cross-vendor-review", "ref": "main"},
|
{"name": "molecule-skill-cross-vendor-review", "repo": "Molecule-AI/molecule-ai-plugin-molecule-skill-cross-vendor-review", "ref": "main"},
|
||||||
{"name": "molecule-skill-llm-judge", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-llm-judge", "ref": "main"},
|
{"name": "molecule-skill-llm-judge", "repo": "Molecule-AI/molecule-ai-plugin-molecule-skill-llm-judge", "ref": "main"},
|
||||||
{"name": "molecule-skill-update-docs", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-update-docs", "ref": "main"},
|
{"name": "molecule-skill-update-docs", "repo": "Molecule-AI/molecule-ai-plugin-molecule-skill-update-docs", "ref": "main"},
|
||||||
{"name": "molecule-workflow-retro", "repo": "molecule-ai/molecule-ai-plugin-molecule-workflow-retro", "ref": "main"},
|
{"name": "molecule-workflow-retro", "repo": "Molecule-AI/molecule-ai-plugin-molecule-workflow-retro", "ref": "main"},
|
||||||
{"name": "molecule-workflow-triage", "repo": "molecule-ai/molecule-ai-plugin-molecule-workflow-triage", "ref": "main"},
|
{"name": "molecule-workflow-triage", "repo": "Molecule-AI/molecule-ai-plugin-molecule-workflow-triage", "ref": "main"},
|
||||||
{"name": "superpowers", "repo": "molecule-ai/molecule-ai-plugin-superpowers", "ref": "main"}
|
{"name": "superpowers", "repo": "Molecule-AI/molecule-ai-plugin-superpowers", "ref": "main"}
|
||||||
],
|
],
|
||||||
"workspace_templates": [
|
"workspace_templates": [
|
||||||
{"name": "claude-code-default", "repo": "molecule-ai/molecule-ai-workspace-template-claude-code", "ref": "main"},
|
{"name": "claude-code-default", "repo": "Molecule-AI/molecule-ai-workspace-template-claude-code", "ref": "main"},
|
||||||
{"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "main"},
|
{"name": "hermes", "repo": "Molecule-AI/molecule-ai-workspace-template-hermes", "ref": "main"},
|
||||||
{"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"},
|
{"name": "openclaw", "repo": "Molecule-AI/molecule-ai-workspace-template-openclaw", "ref": "main"},
|
||||||
{"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"},
|
{"name": "codex", "repo": "Molecule-AI/molecule-ai-workspace-template-codex", "ref": "main"},
|
||||||
{"name": "langgraph", "repo": "molecule-ai/molecule-ai-workspace-template-langgraph", "ref": "main"},
|
{"name": "langgraph", "repo": "Molecule-AI/molecule-ai-workspace-template-langgraph", "ref": "main"},
|
||||||
{"name": "crewai", "repo": "molecule-ai/molecule-ai-workspace-template-crewai", "ref": "main"},
|
{"name": "crewai", "repo": "Molecule-AI/molecule-ai-workspace-template-crewai", "ref": "main"},
|
||||||
{"name": "autogen", "repo": "molecule-ai/molecule-ai-workspace-template-autogen", "ref": "main"},
|
{"name": "autogen", "repo": "Molecule-AI/molecule-ai-workspace-template-autogen", "ref": "main"},
|
||||||
{"name": "deepagents", "repo": "molecule-ai/molecule-ai-workspace-template-deepagents", "ref": "main"},
|
{"name": "deepagents", "repo": "Molecule-AI/molecule-ai-workspace-template-deepagents", "ref": "main"},
|
||||||
{"name": "gemini-cli", "repo": "molecule-ai/molecule-ai-workspace-template-gemini-cli", "ref": "main"}
|
{"name": "gemini-cli", "repo": "Molecule-AI/molecule-ai-workspace-template-gemini-cli", "ref": "main"}
|
||||||
],
|
],
|
||||||
"org_templates": [
|
"org_templates": [
|
||||||
{"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "main"},
|
{"name": "molecule-dev", "repo": "Molecule-AI/molecule-ai-org-template-molecule-dev", "ref": "main"},
|
||||||
{"name": "free-beats-all", "repo": "molecule-ai/molecule-ai-org-template-free-beats-all", "ref": "main"},
|
{"name": "free-beats-all", "repo": "Molecule-AI/molecule-ai-org-template-free-beats-all", "ref": "main"},
|
||||||
{"name": "medo-smoke", "repo": "molecule-ai/molecule-ai-org-template-medo-smoke", "ref": "main"},
|
{"name": "medo-smoke", "repo": "Molecule-AI/molecule-ai-org-template-medo-smoke", "ref": "main"},
|
||||||
{"name": "molecule-worker-gemini", "repo": "molecule-ai/molecule-ai-org-template-molecule-worker-gemini", "ref": "main"},
|
{"name": "molecule-worker-gemini", "repo": "Molecule-AI/molecule-ai-org-template-molecule-worker-gemini", "ref": "main"},
|
||||||
{"name": "ux-ab-lab", "repo": "molecule-ai/molecule-ai-org-template-ux-ab-lab", "ref": "main"},
|
{"name": "reno-stars", "repo": "Molecule-AI/molecule-ai-org-template-reno-stars", "ref": "main"},
|
||||||
{"name": "mock-bigorg", "repo": "molecule-ai/molecule-ai-org-template-mock-bigorg", "ref": "main"}
|
{"name": "ux-ab-lab", "repo": "Molecule-AI/molecule-ai-org-template-ux-ab-lab", "ref": "main"}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@ -376,7 +376,7 @@ hold:
|
|||||||
non-plugin-sourced server, which Claude Code rejects with
|
non-plugin-sourced server, which Claude Code rejects with
|
||||||
`channel_enable requires a marketplace plugin`. Until the
|
`channel_enable requires a marketplace plugin`. Until the
|
||||||
official `moleculesai/claude-code-plugin` marketplace lands
|
official `moleculesai/claude-code-plugin` marketplace lands
|
||||||
(tracking [#2936](https://git.moleculesai.app/molecule-ai/molecule-core/issues/2936)),
|
(tracking [#2936](https://github.com/Molecule-AI/molecule-core/issues/2936)),
|
||||||
operators who want push must scaffold their own local marketplace
|
operators who want push must scaffold their own local marketplace
|
||||||
under
|
under
|
||||||
`~/.claude/marketplaces/molecule-local/` containing a
|
`~/.claude/marketplaces/molecule-local/` containing a
|
||||||
@ -389,7 +389,7 @@ hold:
|
|||||||
Symptom of any condition failing: messages arrive but only via the
|
Symptom of any condition failing: messages arrive but only via the
|
||||||
poll path (every ~1–60s), not real-time. There's currently no
|
poll path (every ~1–60s), not real-time. There's currently no
|
||||||
diagnostic surfaced — `molecule-mcp doctor` (tracking
|
diagnostic surfaced — `molecule-mcp doctor` (tracking
|
||||||
[#2937](https://git.moleculesai.app/molecule-ai/molecule-core/issues/2937)) is
|
[#2937](https://github.com/Molecule-AI/molecule-core/issues/2937)) is
|
||||||
planned.
|
planned.
|
||||||
|
|
||||||
If you don't need real-time push, the default poll path works
|
If you don't need real-time push, the default poll path works
|
||||||
|
|||||||
@ -17,23 +17,12 @@
|
|||||||
#
|
#
|
||||||
# Used by .github/workflows/auto-promote-stale-alarm.yml. Logic lives
|
# Used by .github/workflows/auto-promote-stale-alarm.yml. Logic lives
|
||||||
# here (not inline in the workflow YAML) so we can:
|
# here (not inline in the workflow YAML) so we can:
|
||||||
# - Unit-test it with a fixture (see test-check-stale-promote-pr.sh)
|
# - Unit-test it with a stubbed `gh` (see test-check-stale-promote-pr.sh)
|
||||||
# - Run it ad-hoc by an operator: `scripts/check-stale-promote-pr.sh`
|
# - Run it ad-hoc by an operator: `scripts/check-stale-promote-pr.sh`
|
||||||
# - Reuse the same surface in any sibling workflow that needs the same
|
# - Reuse the same surface in any sibling workflow that needs the same
|
||||||
# check (SSOT — one detector, many callers).
|
# check (SSOT — one detector, many callers).
|
||||||
#
|
#
|
||||||
# Requires: `curl`, `jq`. `GITEA_TOKEN` (or `GITHUB_TOKEN` / `GH_TOKEN`
|
# Requires: `gh` CLI, `jq`. `GH_TOKEN` env in the workflow context.
|
||||||
# for back-compat) in the workflow context. Reads `GITHUB_SERVER_URL`
|
|
||||||
# / `GITEA_API_URL` for the Gitea base, defaulting to
|
|
||||||
# https://git.moleculesai.app/api/v1.
|
|
||||||
#
|
|
||||||
# Post-2026-05-06 (Gitea migration, issue #75): the previous version
|
|
||||||
# called `gh pr list/view/comment`, all of which hit GitHub.com's
|
|
||||||
# GraphQL or /api/v3 REST shapes. Gitea exposes /api/v1/ only (no
|
|
||||||
# GraphQL → 405, no /api/v3 → 404). So this script now talks to the
|
|
||||||
# Gitea v1 API directly via curl. The fixture-driven unit tests are
|
|
||||||
# unchanged — they bypass the live fetch via PR_FIXTURE and still pass
|
|
||||||
# the historical (GitHub-shape) JSON which `detect_stale` consumes.
|
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
@ -47,15 +36,14 @@ set -euo pipefail
|
|||||||
# alarming. Override via env for tests + edge ops.
|
# alarming. Override via env for tests + edge ops.
|
||||||
STALE_HOURS="${STALE_HOURS:-4}"
|
STALE_HOURS="${STALE_HOURS:-4}"
|
||||||
|
|
||||||
# Repo defaults to GITHUB_REPOSITORY (act_runner sets this in workflow
|
# Repo defaults to the current `gh` context. Tests pass --repo explicitly.
|
||||||
# context). Tests pass --repo explicitly.
|
|
||||||
REPO="${GITHUB_REPOSITORY:-}"
|
REPO="${GITHUB_REPOSITORY:-}"
|
||||||
|
|
||||||
# Whether to post a comment to the PR. Off by default to avoid noise on
|
# Whether to post a comment to the PR. Off by default to avoid noise on
|
||||||
# manual ad-hoc runs; the cron workflow turns it on.
|
# manual ad-hoc runs; the cron workflow turns it on.
|
||||||
POST_COMMENT="${POST_COMMENT:-false}"
|
POST_COMMENT="${POST_COMMENT:-false}"
|
||||||
|
|
||||||
# Where to read the open-PR JSON from. Empty = call Gitea live. Tests
|
# Where to read the open-PR JSON from. Empty = call `gh` live. Tests
|
||||||
# point this at a fixture file.
|
# point this at a fixture file.
|
||||||
PR_FIXTURE="${PR_FIXTURE:-}"
|
PR_FIXTURE="${PR_FIXTURE:-}"
|
||||||
|
|
||||||
@ -63,17 +51,6 @@ PR_FIXTURE="${PR_FIXTURE:-}"
|
|||||||
# the staleness math is deterministic.
|
# the staleness math is deterministic.
|
||||||
NOW_OVERRIDE="${NOW_OVERRIDE:-}"
|
NOW_OVERRIDE="${NOW_OVERRIDE:-}"
|
||||||
|
|
||||||
# Gitea API base. act_runner forwards github.server_url as
|
|
||||||
# GITHUB_SERVER_URL; for the molecule-ai fleet that's
|
|
||||||
# https://git.moleculesai.app. Append /api/v1 to get the REST root.
|
|
||||||
# Override directly via GITEA_API_URL for tests / non-default hosts.
|
|
||||||
GITEA_API_URL="${GITEA_API_URL:-${GITHUB_SERVER_URL:-https://git.moleculesai.app}/api/v1}"
|
|
||||||
|
|
||||||
# Token. Workflow context sets GITHUB_TOKEN; we accept GITEA_TOKEN as
|
|
||||||
# the explicit name and GH_TOKEN for back-compat with operator habits
|
|
||||||
# from the GitHub era. First non-empty wins.
|
|
||||||
GITEA_TOKEN="${GITEA_TOKEN:-${GITHUB_TOKEN:-${GH_TOKEN:-}}}"
|
|
||||||
|
|
||||||
while [ $# -gt 0 ]; do
|
while [ $# -gt 0 ]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
--repo) REPO="$2"; shift 2 ;;
|
--repo) REPO="$2"; shift 2 ;;
|
||||||
@ -106,7 +83,7 @@ now_epoch() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Parse RFC3339 timestamps the way Gitea / GitHub emit them (e.g.
|
# Parse RFC3339 timestamps the way GitHub emits them (e.g.
|
||||||
# "2026-05-05T23:15:00Z"). gnu-date uses -d, bsd-date uses -j -f. Cover
|
# "2026-05-05T23:15:00Z"). gnu-date uses -d, bsd-date uses -j -f. Cover
|
||||||
# both because the workflow runs on ubuntu-latest (gnu) but operators
|
# both because the workflow runs on ubuntu-latest (gnu) but operators
|
||||||
# may run this script on macOS (bsd).
|
# may run this script on macOS (bsd).
|
||||||
@ -129,100 +106,14 @@ to_epoch() {
|
|||||||
# Fetch open auto-promote PRs
|
# Fetch open auto-promote PRs
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
|
|
||||||
# Gitea v1 returns PRs with the canonical Gitea shape (number, title,
|
|
||||||
# created_at, html_url, mergeable, state). The previous GitHub-CLI
|
|
||||||
# version returned a derived `mergeStateStatus` / `reviewDecision`
|
|
||||||
# pair which only GitHub computes — Gitea doesn't expose them
|
|
||||||
# natively. Rebuild equivalents:
|
|
||||||
#
|
|
||||||
# mergeStateStatus = BLOCKED ↔ Gitea: state==open AND mergeable==true
|
|
||||||
# AND no APPROVED review yet
|
|
||||||
# (i.e. branch protection is gating
|
|
||||||
# the auto-merge pending an approval)
|
|
||||||
# reviewDecision = REVIEW_REQUIRED ↔ Gitea: 0 APPROVED reviews
|
|
||||||
#
|
|
||||||
# This mirrors the SAME silent-block failure mode the GitHub version
|
|
||||||
# detected: auto-merge armed, branch protection requires 1 review,
|
|
||||||
# nobody's approved yet.
|
|
||||||
#
|
|
||||||
# Implementation: pull the open PR list base=main, then for each PR
|
|
||||||
# pull /pulls/{n}/reviews and synthesize the GitHub-shape JSON the
|
|
||||||
# rest of the script + the test fixtures consume.
|
|
||||||
fetch_prs() {
|
fetch_prs() {
|
||||||
if [ -n "$PR_FIXTURE" ]; then
|
if [ -n "$PR_FIXTURE" ]; then
|
||||||
cat "$PR_FIXTURE"
|
cat "$PR_FIXTURE"
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
if [ -z "$GITEA_TOKEN" ]; then
|
gh pr list --repo "$REPO" \
|
||||||
echo "::error::GITEA_TOKEN / GITHUB_TOKEN unset — cannot fetch PRs from $GITEA_API_URL" >&2
|
--base main --head staging --state open \
|
||||||
return 1
|
--json number,title,createdAt,mergeStateStatus,reviewDecision,url
|
||||||
fi
|
|
||||||
local prs_json
|
|
||||||
prs_json="$(curl --fail-with-body -sS \
|
|
||||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
|
||||||
-H "Accept: application/json" \
|
|
||||||
"${GITEA_API_URL}/repos/${REPO}/pulls?state=open&base=main&limit=50" \
|
|
||||||
2>/dev/null)" || {
|
|
||||||
echo "::error::Failed to fetch PRs from ${GITEA_API_URL}/repos/${REPO}/pulls" >&2
|
|
||||||
return 1
|
|
||||||
}
|
|
||||||
|
|
||||||
# Filter to head=staging (the auto-promote shape) and synthesize
|
|
||||||
# mergeStateStatus + reviewDecision per PR. Approval count via
|
|
||||||
# /pulls/{n}/reviews. Errors fall through to 0-approvals (treated
|
|
||||||
# as REVIEW_REQUIRED) preserving the existing "fail-safe — alarm if
|
|
||||||
# uncertain" semantic.
|
|
||||||
local synthesized="[]"
|
|
||||||
while IFS= read -r pr; do
|
|
||||||
[ -z "$pr" ] && continue
|
|
||||||
[ "$pr" = "null" ] && continue
|
|
||||||
local num
|
|
||||||
num="$(printf '%s' "$pr" | jq -r '.number')"
|
|
||||||
[ -z "$num" ] && continue
|
|
||||||
[ "$num" = "null" ] && continue
|
|
||||||
local approved_count
|
|
||||||
approved_count="$(curl --fail-with-body -sS \
|
|
||||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
|
||||||
-H "Accept: application/json" \
|
|
||||||
"${GITEA_API_URL}/repos/${REPO}/pulls/${num}/reviews" 2>/dev/null \
|
|
||||||
| jq '[.[] | select(.state == "APPROVED" and (.dismissed // false) == false)] | length' \
|
|
||||||
2>/dev/null || echo 0)"
|
|
||||||
local mergeable
|
|
||||||
mergeable="$(printf '%s' "$pr" | jq -r '.mergeable')"
|
|
||||||
local merge_state="UNKNOWN"
|
|
||||||
local review_decision="REVIEW_REQUIRED"
|
|
||||||
if [ "$mergeable" = "true" ]; then
|
|
||||||
if [ "$approved_count" -ge 1 ]; then
|
|
||||||
merge_state="CLEAN"
|
|
||||||
review_decision="APPROVED"
|
|
||||||
else
|
|
||||||
# mergeable but no approving review — exactly the wedge state
|
|
||||||
# the alarm targets.
|
|
||||||
merge_state="BLOCKED"
|
|
||||||
review_decision="REVIEW_REQUIRED"
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
# not mergeable (conflicts, behind, failed checks) — different
|
|
||||||
# failure mode, the author owns the fix; the alarm doesn't fire.
|
|
||||||
merge_state="DIRTY"
|
|
||||||
review_decision="REVIEW_REQUIRED"
|
|
||||||
fi
|
|
||||||
synthesized="$(printf '%s' "$synthesized" \
|
|
||||||
| jq -c --argjson pr "$pr" \
|
|
||||||
--arg ms "$merge_state" \
|
|
||||||
--arg rd "$review_decision" \
|
|
||||||
'. + [{
|
|
||||||
number: $pr.number,
|
|
||||||
title: $pr.title,
|
|
||||||
createdAt: $pr.created_at,
|
|
||||||
mergeStateStatus: $ms,
|
|
||||||
reviewDecision: $rd,
|
|
||||||
url: $pr.html_url
|
|
||||||
}]')"
|
|
||||||
done < <(printf '%s' "$prs_json" \
|
|
||||||
| jq -c '.[] | select(.head.ref == "staging")' 2>/dev/null)
|
|
||||||
|
|
||||||
printf '%s\n' "$synthesized"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
@ -280,40 +171,18 @@ post_comment() {
|
|||||||
if [ "$POST_COMMENT" != "true" ]; then
|
if [ "$POST_COMMENT" != "true" ]; then
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
if [ -z "$GITEA_TOKEN" ]; then
|
|
||||||
echo "::warning::GITEA_TOKEN unset — cannot post stale-alarm comment on PR #$pr_num" >&2
|
|
||||||
return 0
|
|
||||||
fi
|
|
||||||
# Idempotency: only one alarm comment per PR. Look for the marker
|
# Idempotency: only one alarm comment per PR. Look for the marker
|
||||||
# string in existing comments before posting a new one. Gitea's
|
# string in existing comments before posting a new one.
|
||||||
# /repos/{owner}/{repo}/issues/{n}/comments returns the same shape
|
|
||||||
# for issues + PRs (PRs are issues internally on Gitea, same as
|
|
||||||
# GitHub's REST).
|
|
||||||
local existing
|
local existing
|
||||||
existing="$(curl --fail-with-body -sS \
|
existing="$(gh pr view "$pr_num" --repo "$REPO" --json comments \
|
||||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
--jq '.comments[] | select(.body | test("scripts/check-stale-promote-pr.sh per issue #2975")) | .databaseId' \
|
||||||
-H "Accept: application/json" \
|
|
||||||
"${GITEA_API_URL}/repos/${REPO}/issues/${pr_num}/comments?limit=50" 2>/dev/null \
|
|
||||||
| jq -r '.[] | select(.body | test("scripts/check-stale-promote-pr.sh per issue #2975")) | .id' \
|
|
||||||
| head -n1)"
|
| head -n1)"
|
||||||
if [ -n "$existing" ]; then
|
if [ -n "$existing" ]; then
|
||||||
echo "::notice::PR #$pr_num already has a stale-alarm comment ($existing) — not re-posting"
|
echo "::notice::PR #$pr_num already has a stale-alarm comment ($existing) — not re-posting"
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
local body
|
comment_body "$age_h" | gh pr comment "$pr_num" --repo "$REPO" --body-file -
|
||||||
body="$(comment_body "$age_h")"
|
echo "::notice::Posted stale-alarm comment on PR #$pr_num (age=${age_h}h)"
|
||||||
if curl --fail-with-body -sS \
|
|
||||||
-X POST \
|
|
||||||
-H "Authorization: token ${GITEA_TOKEN}" \
|
|
||||||
-H "Accept: application/json" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
"${GITEA_API_URL}/repos/${REPO}/issues/${pr_num}/comments" \
|
|
||||||
-d "$(jq -nc --arg b "$body" '{body: $b}')" \
|
|
||||||
>/dev/null 2>&1; then
|
|
||||||
echo "::notice::Posted stale-alarm comment on PR #$pr_num (age=${age_h}h)"
|
|
||||||
else
|
|
||||||
echo "::warning::Failed to POST stale-alarm comment on PR #$pr_num" >&2
|
|
||||||
fi
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# -----------------------------------------------------------------------------
|
# -----------------------------------------------------------------------------
|
||||||
|
|||||||
@ -6,26 +6,6 @@
|
|||||||
# ./scripts/clone-manifest.sh <manifest.json> <ws-templates-dir> <org-templates-dir> <plugins-dir>
|
# ./scripts/clone-manifest.sh <manifest.json> <ws-templates-dir> <org-templates-dir> <plugins-dir>
|
||||||
#
|
#
|
||||||
# Requires: git, jq (lighter than python3 — ~2MB vs ~50MB in Alpine)
|
# Requires: git, jq (lighter than python3 — ~2MB vs ~50MB in Alpine)
|
||||||
#
|
|
||||||
# Auth (optional):
|
|
||||||
# Post-2026-05-08 (#192): every repo in manifest.json is public on
|
|
||||||
# git.moleculesai.app. Anonymous clone works for the entire registered
|
|
||||||
# set. The OSS-surface contract is recorded in manifest.json's _comment
|
|
||||||
# — Layer-3 customer/private templates (e.g. reno-stars) are NOT in the
|
|
||||||
# manifest; they are handled at provision-time via the per-tenant
|
|
||||||
# credential resolver (internal#102 RFC).
|
|
||||||
#
|
|
||||||
# MOLECULE_GITEA_TOKEN is therefore optional today. Kept supported for
|
|
||||||
# two reasons: (a) historical CI configs that still inject
|
|
||||||
# AUTO_SYNC_TOKEN remain harmless, (b) reserved for the case where a
|
|
||||||
# private internal-only template is later registered via a ci-readonly
|
|
||||||
# team grant — review must explicitly sign off on that, since it
|
|
||||||
# violates the public-OSS-surface contract.
|
|
||||||
#
|
|
||||||
# The token (when set) never enters the Docker image: this script runs
|
|
||||||
# in the trusted CI context BEFORE `docker buildx build`, populates
|
|
||||||
# .tenant-bundle-deps/, then `Dockerfile.tenant` COPYs from there with
|
|
||||||
# the .git directories already stripped (see line ~67 below).
|
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
@ -65,27 +45,11 @@ clone_category() {
|
|||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Build the clone URL. When MOLECULE_GITEA_TOKEN is set (CI path)
|
echo " cloning $repo -> $target_dir/$name (ref=$ref)"
|
||||||
# embed it as basic-auth so private repos succeed. The username
|
|
||||||
# part ("oauth2") is conventional and ignored by Gitea — only the
|
|
||||||
# token-as-password is verified.
|
|
||||||
#
|
|
||||||
# manifest.json was migrated to lowercase org slugs on
|
|
||||||
# 2026-05-07 (post-suspension reconciliation), so we use $repo
|
|
||||||
# verbatim — no on-the-fly tolower transform needed.
|
|
||||||
if [ -n "${MOLECULE_GITEA_TOKEN:-}" ]; then
|
|
||||||
clone_url="https://oauth2:${MOLECULE_GITEA_TOKEN}@git.moleculesai.app/${repo}.git"
|
|
||||||
display_url="https://oauth2:***@git.moleculesai.app/${repo}.git"
|
|
||||||
else
|
|
||||||
clone_url="https://git.moleculesai.app/${repo}.git"
|
|
||||||
display_url="$clone_url"
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo " cloning $display_url -> $target_dir/$name (ref=$ref)"
|
|
||||||
if [ "$ref" = "main" ]; then
|
if [ "$ref" = "main" ]; then
|
||||||
git clone --depth=1 -q "$clone_url" "$target_dir/$name"
|
git clone --depth=1 -q "https://github.com/${repo}.git" "$target_dir/$name"
|
||||||
else
|
else
|
||||||
git clone --depth=1 -q --branch "$ref" "$clone_url" "$target_dir/$name"
|
git clone --depth=1 -q --branch "$ref" "https://github.com/${repo}.git" "$target_dir/$name"
|
||||||
fi
|
fi
|
||||||
CLONED=$((CLONED + 1))
|
CLONED=$((CLONED + 1))
|
||||||
i=$((i + 1))
|
i=$((i + 1))
|
||||||
|
|||||||
@ -1,155 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
# edge-429-probe.sh — capture 429 origin (workspace-server vs CF/Vercel edge)
|
|
||||||
# during a simulated canvas-burst against a tenant subdomain.
|
|
||||||
#
|
|
||||||
# Issue molecule-core#62. The post-#60 verification step asks an
|
|
||||||
# operator with CF/Vercel dashboard access to confirm whether the
|
|
||||||
# layout-chunk 429s observed in DevTools were:
|
|
||||||
# (a) workspace-server bucket overflow (closes once #60 deploys), or
|
|
||||||
# (b) actual edge-layer rate-limiting (CF or Vercel).
|
|
||||||
#
|
|
||||||
# This script doesn't need dashboard access. It reproduces the burst
|
|
||||||
# pattern locally and dumps every 429's response shape so the operator
|
|
||||||
# can distinguish (a) from (b) by inspection: workspace-server emits a
|
|
||||||
# JSON body, CF emits HTML, Vercel emits a different HTML. Headers tell
|
|
||||||
# the same story (cf-ray vs x-vercel-*).
|
|
||||||
#
|
|
||||||
# Usage:
|
|
||||||
# ./scripts/edge-429-probe.sh <tenant-host> [--burst N] [--waves N] [--pause SECS] [--out file]
|
|
||||||
#
|
|
||||||
# Example:
|
|
||||||
# ./scripts/edge-429-probe.sh hongming.moleculesai.app --burst 80 --out /tmp/edge.txt
|
|
||||||
#
|
|
||||||
# The script is read-only against the target — it only issues GETs to
|
|
||||||
# public-by-design endpoints. No mutating requests, no credential use.
|
|
||||||
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
# ── Help / usage handling first, before positional capture ────────────────────
|
|
||||||
case "${1:-}" in
|
|
||||||
-h|--help|"")
|
|
||||||
sed -n '/^# edge-429-probe.sh/,/^$/p' "$0" | sed 's/^# \{0,1\}//'
|
|
||||||
exit 0
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
HOST="$1"; shift
|
|
||||||
BURST=80
|
|
||||||
WAVES=3
|
|
||||||
WAVE_PAUSE=2
|
|
||||||
OUT=""
|
|
||||||
|
|
||||||
while [ "${1:-}" != "" ]; do
|
|
||||||
case "$1" in
|
|
||||||
--burst) BURST="$2"; shift 2 ;;
|
|
||||||
--waves) WAVES="$2"; shift 2 ;;
|
|
||||||
--pause) WAVE_PAUSE="$2"; shift 2 ;;
|
|
||||||
--out) OUT="$2"; shift 2 ;;
|
|
||||||
-h|--help)
|
|
||||||
sed -n '/^# edge-429-probe.sh/,/^$/p' "$0" | sed 's/^# \{0,1\}//'
|
|
||||||
exit 0
|
|
||||||
;;
|
|
||||||
*) echo "unknown arg: $1" >&2; exit 2 ;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
# ── Endpoint discovery ────────────────────────────────────────────────────────
|
|
||||||
echo "→ Discovering a layout-chunk URL from canvas root..." >&2
|
|
||||||
ROOT_BODY=$(curl -fsSL --max-time 10 "https://${HOST}/" 2>/dev/null || true)
|
|
||||||
LAYOUT_PATH=$(echo "$ROOT_BODY" \
|
|
||||||
| grep -oE '/_next/static/chunks/layout-[A-Za-z0-9_-]+\.js' \
|
|
||||||
| head -1 || true)
|
|
||||||
if [ -z "$LAYOUT_PATH" ]; then
|
|
||||||
LAYOUT_PATH="/_next/static/chunks/layout-probe-not-found.js"
|
|
||||||
echo " (no layout chunk discovered — using sentinel path; 404 on this is expected)" >&2
|
|
||||||
else
|
|
||||||
echo " layout chunk: $LAYOUT_PATH" >&2
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Probe URL: a generic activity endpoint. The rate-limiter middleware
|
|
||||||
# runs BEFORE workspace-id validation, so unauth/invalid-id requests
|
|
||||||
# still hit the bucket.
|
|
||||||
ACTIVITY_PATH="/workspaces/00000000-0000-0000-0000-000000000000/activity?probe=edge-429"
|
|
||||||
|
|
||||||
# ── Fire one curl, write a single-line JSON-ish status record to stdout ──────
|
|
||||||
# Inlined into xargs as a heredoc-style command rather than a function so
|
|
||||||
# the function-export pitfalls (some shells lose `export -f` across xargs)
|
|
||||||
# don't apply. Each output line is a parseable record; failed curls emit
|
|
||||||
# a curl_err record so request volume is preserved.
|
|
||||||
TMP_RESULTS="$(mktemp -t edge-429-probe.XXXXXX)"
|
|
||||||
trap 'rm -f "$TMP_RESULTS"' EXIT
|
|
||||||
|
|
||||||
run_burst() {
|
|
||||||
# $1 = path; $2 = label; $3 = wave_id
|
|
||||||
local path="$1" label="$2" wave="$3"
|
|
||||||
local i
|
|
||||||
for i in $(seq 1 "$BURST"); do
|
|
||||||
{
|
|
||||||
out=$(curl -sS --max-time 10 -o /dev/null \
|
|
||||||
-w 'status=%{http_code} size=%{size_download} time=%{time_total} server=%{header.server} cf_ray=%{header.cf-ray} x_vercel=%{header.x-vercel-id} retry_after=%{header.retry-after} content_type=%{header.content-type} x_ratelimit_limit=%{header.x-ratelimit-limit} x_ratelimit_remaining=%{header.x-ratelimit-remaining} x_ratelimit_reset=%{header.x-ratelimit-reset}\n' \
|
|
||||||
"https://${HOST}${path}" 2>/dev/null) || out="status=curl_err"
|
|
||||||
printf 'label=%s-%s-%s %s\n' "$label" "$wave" "$i" "$out" >> "$TMP_RESULTS"
|
|
||||||
} &
|
|
||||||
done
|
|
||||||
wait
|
|
||||||
}
|
|
||||||
|
|
||||||
emit() {
|
|
||||||
if [ -n "$OUT" ]; then
|
|
||||||
printf '%s\n' "$*" >> "$OUT"
|
|
||||||
else
|
|
||||||
printf '%s\n' "$*"
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
if [ -n "$OUT" ]; then : > "$OUT"; fi
|
|
||||||
|
|
||||||
emit "# edge-429-probe report"
|
|
||||||
emit "# host=$HOST burst=$BURST waves=$WAVES pause=${WAVE_PAUSE}s"
|
|
||||||
emit "# layout_path=$LAYOUT_PATH"
|
|
||||||
emit "# activity_path=$ACTIVITY_PATH"
|
|
||||||
emit "# generated=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
|
||||||
emit ""
|
|
||||||
|
|
||||||
for wave in $(seq 1 "$WAVES"); do
|
|
||||||
emit "## wave $wave"
|
|
||||||
: > "$TMP_RESULTS"
|
|
||||||
run_burst "$LAYOUT_PATH" "layout" "$wave"
|
|
||||||
run_burst "$ACTIVITY_PATH" "activity" "$wave"
|
|
||||||
while read -r line; do
|
|
||||||
emit " $line"
|
|
||||||
done < "$TMP_RESULTS"
|
|
||||||
if [ "$wave" -lt "$WAVES" ]; then
|
|
||||||
sleep "$WAVE_PAUSE"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
emit ""
|
|
||||||
emit "## summary — how to read the report"
|
|
||||||
emit "# status=429 + content_type starts with application/json + x_ratelimit_limit set"
|
|
||||||
emit "# => workspace-server bucket overflow. Closes when #60 deploys."
|
|
||||||
emit "# status=429 + cf_ray set + content_type=text/html"
|
|
||||||
emit "# => Cloudflare WAF / rate-limit. Audit dashboard rules per #62."
|
|
||||||
emit "# status=429 + x_vercel set + content_type=text/html"
|
|
||||||
emit "# => Vercel edge / Bot Fight Mode. Audit Vercel project per #62."
|
|
||||||
emit "# status=429 with no server/cf_ray/x_vercel"
|
|
||||||
emit "# => corporate proxy or VPN. Not actionable in this repo."
|
|
||||||
|
|
||||||
if [ -n "$OUT" ]; then
|
|
||||||
echo "→ Report written to $OUT" >&2
|
|
||||||
# Match only data lines (begin with two-space indent + "label="),
|
|
||||||
# not the summary's reference text which also mentions "status=429".
|
|
||||||
# grep -c outputs "0" + exits 1 when zero matches; `|| true` masks
|
|
||||||
# the exit status so set -e doesn't trip without losing the count.
|
|
||||||
total=$(grep -c '^ label=' "$OUT" 2>/dev/null || true)
|
|
||||||
total429=$(grep -c '^ label=.*status=429' "$OUT" 2>/dev/null || true)
|
|
||||||
total=${total:-0}
|
|
||||||
total429=${total429:-0}
|
|
||||||
echo "→ Totals: ${total429} of ${total} requests returned 429" >&2
|
|
||||||
if [ "${total429}" -gt 0 ]; then
|
|
||||||
echo "→ Per-label 429 counts:" >&2
|
|
||||||
grep '^ label=.*status=429' "$OUT" \
|
|
||||||
| sed -E 's/^ label=([^-]+).*/ \1/' \
|
|
||||||
| sort | uniq -c >&2
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
@ -19,15 +19,9 @@ Exit codes:
|
|||||||
0 — no collisions
|
0 — no collisions
|
||||||
1 — collision detected; output names the conflicting PR(s) for the author
|
1 — collision detected; output names the conflicting PR(s) for the author
|
||||||
|
|
||||||
Designed to run from a Gitea Actions PR check. Reads PR metadata via direct
|
Designed to run from a GitHub Actions PR check. Reads PR metadata via the
|
||||||
HTTP calls to Gitea's REST API (`/api/v1/`), which on the molecule-ai fleet
|
GitHub CLI (gh) which is preinstalled on ubuntu-latest runners. Runs in
|
||||||
lives at https://git.moleculesai.app. Runs in under 10s against a typical PR.
|
under 10s against a typical PR.
|
||||||
|
|
||||||
Post-2026-05-06 (Gitea migration, issue #75): the previous version called
|
|
||||||
the GitHub CLI (``gh pr list``, ``gh pr diff``). On Gitea those calls hit
|
|
||||||
either the GraphQL endpoint (HTTP 405) or /api/v3 (HTTP 404). This module
|
|
||||||
now talks to /api/v1 directly via urllib so it works against any Gitea
|
|
||||||
host without a `gh` install or extra dependencies.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
@ -37,70 +31,12 @@ import os
|
|||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import urllib.error
|
|
||||||
import urllib.parse
|
|
||||||
import urllib.request
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
MIGRATIONS_DIR = "workspace-server/migrations"
|
MIGRATIONS_DIR = "workspace-server/migrations"
|
||||||
MIGRATION_FILE_RE = re.compile(r"^(\d+)_[^/]+\.(up|down)\.sql$")
|
MIGRATION_FILE_RE = re.compile(r"^(\d+)_[^/]+\.(up|down)\.sql$")
|
||||||
|
|
||||||
|
|
||||||
def _gitea_api_url() -> str:
|
|
||||||
"""Resolve the Gitea API base URL.
|
|
||||||
|
|
||||||
act_runner forwards github.server_url as GITHUB_SERVER_URL; for the
|
|
||||||
molecule-ai fleet that's https://git.moleculesai.app. Append /api/v1
|
|
||||||
to get the REST root. Override directly via GITEA_API_URL for tests
|
|
||||||
or non-default hosts.
|
|
||||||
"""
|
|
||||||
env_override = os.environ.get("GITEA_API_URL", "").rstrip("/")
|
|
||||||
if env_override:
|
|
||||||
return env_override
|
|
||||||
server = os.environ.get("GITHUB_SERVER_URL", "https://git.moleculesai.app").rstrip("/")
|
|
||||||
return f"{server}/api/v1"
|
|
||||||
|
|
||||||
|
|
||||||
def _gitea_token() -> str:
|
|
||||||
"""Resolve the Gitea token from env. GITEA_TOKEN wins; falls back
|
|
||||||
to GITHUB_TOKEN (set by act_runner) and GH_TOKEN (operator habit
|
|
||||||
from the GitHub era)."""
|
|
||||||
return (
|
|
||||||
os.environ.get("GITEA_TOKEN")
|
|
||||||
or os.environ.get("GITHUB_TOKEN")
|
|
||||||
or os.environ.get("GH_TOKEN")
|
|
||||||
or ""
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _gitea_get(path: str, params: dict[str, str] | None = None) -> bytes | None:
|
|
||||||
"""GET against /api/v1; returns response body or None on HTTP error.
|
|
||||||
|
|
||||||
Errors return None (not raise) because callers handle missing data
|
|
||||||
by emitting an actionable workflow message rather than crashing the
|
|
||||||
PR check on a transient API blip.
|
|
||||||
"""
|
|
||||||
base = _gitea_api_url()
|
|
||||||
qs = ""
|
|
||||||
if params:
|
|
||||||
qs = "?" + urllib.parse.urlencode(params)
|
|
||||||
url = f"{base}/{path.lstrip('/')}{qs}"
|
|
||||||
req = urllib.request.Request(url)
|
|
||||||
token = _gitea_token()
|
|
||||||
if token:
|
|
||||||
req.add_header("Authorization", f"token {token}")
|
|
||||||
req.add_header("Accept", "application/json")
|
|
||||||
try:
|
|
||||||
with urllib.request.urlopen(req, timeout=20) as resp: # noqa: S310
|
|
||||||
return resp.read()
|
|
||||||
except urllib.error.HTTPError as e:
|
|
||||||
sys.stderr.write(f"Gitea API HTTP {e.code} on {path}: {e.reason}\n")
|
|
||||||
return None
|
|
||||||
except (urllib.error.URLError, TimeoutError) as e:
|
|
||||||
sys.stderr.write(f"Gitea API network error on {path}: {e}\n")
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def run(cmd: list[str], check: bool = True) -> str:
|
def run(cmd: list[str], check: bool = True) -> str:
|
||||||
"""Run a subprocess and return stdout. Raise on non-zero when check=True."""
|
"""Run a subprocess and return stdout. Raise on non-zero when check=True."""
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||||
@ -160,49 +96,32 @@ def open_prs_with_migration_prefix(
|
|||||||
repo: str, prefix: int, exclude_pr: int
|
repo: str, prefix: int, exclude_pr: int
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
"""Return open PRs (other than `exclude_pr`) that add a migration with
|
"""Return open PRs (other than `exclude_pr`) that add a migration with
|
||||||
`prefix`. Walks open PRs via Gitea's `/repos/{owner}/{repo}/pulls` and
|
`prefix`. Uses `gh pr diff` per PR — we only need to walk PRs that are
|
||||||
pulls each one's changed-file list via `/pulls/{n}/files`. The cost is
|
actually in flight, so the cost is bounded by open-PR count.
|
||||||
bounded by open-PR count, which is small (<100) on this repo. The
|
|
||||||
return shape mimics the GitHub CLI's `--json number,headRefName`:
|
|
||||||
``[{"number": int, "headRefName": str}, ...]``.
|
|
||||||
"""
|
"""
|
||||||
body = _gitea_get(
|
out = run([
|
||||||
f"repos/{repo}/pulls",
|
"gh", "pr", "list", "--repo", repo, "--state", "open",
|
||||||
{"state": "open", "limit": "50"},
|
"--json", "number,headRefName", "--limit", "100",
|
||||||
)
|
])
|
||||||
if body is None:
|
prs = json.loads(out)
|
||||||
# Best-effort: a transient Gitea blip shouldn't fail the PR
|
|
||||||
# check (the base-branch collision check runs locally and is
|
|
||||||
# the more common failure mode).
|
|
||||||
return []
|
|
||||||
prs = json.loads(body)
|
|
||||||
matches: list[dict] = []
|
matches: list[dict] = []
|
||||||
for pr in prs:
|
for pr in prs:
|
||||||
num = pr["number"]
|
num = pr["number"]
|
||||||
if num == exclude_pr:
|
if num == exclude_pr:
|
||||||
continue
|
continue
|
||||||
# Gitea returns the head ref under .head.ref (REST shape);
|
|
||||||
# GitHub CLI's --json headRefName flattens it. Normalize on
|
|
||||||
# the way out so callers see the historical shape.
|
|
||||||
head_ref_name = (pr.get("head") or {}).get("ref", "")
|
|
||||||
files_body = _gitea_get(f"repos/{repo}/pulls/{num}/files", {"limit": "100"})
|
|
||||||
if files_body is None:
|
|
||||||
continue
|
|
||||||
try:
|
try:
|
||||||
files = json.loads(files_body)
|
files = run([
|
||||||
except json.JSONDecodeError:
|
"gh", "pr", "diff", str(num), "--repo", repo, "--name-only",
|
||||||
|
], check=False)
|
||||||
|
except Exception: # noqa: BLE001
|
||||||
continue
|
continue
|
||||||
for f in files:
|
for raw in files.splitlines():
|
||||||
# Gitea's /pulls/{n}/files returns objects with `.filename`
|
|
||||||
# (same as GitHub's REST). Older Gitea versions emit
|
|
||||||
# `.name` instead — handle both.
|
|
||||||
raw = f.get("filename") or f.get("name") or ""
|
|
||||||
path = Path(raw.strip())
|
path = Path(raw.strip())
|
||||||
if not path.name:
|
if not path.name:
|
||||||
continue
|
continue
|
||||||
m = MIGRATION_FILE_RE.match(path.name)
|
m = MIGRATION_FILE_RE.match(path.name)
|
||||||
if m and int(m.group(1)) == prefix:
|
if m and int(m.group(1)) == prefix:
|
||||||
matches.append({"number": num, "headRefName": head_ref_name})
|
matches.append(pr)
|
||||||
break
|
break
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
@ -219,10 +138,7 @@ def main() -> int:
|
|||||||
pr_number = int(pr_number_env)
|
pr_number = int(pr_number_env)
|
||||||
base_ref = os.environ.get("BASE_REF", "origin/staging")
|
base_ref = os.environ.get("BASE_REF", "origin/staging")
|
||||||
head_ref = os.environ.get("HEAD_REF", "HEAD")
|
head_ref = os.environ.get("HEAD_REF", "HEAD")
|
||||||
# Default kept lowercase to match the Gitea-canonical org name
|
repo = os.environ.get("GITHUB_REPOSITORY", "Molecule-AI/molecule-core")
|
||||||
# (post-2026-05-06 migration). Tests + workflow context override
|
|
||||||
# via GITHUB_REPOSITORY which act_runner sets per-run.
|
|
||||||
repo = os.environ.get("GITHUB_REPOSITORY", "molecule-ai/molecule-core")
|
|
||||||
|
|
||||||
added = migrations_in_diff(base_ref, head_ref)
|
added = migrations_in_diff(base_ref, head_ref)
|
||||||
if not added:
|
if not added:
|
||||||
|
|||||||
@ -105,5 +105,5 @@ Hard per-workflow timeouts (15–40 min) cap runaway cost. Three teardown layers
|
|||||||
|
|
||||||
## Known gaps (tracked elsewhere)
|
## Known gaps (tracked elsewhere)
|
||||||
|
|
||||||
- [#1369](https://git.moleculesai.app/molecule-ai/molecule-core/issues/1369): SaaS canvas Files / Terminal / Peers tabs — architecturally broken; whitelisted in the spec
|
- [#1369](https://github.com/Molecule-AI/molecule-core/issues/1369): SaaS canvas Files / Terminal / Peers tabs — architecturally broken; whitelisted in the spec
|
||||||
- LLM-driven delegation (autonomous `delegate_task` tool use) — probabilistic, not in v1; proxy mechanics covered
|
- LLM-driven delegation (autonomous `delegate_task` tool use) — probabilistic, not in v1; proxy mechanics covered
|
||||||
|
|||||||
@ -1,7 +1,5 @@
|
|||||||
# Production-shape local harness
|
# Production-shape local harness
|
||||||
|
|
||||||
<!-- Retrigger Harness Replays after Class G #168 + clone-manifest fix (#42). -->
|
|
||||||
|
|
||||||
The harness brings up the SaaS tenant topology on localhost using the
|
The harness brings up the SaaS tenant topology on localhost using the
|
||||||
same `Dockerfile.tenant` image that ships to production. Tests target
|
same `Dockerfile.tenant` image that ships to production. Tests target
|
||||||
the cf-proxy on `http://localhost:8080` and pass the tenant identity
|
the cf-proxy on `http://localhost:8080` and pass the tenant identity
|
||||||
|
|||||||
@ -1,14 +0,0 @@
|
|||||||
# cf-proxy harness image — nginx + the harness's tenant-routing config baked
|
|
||||||
# in at build time.
|
|
||||||
#
|
|
||||||
# Why bake (not bind-mount): on Gitea Actions / act_runner, the runner is a
|
|
||||||
# container talking to the OUTER docker daemon over the host socket; runc
|
|
||||||
# resolves bind-mount source paths on the outer host filesystem, where the
|
|
||||||
# repo at `/workspace/.../tests/harness/cf-proxy/nginx.conf` is invisible.
|
|
||||||
# Compose `configs:` (with `file:`) falls back to bind mounts when swarm is
|
|
||||||
# not active, so it hits the same gap. A build-time COPY uploads the file
|
|
||||||
# as part of the docker build context — the daemon receives the tarball
|
|
||||||
# directly and never bind-mounts. See issue #88 item 2.
|
|
||||||
FROM nginx:1.27-alpine
|
|
||||||
|
|
||||||
COPY nginx.conf /etc/nginx/nginx.conf
|
|
||||||
@ -167,26 +167,15 @@ services:
|
|||||||
# Production shape: same single CF tunnel front-doors every tenant
|
# Production shape: same single CF tunnel front-doors every tenant
|
||||||
# subdomain — the Host header carries the tenant identity, not the
|
# subdomain — the Host header carries the tenant identity, not the
|
||||||
# routing destination. Local cf-proxy mirrors this exactly.
|
# routing destination. Local cf-proxy mirrors this exactly.
|
||||||
#
|
|
||||||
# nginx.conf delivery: built into a custom image via cf-proxy/Dockerfile
|
|
||||||
# (a thin nginx:1.27-alpine + COPY). NOT a bind mount and NOT a
|
|
||||||
# compose `configs:` block, both of which break under Gitea's
|
|
||||||
# act_runner: the runner talks to the OUTER docker daemon over the
|
|
||||||
# host socket, and runc resolves bind sources on the outer host
|
|
||||||
# filesystem, where `/workspace/.../tests/harness/cf-proxy/nginx.conf`
|
|
||||||
# is invisible. Compose `configs:` falls back to bind mounts without
|
|
||||||
# swarm, so it hits the same gap. A build context, by contrast, is
|
|
||||||
# uploaded to the daemon as a tarball at build time — no bind. See
|
|
||||||
# issue #88 item 2.
|
|
||||||
cf-proxy:
|
cf-proxy:
|
||||||
build:
|
image: nginx:1.27-alpine
|
||||||
context: ./cf-proxy
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
depends_on:
|
depends_on:
|
||||||
tenant-alpha:
|
tenant-alpha:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
tenant-beta:
|
tenant-beta:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
volumes:
|
||||||
|
- ./cf-proxy/nginx.conf:/etc/nginx/nginx.conf:ro
|
||||||
# Bind to 127.0.0.1 only — hardcoded ADMIN_TOKENs make 0.0.0.0
|
# Bind to 127.0.0.1 only — hardcoded ADMIN_TOKENs make 0.0.0.0
|
||||||
# exposure unsafe even on a local network.
|
# exposure unsafe even on a local network.
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
@ -1,252 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
# tools/branch-protection/check_name_parity.sh — assert every required-
|
|
||||||
# check name listed in apply.sh maps to a workflow job whose "always
|
|
||||||
# emits this status" shape is intact.
|
|
||||||
#
|
|
||||||
# Closes #144 / encodes the saved memory
|
|
||||||
# feedback_branch_protection_check_name_parity:
|
|
||||||
#
|
|
||||||
# "Path filters (e.g., detect-changes → conditional skip) silently
|
|
||||||
# break branch protection because no job emits the protected
|
|
||||||
# sentinel status when path-filter returns false."
|
|
||||||
#
|
|
||||||
# Two safe shapes for a required-check job:
|
|
||||||
#
|
|
||||||
# 1. Single-job-with-per-step-if (path-filter case):
|
|
||||||
# The workflow has NO top-level `paths:` filter; the always-running
|
|
||||||
# job has steps gated on `if: needs.<gate>.outputs.<flag> == 'true'`
|
|
||||||
# so the no-op step alone fires when paths exclude the commit.
|
|
||||||
# Used by ci.yml's Platform/Canvas/Python/Shellcheck and by
|
|
||||||
# e2e-api.yml / e2e-staging-canvas.yml / runtime-prbuild-compat.yml.
|
|
||||||
#
|
|
||||||
# 2. Aggregator-with-needs+always() (matrix-refactor case):
|
|
||||||
# An aggregator job named after the protected check `needs:` the
|
|
||||||
# matrix children + uses `if: always()` + checks each child's
|
|
||||||
# result. (Not currently in this repo but supported.)
|
|
||||||
#
|
|
||||||
# Unsafe shape this script catches:
|
|
||||||
# - Workflow has top-level `paths:` filter AND the protected check
|
|
||||||
# name is on a single job. When paths-filter excludes a commit, the
|
|
||||||
# workflow doesn't fire — branch protection waits forever.
|
|
||||||
#
|
|
||||||
# Exit codes:
|
|
||||||
# 0 — every required check name has at least one safe-shape match
|
|
||||||
# 1 — a required name has no match OR matches an unsafe shape
|
|
||||||
# 2 — script-internal error (apply.sh missing, awk failure, etc.)
|
|
||||||
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
||||||
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
|
||||||
WORKFLOWS_DIR="$REPO_ROOT/.github/workflows"
|
|
||||||
APPLY_SH="$SCRIPT_DIR/apply.sh"
|
|
||||||
|
|
||||||
if [[ ! -f "$APPLY_SH" ]]; then
|
|
||||||
echo "check_name_parity: missing apply.sh at $APPLY_SH" >&2
|
|
||||||
exit 2
|
|
||||||
fi
|
|
||||||
if [[ ! -d "$WORKFLOWS_DIR" ]]; then
|
|
||||||
echo "check_name_parity: missing .github/workflows at $WORKFLOWS_DIR" >&2
|
|
||||||
exit 2
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ─── Extract the union of required check names from apply.sh ──────
|
|
||||||
# apply.sh has STAGING_CHECKS and MAIN_CHECKS heredocs; union them so
|
|
||||||
# we audit any name that gates EITHER branch. Filters out blank lines
|
|
||||||
# and the heredoc end marker. Sorted + uniq so the audit output is stable.
|
|
||||||
#
|
|
||||||
# Captures the heredoc end-marker dynamically from the `<<'MARKER'`
|
|
||||||
# token on the opening line — the token can be `EOF` (production
|
|
||||||
# apply.sh), `EOF2` (test fixtures with nested heredocs), or any other
|
|
||||||
# bash-legal identifier. Without dynamic extraction, test fixtures
|
|
||||||
# with nested heredocs would either skip-capture (wrong end marker)
|
|
||||||
# or capture the inner end marker as a stray check name.
|
|
||||||
#
|
|
||||||
# Two-step approach to keep awk-portable across BSD awk (macOS) and
|
|
||||||
# gawk (Linux): grep finds the heredoc-opening lines, sed extracts the
|
|
||||||
# marker, then awk does the capture. Pure-awk attempts hit BSD-vs-GNU
|
|
||||||
# regex/variable-init differences that regress silently — this shape
|
|
||||||
# stays in POSIX-portable territory.
|
|
||||||
extract_heredoc_block() {
|
|
||||||
local file="$1"
|
|
||||||
local marker="$2"
|
|
||||||
awk -v marker="$marker" '
|
|
||||||
$0 ~ "<<.?" marker { capture=1; next }
|
|
||||||
$0 == marker && capture { capture=0; next }
|
|
||||||
capture && NF { print }
|
|
||||||
' "$file"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Find every heredoc-end marker used in apply.sh (typically just EOF
|
|
||||||
# in the production script, but EOF2 / TAG / ABC are all valid in
|
|
||||||
# fixtures or future expansions). Each marker maps to one or more
|
|
||||||
# heredoc blocks; we union all of them.
|
|
||||||
markers=$(grep -E "<<['\"]?[A-Za-z0-9_]+['\"]?[[:space:]]*\\|\\|" "$APPLY_SH" \
|
|
||||||
| sed -E "s/.*<<['\"]?([A-Za-z0-9_]+)['\"]?.*/\\1/" \
|
|
||||||
| sort -u)
|
|
||||||
|
|
||||||
required_names=""
|
|
||||||
while IFS= read -r marker; do
|
|
||||||
[[ -z "$marker" ]] && continue
|
|
||||||
block=$(extract_heredoc_block "$APPLY_SH" "$marker")
|
|
||||||
if [[ -n "$block" ]]; then
|
|
||||||
required_names+="$block"$'\n'
|
|
||||||
fi
|
|
||||||
done <<< "$markers"
|
|
||||||
|
|
||||||
required_names=$(printf '%s' "$required_names" | sort -u | sed '/^$/d')
|
|
||||||
|
|
||||||
if [[ -z "$required_names" ]]; then
|
|
||||||
echo "check_name_parity: failed to extract required check names from apply.sh" >&2
|
|
||||||
exit 2
|
|
||||||
fi
|
|
||||||
|
|
||||||
# ─── For each required name, find the workflow file that owns it ──
|
|
||||||
# A workflow "owns" a name if any `name:` line in the file equals the
|
|
||||||
# required name. We look at job-level names AND the workflow-level
|
|
||||||
# `name:` (the latter prefixes "Analyze" jobs in codeql.yml).
|
|
||||||
#
|
|
||||||
# Then we check whether the owning workflow has a top-level `paths:`
|
|
||||||
# filter. The unsafe shape is:
|
|
||||||
# - top-level paths: filter present
|
|
||||||
# - AND the named job is gated only at the workflow level (no per-
|
|
||||||
# step `if:` gates)
|
|
||||||
#
|
|
||||||
# Distinguishing "no `paths:` filter" from "paths: filter + per-step
|
|
||||||
# gating" requires parsing the YAML semantics. We do it heuristically:
|
|
||||||
#
|
|
||||||
# - "no top-level paths:" → safe by construction (workflow always
|
|
||||||
# fires)
|
|
||||||
# - "paths: present" → check that the matching job has at
|
|
||||||
# least one `if: needs.<x>.outputs`
|
|
||||||
# step gate. If yes, that's the
|
|
||||||
# single-job-with-per-step-if shape.
|
|
||||||
# If no, flag as unsafe.
|
|
||||||
#
|
|
||||||
# Heuristic so it stays a portable bash + awk + grep tool — full YAML
|
|
||||||
# parsing would need yq which isn't a dependency. The known unsafe
|
|
||||||
# shape (workflow-level paths: AND no per-step if-gates) is what we're
|
|
||||||
# trying to catch.
|
|
||||||
|
|
||||||
failed=0
|
|
||||||
declare -a unsafe_findings=()
|
|
||||||
|
|
||||||
while IFS= read -r name; do
|
|
||||||
[[ -z "$name" ]] && continue
|
|
||||||
# Find every workflow file that contains a job with `name: <name>` or
|
|
||||||
# whose top-level workflow `name:` plus matrix substitution would
|
|
||||||
# produce <name>. Need to be careful about quoting — YAML allows
|
|
||||||
# `name: Foo`, `name: "Foo"`, `name: 'Foo'`. Strip quotes.
|
|
||||||
matches=()
|
|
||||||
while IFS= read -r f; do
|
|
||||||
# Look for an exact `name:` match (anywhere in the file). The
|
|
||||||
# workflow-level name line is at column 0; job-level names are
|
|
||||||
# indented. Either is acceptable for parity — what matters is
|
|
||||||
# whether the EMITTED check-run name is the one we required.
|
|
||||||
# Strip surrounding quotes/whitespace before comparing.
|
|
||||||
if awk -v want="$name" '
|
|
||||||
/^[[:space:]]*name:[[:space:]]*/ {
|
|
||||||
line = $0
|
|
||||||
sub(/^[[:space:]]*name:[[:space:]]*/, "", line)
|
|
||||||
# Strip surrounding " or '\''
|
|
||||||
gsub(/^["\047]|["\047]$/, "", line)
|
|
||||||
# Strip trailing whitespace + comment
|
|
||||||
sub(/[[:space:]]*#.*$/, "", line)
|
|
||||||
sub(/[[:space:]]+$/, "", line)
|
|
||||||
if (line == want) found = 1
|
|
||||||
}
|
|
||||||
END { exit !found }
|
|
||||||
' "$f"; then
|
|
||||||
matches+=("$f")
|
|
||||||
fi
|
|
||||||
done < <(find "$WORKFLOWS_DIR" -name '*.yml' -o -name '*.yaml')
|
|
||||||
|
|
||||||
if [[ ${#matches[@]} -eq 0 ]]; then
|
|
||||||
# Special case — Analyze (go/javascript-typescript/python) is
|
|
||||||
# generated by codeql.yml's matrix expansion of `Analyze (${{
|
|
||||||
# matrix.language }})`. Don't flag those as missing if codeql.yml
|
|
||||||
# exists with the expected base name.
|
|
||||||
case "$name" in
|
|
||||||
"Analyze (go)"|"Analyze (javascript-typescript)"|"Analyze (python)")
|
|
||||||
# shellcheck disable=SC2016
|
|
||||||
# The literal `${{ matrix.language }}` is the GHA template
|
|
||||||
# syntax we're searching FOR — not a shell expansion. SC2016
|
|
||||||
# would have us add quotes that defeat the search.
|
|
||||||
if [[ -f "$WORKFLOWS_DIR/codeql.yml" ]] && \
|
|
||||||
grep -q 'name: Analyze (${{[[:space:]]*matrix.language[[:space:]]*}})' "$WORKFLOWS_DIR/codeql.yml"; then
|
|
||||||
matches=("$WORKFLOWS_DIR/codeql.yml")
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ ${#matches[@]} -eq 0 ]]; then
|
|
||||||
unsafe_findings+=("MISSING: required check name '$name' has no matching workflow job")
|
|
||||||
failed=1
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
|
|
||||||
# For each owning workflow, classify safe vs unsafe.
|
|
||||||
for f in "${matches[@]}"; do
|
|
||||||
rel="${f#"$REPO_ROOT"/}"
|
|
||||||
# Heuristic: does the workflow have a top-level `paths:` filter?
|
|
||||||
# Top-level here means under the `on:` key, not under jobs.<x>.if.
|
|
||||||
# Workflow-level paths filters appear at indent depth 4 (under
|
|
||||||
# `push:` or `pull_request:`). Job-level `if:` paths-filter doesn't
|
|
||||||
# block the workflow from firing.
|
|
||||||
has_top_paths=0
|
|
||||||
if awk '
|
|
||||||
# Track whether we are inside the `on:` block. The `on:` block
|
|
||||||
# starts at column 0 (`on:` key) and ends when the next column-0
|
|
||||||
# key appears.
|
|
||||||
/^on:[[:space:]]*$/ { in_on = 1; next }
|
|
||||||
/^[a-zA-Z]/ && in_on { in_on = 0 }
|
|
||||||
in_on && /^[[:space:]]+paths:[[:space:]]*$/ { print "yes"; exit }
|
|
||||||
in_on && /^[[:space:]]+paths:[[:space:]]*\[/ { print "yes"; exit }
|
|
||||||
' "$f" | grep -q yes; then
|
|
||||||
has_top_paths=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ "$has_top_paths" -eq 0 ]]; then
|
|
||||||
# Safe: workflow always fires. If there are inner per-step if-
|
|
||||||
# gates (single-job-with-per-step-if pattern), the no-op step
|
|
||||||
# produces SUCCESS for the protected name — branch-protection-clean.
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Unsafe candidate — has top-level paths: AND we need to verify
|
|
||||||
# the per-step if-gate pattern is absent. Look for any `if:`
|
|
||||||
# referencing a paths-filter / detect-changes output inside the
|
|
||||||
# owning job's body. If at least one is present, classify as the
|
|
||||||
# single-job-with-per-step-if pattern (safe).
|
|
||||||
#
|
|
||||||
# The regex is intentionally anchored loosely — actual workflow
|
|
||||||
# YAML writes per-step if-gates as ` - if: needs.X.outputs.Y`
|
|
||||||
# (with the `-` step-marker between the leading spaces and the
|
|
||||||
# `if`). Anchoring on `^[[:space:]]+if:` would miss those.
|
|
||||||
if grep -qE "if:[[:space:]]+needs\.[a-zA-Z_-]+\.outputs\." "$f"; then
|
|
||||||
# Per-step if-gates exist. Combined with top-level paths: this
|
|
||||||
# would be a buggy mix (the workflow might still skip entirely
|
|
||||||
# when paths exclude). Flag as unsafe — the safe pattern omits
|
|
||||||
# the top-level paths: filter altogether and gates per-step.
|
|
||||||
unsafe_findings+=("UNSAFE-MIX: $rel has top-level paths: AND per-step if-gates — when paths exclude the commit, the workflow doesn't fire and the required check '$name' is silently absent. Drop the top-level paths: filter; keep the per-step if-gates.")
|
|
||||||
failed=1
|
|
||||||
else
|
|
||||||
# Top-level paths: with no per-step if-gates: the canonical
|
|
||||||
# check-name parity bug.
|
|
||||||
unsafe_findings+=("UNSAFE-PATH-FILTER: $rel has top-level paths: filter and no per-step if-gates. When paths exclude the commit, no job emits the required check '$name' — branch protection waits forever. Either drop the paths: filter and add per-step if-gates against a detect-changes output, or add an aggregator-with-needs+always() job that emits '$name'.")
|
|
||||||
failed=1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
done <<< "$required_names"
|
|
||||||
|
|
||||||
if [[ "$failed" -eq 0 ]]; then
|
|
||||||
echo "check_name_parity: OK — every required check name maps to a safe workflow shape."
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "check_name_parity: FOUND $((${#unsafe_findings[@]})) issue(s):" >&2
|
|
||||||
for finding in "${unsafe_findings[@]}"; do
|
|
||||||
echo " - $finding" >&2
|
|
||||||
done
|
|
||||||
exit 1
|
|
||||||
@ -1,285 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
# tools/branch-protection/test_check_name_parity.sh — unit tests for
|
|
||||||
# check_name_parity.sh.
|
|
||||||
#
|
|
||||||
# Builds synthetic apply.sh + workflow files in a tmpdir for each case,
|
|
||||||
# invokes the script with REPO_ROOT pointing at the tmpdir, and asserts
|
|
||||||
# on exit code + stderr. Per feedback_assert_exact_not_substring we
|
|
||||||
# pin the EXACT exit code AND a substring of the stderr that names the
|
|
||||||
# offending workflow + name combo — so a "false-pass that prints the
|
|
||||||
# wrong message" still fails the test.
|
|
||||||
#
|
|
||||||
# Run locally: bash tools/branch-protection/test_check_name_parity.sh
|
|
||||||
# Run in CI: same — added to ci.yml's shellcheck job's "E2E bash unit
|
|
||||||
# tests" step alongside test_model_slug.sh.
|
|
||||||
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
||||||
SCRIPT_UNDER_TEST="$SCRIPT_DIR/check_name_parity.sh"
|
|
||||||
|
|
||||||
if [[ ! -x "$SCRIPT_UNDER_TEST" ]]; then
|
|
||||||
echo "test_check_name_parity: script under test missing or not executable: $SCRIPT_UNDER_TEST" >&2
|
|
||||||
exit 2
|
|
||||||
fi
|
|
||||||
|
|
||||||
PASSED=0
|
|
||||||
FAILED=0
|
|
||||||
|
|
||||||
# Tracks the active tmpdir for the running case so the trap can clean
|
|
||||||
# up even when assertions abort the case mid-flight.
|
|
||||||
TMPDIR_FOR_CASE=""
|
|
||||||
trap '[[ -n "$TMPDIR_FOR_CASE" && -d "$TMPDIR_FOR_CASE" ]] && rm -rf "$TMPDIR_FOR_CASE"' EXIT
|
|
||||||
|
|
||||||
# Build a synthetic repo at $1 with apply.sh listing $2 (one name per
|
|
||||||
# line) as the staging required set + zero main required, then write
|
|
||||||
# whatever .github/workflows/* files the test case adds.
|
|
||||||
make_fake_repo() {
|
|
||||||
local root="$1"
|
|
||||||
local checks="$2"
|
|
||||||
mkdir -p "$root/tools/branch-protection"
|
|
||||||
mkdir -p "$root/.github/workflows"
|
|
||||||
cat > "$root/tools/branch-protection/apply.sh" <<EOF
|
|
||||||
#!/usr/bin/env bash
|
|
||||||
# Stub apply.sh — only the heredoc-shaped check lists matter for the
|
|
||||||
# parity script. Other functions intentionally absent.
|
|
||||||
|
|
||||||
read -r -d '' STAGING_CHECKS <<'EOF2' || true
|
|
||||||
$checks
|
|
||||||
EOF2
|
|
||||||
|
|
||||||
read -r -d '' MAIN_CHECKS <<'EOF2' || true
|
|
||||||
$checks
|
|
||||||
EOF2
|
|
||||||
EOF
|
|
||||||
chmod +x "$root/tools/branch-protection/apply.sh"
|
|
||||||
# Place the script-under-test alongside its sibling apply.sh so the
|
|
||||||
# script's REPO_ROOT walk finds the synthetic .github/workflows/.
|
|
||||||
cp "$SCRIPT_UNDER_TEST" "$root/tools/branch-protection/check_name_parity.sh"
|
|
||||||
}
|
|
||||||
|
|
||||||
run_case() {
|
|
||||||
local desc="$1"
|
|
||||||
local checks="$2"
|
|
||||||
local workflow_yaml="$3" # contents to write
|
|
||||||
local workflow_filename="$4"
|
|
||||||
local expected_exit="$5"
|
|
||||||
local expected_stderr_substring="$6"
|
|
||||||
TMPDIR_FOR_CASE=$(mktemp -d)
|
|
||||||
make_fake_repo "$TMPDIR_FOR_CASE" "$checks"
|
|
||||||
printf '%s' "$workflow_yaml" > "$TMPDIR_FOR_CASE/.github/workflows/$workflow_filename"
|
|
||||||
local stderr_file
|
|
||||||
stderr_file=$(mktemp)
|
|
||||||
local actual_exit=0
|
|
||||||
bash "$TMPDIR_FOR_CASE/tools/branch-protection/check_name_parity.sh" 2>"$stderr_file" >/dev/null || actual_exit=$?
|
|
||||||
local stderr_content
|
|
||||||
stderr_content=$(cat "$stderr_file")
|
|
||||||
rm "$stderr_file"
|
|
||||||
if [[ "$actual_exit" -ne "$expected_exit" ]]; then
|
|
||||||
echo "FAIL: $desc"
|
|
||||||
echo " expected exit: $expected_exit, got: $actual_exit"
|
|
||||||
echo " stderr: $stderr_content"
|
|
||||||
FAILED=$((FAILED+1))
|
|
||||||
rm -rf "$TMPDIR_FOR_CASE"; TMPDIR_FOR_CASE=""
|
|
||||||
return
|
|
||||||
fi
|
|
||||||
# Empty expected substring → no assertion on stderr (used for the
|
|
||||||
# passing case where stderr should be empty / not interesting).
|
|
||||||
if [[ -n "$expected_stderr_substring" ]]; then
|
|
||||||
if ! grep -qF "$expected_stderr_substring" <<< "$stderr_content"; then
|
|
||||||
echo "FAIL: $desc"
|
|
||||||
echo " expected stderr to contain: '$expected_stderr_substring'"
|
|
||||||
echo " actual stderr: $stderr_content"
|
|
||||||
FAILED=$((FAILED+1))
|
|
||||||
rm -rf "$TMPDIR_FOR_CASE"; TMPDIR_FOR_CASE=""
|
|
||||||
return
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
echo "PASS: $desc"
|
|
||||||
PASSED=$((PASSED+1))
|
|
||||||
rm -rf "$TMPDIR_FOR_CASE"; TMPDIR_FOR_CASE=""
|
|
||||||
}
|
|
||||||
|
|
||||||
# Case 1: safe workflow — no top-level paths: filter, single job
|
|
||||||
# emitting the required name. Should exit 0.
|
|
||||||
run_case "safe: no paths filter, job emits required name" \
|
|
||||||
"Foo Build" \
|
|
||||||
"$(cat <<'EOF'
|
|
||||||
name: Foo
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [main]
|
|
||||||
pull_request:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
foo:
|
|
||||||
name: Foo Build
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- run: echo ok
|
|
||||||
EOF
|
|
||||||
)" \
|
|
||||||
"foo.yml" \
|
|
||||||
0 \
|
|
||||||
""
|
|
||||||
|
|
||||||
# Case 2: unsafe — top-level paths: filter AND no per-step if-gates.
|
|
||||||
# This is the silent-block shape from the saved memory.
|
|
||||||
run_case "unsafe: top-level paths: filter without per-step if-gates" \
|
|
||||||
"Bar Build" \
|
|
||||||
"$(cat <<'EOF'
|
|
||||||
name: Bar
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [main]
|
|
||||||
paths:
|
|
||||||
- 'bar/**'
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- 'bar/**'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
bar:
|
|
||||||
name: Bar Build
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- run: echo ok
|
|
||||||
EOF
|
|
||||||
)" \
|
|
||||||
"bar.yml" \
|
|
||||||
1 \
|
|
||||||
"UNSAFE-PATH-FILTER"
|
|
||||||
|
|
||||||
# Case 3: required name has no emitter at all.
|
|
||||||
run_case "missing: required name not in any workflow" \
|
|
||||||
"Nonexistent Job" \
|
|
||||||
"$(cat <<'EOF'
|
|
||||||
name: Other
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
other:
|
|
||||||
name: Other Job
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- run: echo ok
|
|
||||||
EOF
|
|
||||||
)" \
|
|
||||||
"other.yml" \
|
|
||||||
1 \
|
|
||||||
"MISSING: required check name 'Nonexistent Job'"
|
|
||||||
|
|
||||||
# Case 4: safe — top-level paths: filter is absent BUT per-step if-
|
|
||||||
# gates are present (single-job-with-per-step-if pattern, what
|
|
||||||
# ci.yml + e2e-api.yml use). Should exit 0.
|
|
||||||
run_case "safe: per-step if-gates without top-level paths" \
|
|
||||||
"Baz Build" \
|
|
||||||
"$(cat <<'EOF'
|
|
||||||
name: Baz
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [main]
|
|
||||||
pull_request:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
changes:
|
|
||||||
name: Detect changes
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
outputs:
|
|
||||||
baz: ${{ steps.check.outputs.baz }}
|
|
||||||
steps:
|
|
||||||
- id: check
|
|
||||||
run: echo "baz=true" >> "$GITHUB_OUTPUT"
|
|
||||||
|
|
||||||
baz:
|
|
||||||
needs: changes
|
|
||||||
name: Baz Build
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- if: needs.changes.outputs.baz != 'true'
|
|
||||||
run: echo no-op
|
|
||||||
- if: needs.changes.outputs.baz == 'true'
|
|
||||||
run: echo real work
|
|
||||||
EOF
|
|
||||||
)" \
|
|
||||||
"baz.yml" \
|
|
||||||
0 \
|
|
||||||
""
|
|
||||||
|
|
||||||
# Case 5: unsafe-mix — top-level paths: AND per-step if-gates. The
|
|
||||||
# script flags this distinctly because the workflow may STILL skip
|
|
||||||
# entirely when paths exclude the commit (the per-step gates only
|
|
||||||
# matter if the workflow actually fires).
|
|
||||||
run_case "unsafe-mix: top-level paths: AND per-step if-gates" \
|
|
||||||
"Qux Build" \
|
|
||||||
"$(cat <<'EOF'
|
|
||||||
name: Qux
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches: [main]
|
|
||||||
paths:
|
|
||||||
- 'qux/**'
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- 'qux/**'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
changes:
|
|
||||||
name: Detect changes
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
outputs:
|
|
||||||
qux: ${{ steps.check.outputs.qux }}
|
|
||||||
steps:
|
|
||||||
- id: check
|
|
||||||
run: echo "qux=true" >> "$GITHUB_OUTPUT"
|
|
||||||
|
|
||||||
qux:
|
|
||||||
needs: changes
|
|
||||||
name: Qux Build
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- if: needs.changes.outputs.qux == 'true'
|
|
||||||
run: echo build
|
|
||||||
EOF
|
|
||||||
)" \
|
|
||||||
"qux.yml" \
|
|
||||||
1 \
|
|
||||||
"UNSAFE-MIX"
|
|
||||||
|
|
||||||
# Case 6: codeql.yml matrix — required names like "Analyze (go)" are
|
|
||||||
# generated by `Analyze (${{ matrix.language }})`. Script must
|
|
||||||
# special-case match this pattern.
|
|
||||||
run_case "matrix: codeql Analyze (go) is recognised via matrix expansion" \
|
|
||||||
"$(printf 'Analyze (go)\nAnalyze (javascript-typescript)\nAnalyze (python)')" \
|
|
||||||
"$(cat <<'EOF'
|
|
||||||
name: CodeQL
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
analyze:
|
|
||||||
name: Analyze (${{ matrix.language }})
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
language: [go, javascript-typescript, python]
|
|
||||||
steps:
|
|
||||||
- run: echo analyse
|
|
||||||
EOF
|
|
||||||
)" \
|
|
||||||
"codeql.yml" \
|
|
||||||
0 \
|
|
||||||
""
|
|
||||||
|
|
||||||
echo ""
|
|
||||||
echo "================================================"
|
|
||||||
echo "test_check_name_parity: $PASSED passed, $FAILED failed"
|
|
||||||
echo "================================================"
|
|
||||||
exit "$FAILED"
|
|
||||||
@ -1,49 +0,0 @@
|
|||||||
# air.toml — live-reload config for local docker-compose dev mode.
|
|
||||||
#
|
|
||||||
# Active when the platform service runs from workspace-server/Dockerfile.dev
|
|
||||||
# (selected via docker-compose.dev.yml overlay). In production, the regular
|
|
||||||
# Dockerfile builds a static binary; air is dev-only.
|
|
||||||
#
|
|
||||||
# Reference: https://github.com/air-verse/air
|
|
||||||
|
|
||||||
root = "."
|
|
||||||
testdata_dir = "testdata"
|
|
||||||
tmp_dir = "tmp"
|
|
||||||
|
|
||||||
[build]
|
|
||||||
# Same build invocation as Dockerfile's builder stage minus the
|
|
||||||
# CGO_ENABLED=0 toggle (CGO ok in dev for richer race detector output).
|
|
||||||
cmd = "go build -o ./tmp/server ./cmd/server"
|
|
||||||
bin = "tmp/server"
|
|
||||||
full_bin = ""
|
|
||||||
args_bin = []
|
|
||||||
# Watch every .go and .yaml file under workspace-server/.
|
|
||||||
include_ext = ["go", "yaml", "tmpl"]
|
|
||||||
# Don't watch tests, build artifacts, vendored deps, or migration .sql
|
|
||||||
# (migrations need a clean DB anyway — handled by docker-compose down/up).
|
|
||||||
exclude_dir = ["assets", "tmp", "vendor", "testdata", "node_modules"]
|
|
||||||
exclude_file = []
|
|
||||||
# _test.go and *_mock.go shouldn't trigger a rebuild — saves cycles.
|
|
||||||
exclude_regex = ["_test\\.go$", "_mock\\.go$"]
|
|
||||||
exclude_unchanged = true
|
|
||||||
follow_symlink = false
|
|
||||||
log = "build-errors.log"
|
|
||||||
# Kill running binary 1s before starting new one.
|
|
||||||
kill_delay = "1s"
|
|
||||||
send_interrupt = true
|
|
||||||
stop_on_error = true
|
|
||||||
# Debounce: wait this long after last change before triggering rebuild.
|
|
||||||
delay = 500
|
|
||||||
|
|
||||||
[log]
|
|
||||||
time = false
|
|
||||||
|
|
||||||
[color]
|
|
||||||
main = "magenta"
|
|
||||||
watcher = "cyan"
|
|
||||||
build = "yellow"
|
|
||||||
runner = "green"
|
|
||||||
|
|
||||||
[misc]
|
|
||||||
# Don't keep the tmp/ dir around between runs.
|
|
||||||
clean_on_exit = true
|
|
||||||
3
workspace-server/.gitignore
vendored
3
workspace-server/.gitignore
vendored
@ -1,5 +1,2 @@
|
|||||||
# The compiled binary, not the cmd/server package.
|
# The compiled binary, not the cmd/server package.
|
||||||
/server
|
/server
|
||||||
|
|
||||||
# air live-reload build cache (Dockerfile.dev + docker-compose.dev.yml).
|
|
||||||
/tmp/
|
|
||||||
|
|||||||
@ -1,15 +1,7 @@
|
|||||||
# Platform-only image (no canvas). Used by publish-workspace-server-image
|
# Platform-only image (no canvas). Used by publish-platform-image workflow
|
||||||
# workflow for ECR. Tenant image uses Dockerfile.tenant instead.
|
# for GHCR + Fly registry. Tenant image uses Dockerfile.tenant instead.
|
||||||
#
|
#
|
||||||
# Templates + plugins are pre-cloned by scripts/clone-manifest.sh (in CI
|
# Build context: repo root.
|
||||||
# or on the operator host) into .tenant-bundle-deps/ — same pattern as
|
|
||||||
# Dockerfile.tenant. See that file's header for the full rationale; the
|
|
||||||
# short version is that post-2026-05-06 every workspace-template-* and
|
|
||||||
# org-template-* repo on Gitea is private, so an in-image `git clone`
|
|
||||||
# has no auth path that doesn't leak the Gitea token into a layer.
|
|
||||||
#
|
|
||||||
# Build context: repo root, with `.tenant-bundle-deps/` populated by the
|
|
||||||
# workflow's "Pre-clone manifest deps" step (Task #173).
|
|
||||||
|
|
||||||
FROM golang:1.25-alpine AS builder
|
FROM golang:1.25-alpine AS builder
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
@ -34,18 +26,21 @@ RUN CGO_ENABLED=0 GOOS=linux go build \
|
|||||||
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
|
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
|
||||||
-o /memory-plugin ./cmd/memory-plugin-postgres
|
-o /memory-plugin ./cmd/memory-plugin-postgres
|
||||||
|
|
||||||
|
# Clone templates + plugins at build time from manifest.json
|
||||||
|
FROM alpine:3.20 AS templates
|
||||||
|
RUN apk add --no-cache git jq
|
||||||
|
COPY manifest.json /manifest.json
|
||||||
|
COPY scripts/clone-manifest.sh /scripts/clone-manifest.sh
|
||||||
|
RUN chmod +x /scripts/clone-manifest.sh && /scripts/clone-manifest.sh /manifest.json /workspace-configs-templates /org-templates /plugins
|
||||||
|
|
||||||
FROM alpine:3.20
|
FROM alpine:3.20
|
||||||
RUN apk add --no-cache ca-certificates git tzdata wget
|
RUN apk add --no-cache ca-certificates git tzdata wget
|
||||||
COPY --from=builder /platform /platform
|
COPY --from=builder /platform /platform
|
||||||
COPY --from=builder /memory-plugin /memory-plugin
|
COPY --from=builder /memory-plugin /memory-plugin
|
||||||
COPY workspace-server/migrations /migrations
|
COPY workspace-server/migrations /migrations
|
||||||
# Templates + plugins (pre-cloned by scripts/clone-manifest.sh in the
|
COPY --from=templates /workspace-configs-templates /workspace-configs-templates
|
||||||
# trusted CI / operator-host context, .git already stripped). The Gitea
|
COPY --from=templates /org-templates /org-templates
|
||||||
# token used to clone them never enters this image — same shape as
|
COPY --from=templates /plugins /plugins
|
||||||
# Dockerfile.tenant.
|
|
||||||
COPY .tenant-bundle-deps/workspace-configs-templates /workspace-configs-templates
|
|
||||||
COPY .tenant-bundle-deps/org-templates /org-templates
|
|
||||||
COPY .tenant-bundle-deps/plugins /plugins
|
|
||||||
# Non-root runtime with Docker socket access for workspace provisioning.
|
# Non-root runtime with Docker socket access for workspace provisioning.
|
||||||
RUN addgroup -g 1000 platform && adduser -u 1000 -G platform -s /bin/sh -D platform
|
RUN addgroup -g 1000 platform && adduser -u 1000 -G platform -s /bin/sh -D platform
|
||||||
EXPOSE 8080
|
EXPOSE 8080
|
||||||
|
|||||||
@ -1,44 +0,0 @@
|
|||||||
# Dockerfile.dev — local-development image with air-driven live reload.
|
|
||||||
#
|
|
||||||
# Selected by docker-compose.dev.yml (overlay over docker-compose.yml).
|
|
||||||
# Production stays on workspace-server/Dockerfile (static binary, no air).
|
|
||||||
#
|
|
||||||
# Workflow:
|
|
||||||
# 1. docker compose -f docker-compose.yml -f docker-compose.dev.yml up
|
|
||||||
# 2. Edit any .go file under workspace-server/
|
|
||||||
# 3. air detects, rebuilds, kills old binary, starts new one (~3-5s)
|
|
||||||
# 4. No `docker compose up --build` needed
|
|
||||||
#
|
|
||||||
# Templates + plugins are NOT pre-cloned here — air-mode assumes the
|
|
||||||
# developer's filesystem has the workspace-configs-templates/ + plugins/
|
|
||||||
# dirs available, mounted at runtime via docker-compose.dev.yml.
|
|
||||||
|
|
||||||
FROM golang:1.25-alpine
|
|
||||||
|
|
||||||
# air + git (for go mod) + ca-certs (for TLS) + tzdata (for time-zone DB)
|
|
||||||
# + docker-cli + docker-cli-buildx so the platform binary can shell out to
|
|
||||||
# /var/run/docker.sock (bind-mounted from host) for local-build provisioning.
|
|
||||||
# docker-cli alone is insufficient: alpine's docker-cli enables BuildKit by
|
|
||||||
# default but ships without buildx, producing
|
|
||||||
# `ERROR: BuildKit is enabled but the buildx component is missing or broken`
|
|
||||||
# on every `docker build`. docker-cli-buildx provides the buildx subcommand.
|
|
||||||
RUN apk add --no-cache git ca-certificates tzdata wget docker-cli docker-cli-buildx \
|
|
||||||
&& go install github.com/air-verse/air@latest
|
|
||||||
|
|
||||||
WORKDIR /app/workspace-server
|
|
||||||
|
|
||||||
# Pre-fetch deps so the first `air` rebuild on a fresh container is fast.
|
|
||||||
# These are bind-mount-overridden at runtime, so the COPY here is just
|
|
||||||
# to warm the module cache.
|
|
||||||
COPY workspace-server/go.mod workspace-server/go.sum ./
|
|
||||||
RUN go mod download
|
|
||||||
|
|
||||||
# Source is bind-mounted at runtime (see docker-compose.dev.yml volumes
|
|
||||||
# block) so the Dockerfile doesn't need to COPY it. air watches the
|
|
||||||
# bind-mounted dir for changes.
|
|
||||||
|
|
||||||
ENV CGO_ENABLED=0
|
|
||||||
ENV GOFLAGS="-buildvcs=false"
|
|
||||||
|
|
||||||
# Run air with the .air.toml in the bind-mounted source dir.
|
|
||||||
CMD ["air", "-c", ".air.toml"]
|
|
||||||
@ -3,34 +3,14 @@
|
|||||||
# Serves both the API (Go on :8080) and the UI (Node.js on :3000) in a
|
# Serves both the API (Go on :8080) and the UI (Node.js on :3000) in a
|
||||||
# single container. Go reverse-proxies unknown routes to canvas.
|
# single container. Go reverse-proxies unknown routes to canvas.
|
||||||
#
|
#
|
||||||
# Templates + plugins are NOT cloned at build time. They are pre-cloned
|
# Templates are cloned from standalone GitHub repos at build time so the
|
||||||
# in the trusted CI context (or operator host) by
|
# monorepo doesn't need to carry them. The repos are public; no auth.
|
||||||
# `scripts/clone-manifest.sh` into `.tenant-bundle-deps/` and COPYed in.
|
|
||||||
# The reason: post-2026-05-06, every workspace-template-* repo on Gitea
|
|
||||||
# (codex, crewai, deepagents, gemini-cli, langgraph) plus all 7
|
|
||||||
# org-template-* repos are private, so the Docker build can't `git clone`
|
|
||||||
# from inside the build context — there's no auth path that doesn't leak
|
|
||||||
# the Gitea token into an image layer. Pre-cloning keeps the token in
|
|
||||||
# the CI environment only; the resulting image carries the cloned trees
|
|
||||||
# with `.git` already stripped (see clone-manifest.sh).
|
|
||||||
#
|
#
|
||||||
# Build context: repo root, with `.tenant-bundle-deps/` populated by:
|
# Build context: repo root.
|
||||||
#
|
|
||||||
# MOLECULE_GITEA_TOKEN=<persona-PAT> scripts/clone-manifest.sh \
|
|
||||||
# manifest.json \
|
|
||||||
# .tenant-bundle-deps/workspace-configs-templates \
|
|
||||||
# .tenant-bundle-deps/org-templates \
|
|
||||||
# .tenant-bundle-deps/plugins
|
|
||||||
#
|
|
||||||
# In CI this happens in publish-workspace-server-image.yml's "Pre-clone
|
|
||||||
# manifest deps" step (uses AUTO_SYNC_TOKEN = devops-engineer persona).
|
|
||||||
# For a manual operator-host build, source the same token from
|
|
||||||
# /etc/molecule-bootstrap/agent-secrets.env first.
|
|
||||||
#
|
#
|
||||||
# docker buildx build --platform linux/amd64 \
|
# docker buildx build --platform linux/amd64 \
|
||||||
# -f workspace-server/Dockerfile.tenant \
|
# -f workspace-server/Dockerfile.tenant \
|
||||||
# -t <ECR>/molecule-ai/platform-tenant:latest \
|
# -t registry.fly.io/molecule-tenant:latest \
|
||||||
# --build-arg GIT_SHA=<sha> --build-arg NEXT_PUBLIC_PLATFORM_URL= \
|
|
||||||
# --push .
|
# --push .
|
||||||
|
|
||||||
# ── Stage 1: Go platform binary ──────────────────────────────────────
|
# ── Stage 1: Go platform binary ──────────────────────────────────────
|
||||||
@ -75,7 +55,14 @@ ENV NEXT_PUBLIC_PLATFORM_URL=$NEXT_PUBLIC_PLATFORM_URL
|
|||||||
ENV NEXT_PUBLIC_WS_URL=$NEXT_PUBLIC_WS_URL
|
ENV NEXT_PUBLIC_WS_URL=$NEXT_PUBLIC_WS_URL
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
# ── Stage 3: Runtime ──────────────────────────────────────────────────
|
# ── Stage 3: Clone templates + plugins from manifest.json ─────────────
|
||||||
|
FROM alpine:3.20 AS templates
|
||||||
|
RUN apk add --no-cache git jq
|
||||||
|
COPY manifest.json /manifest.json
|
||||||
|
COPY scripts/clone-manifest.sh /scripts/clone-manifest.sh
|
||||||
|
RUN chmod +x /scripts/clone-manifest.sh && /scripts/clone-manifest.sh /manifest.json /workspace-configs-templates /org-templates /plugins
|
||||||
|
|
||||||
|
# ── Stage 4: Runtime ──────────────────────────────────────────────────
|
||||||
FROM node:20-alpine
|
FROM node:20-alpine
|
||||||
RUN apk add --no-cache ca-certificates git tzdata openssh-client aws-cli
|
RUN apk add --no-cache ca-certificates git tzdata openssh-client aws-cli
|
||||||
|
|
||||||
@ -100,13 +87,10 @@ COPY --from=go-builder /platform /platform
|
|||||||
COPY --from=go-builder /memory-plugin /memory-plugin
|
COPY --from=go-builder /memory-plugin /memory-plugin
|
||||||
COPY workspace-server/migrations /migrations
|
COPY workspace-server/migrations /migrations
|
||||||
|
|
||||||
# Templates + plugins (pre-cloned by scripts/clone-manifest.sh in the
|
# Templates + plugins (cloned from GitHub in stage 3)
|
||||||
# trusted CI / operator-host context, .git already stripped — see
|
COPY --from=templates /workspace-configs-templates /workspace-configs-templates
|
||||||
# .tenant-bundle-deps/ in the build context). The Gitea token used to
|
COPY --from=templates /org-templates /org-templates
|
||||||
# clone them never enters this image.
|
COPY --from=templates /plugins /plugins
|
||||||
COPY .tenant-bundle-deps/workspace-configs-templates /workspace-configs-templates
|
|
||||||
COPY .tenant-bundle-deps/org-templates /org-templates
|
|
||||||
COPY .tenant-bundle-deps/plugins /plugins
|
|
||||||
|
|
||||||
# Canvas standalone
|
# Canvas standalone
|
||||||
WORKDIR /canvas
|
WORKDIR /canvas
|
||||||
|
|||||||
@ -1,89 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import "testing"
|
|
||||||
|
|
||||||
// TestResolveBindHost pins the precedence: BIND_ADDR explicit > dev-mode
|
|
||||||
// fail-open default of 127.0.0.1 > production-shape empty (all interfaces).
|
|
||||||
//
|
|
||||||
// Mutation-test invariant: removing the IsDevModeFailOpen() branch makes
|
|
||||||
// "no_bindaddr_devmode_unset_admin" fail (returns "" instead of "127.0.0.1").
|
|
||||||
// Removing the BIND_ADDR branch makes "explicit_bindaddr_*" cases fail.
|
|
||||||
func TestResolveBindHost(t *testing.T) {
|
|
||||||
cases := []struct {
|
|
||||||
name string
|
|
||||||
bindAddr string
|
|
||||||
adminToken string
|
|
||||||
molEnv string
|
|
||||||
want string
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "no_bindaddr_devmode_unset_admin",
|
|
||||||
bindAddr: "",
|
|
||||||
adminToken: "",
|
|
||||||
molEnv: "dev",
|
|
||||||
want: "127.0.0.1",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "no_bindaddr_devmode_unset_admin_full_word",
|
|
||||||
bindAddr: "",
|
|
||||||
adminToken: "",
|
|
||||||
molEnv: "development",
|
|
||||||
want: "127.0.0.1",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "no_bindaddr_admin_set_in_dev_env",
|
|
||||||
bindAddr: "",
|
|
||||||
adminToken: "secret",
|
|
||||||
molEnv: "dev",
|
|
||||||
want: "", // ADMIN_TOKEN flips IsDevModeFailOpen to false → all interfaces
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "no_bindaddr_production_env",
|
|
||||||
bindAddr: "",
|
|
||||||
adminToken: "",
|
|
||||||
molEnv: "production",
|
|
||||||
want: "", // production is not a dev value → all interfaces
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "no_bindaddr_unset_env",
|
|
||||||
bindAddr: "",
|
|
||||||
adminToken: "",
|
|
||||||
molEnv: "",
|
|
||||||
want: "", // unset MOLECULE_ENV → not dev → all interfaces
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "explicit_bindaddr_loopback_overrides_devmode",
|
|
||||||
bindAddr: "127.0.0.1",
|
|
||||||
adminToken: "",
|
|
||||||
molEnv: "dev",
|
|
||||||
want: "127.0.0.1",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "explicit_bindaddr_wildcard_overrides_devmode_default",
|
|
||||||
bindAddr: "0.0.0.0",
|
|
||||||
adminToken: "",
|
|
||||||
molEnv: "dev",
|
|
||||||
want: "0.0.0.0",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "explicit_bindaddr_in_production",
|
|
||||||
bindAddr: "10.0.5.7",
|
|
||||||
adminToken: "secret",
|
|
||||||
molEnv: "production",
|
|
||||||
want: "10.0.5.7",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tc := range cases {
|
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
|
||||||
t.Setenv("BIND_ADDR", tc.bindAddr)
|
|
||||||
t.Setenv("ADMIN_TOKEN", tc.adminToken)
|
|
||||||
t.Setenv("MOLECULE_ENV", tc.molEnv)
|
|
||||||
got := resolveBindHost()
|
|
||||||
if got != tc.want {
|
|
||||||
t.Errorf("resolveBindHost() = %q, want %q (BIND_ADDR=%q ADMIN_TOKEN=%q MOLECULE_ENV=%q)",
|
|
||||||
got, tc.want, tc.bindAddr, tc.adminToken, tc.molEnv)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -19,7 +19,6 @@ import (
|
|||||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
|
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
|
||||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/imagewatch"
|
"github.com/Molecule-AI/molecule-monorepo/platform/internal/imagewatch"
|
||||||
memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring"
|
memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring"
|
||||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/middleware"
|
|
||||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
|
"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
|
||||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
|
"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
|
||||||
@ -333,23 +332,15 @@ func main() {
|
|||||||
// Router
|
// Router
|
||||||
r := router.Setup(hub, broadcaster, prov, platformURL, configsDir, wh, channelMgr, memBundle)
|
r := router.Setup(hub, broadcaster, prov, platformURL, configsDir, wh, channelMgr, memBundle)
|
||||||
|
|
||||||
// HTTP server with graceful shutdown.
|
// HTTP server with graceful shutdown
|
||||||
//
|
|
||||||
// Bind host: in dev-mode (no ADMIN_TOKEN, MOLECULE_ENV=dev|development)
|
|
||||||
// the AdminAuth chain fails open by design; pairing that with a wildcard
|
|
||||||
// bind would expose unauth /workspaces to any same-LAN peer. Default to
|
|
||||||
// loopback when fail-open is active. Operators who need LAN exposure set
|
|
||||||
// BIND_ADDR=0.0.0.0 explicitly. Production (ADMIN_TOKEN set) is unchanged.
|
|
||||||
// See molecule-core#7.
|
|
||||||
bindHost := resolveBindHost()
|
|
||||||
srv := &http.Server{
|
srv := &http.Server{
|
||||||
Addr: fmt.Sprintf("%s:%s", bindHost, port),
|
Addr: fmt.Sprintf(":%s", port),
|
||||||
Handler: r,
|
Handler: r,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start server in goroutine
|
// Start server in goroutine
|
||||||
go func() {
|
go func() {
|
||||||
log.Printf("Platform starting on %s:%s (dev-mode-fail-open=%v)", bindHost, port, middleware.IsDevModeFailOpen())
|
log.Printf("Platform starting on :%s", port)
|
||||||
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||||
log.Fatalf("Server failed: %v", err)
|
log.Fatalf("Server failed: %v", err)
|
||||||
}
|
}
|
||||||
@ -384,29 +375,6 @@ func envOr(key, fallback string) string {
|
|||||||
return fallback
|
return fallback
|
||||||
}
|
}
|
||||||
|
|
||||||
// resolveBindHost picks the listener interface for the HTTP server.
|
|
||||||
//
|
|
||||||
// Precedence:
|
|
||||||
// 1. BIND_ADDR — explicit operator override (any value, including "0.0.0.0").
|
|
||||||
// 2. dev-mode fail-open active → "127.0.0.1" (loopback only).
|
|
||||||
// 3. otherwise → "" (Go binds every interface; existing prod/self-host shape).
|
|
||||||
//
|
|
||||||
// Coupling the loopback default to middleware.IsDevModeFailOpen() means the
|
|
||||||
// two safety levers — bind narrowness and auth strength — move together. A
|
|
||||||
// production deploy (ADMIN_TOKEN set) keeps binding to all interfaces because
|
|
||||||
// the auth chain is doing its job; a dev Mac (no ADMIN_TOKEN, MOLECULE_ENV=dev)
|
|
||||||
// is reachable only via loopback because the auth chain is fail-open. See
|
|
||||||
// molecule-core#7 for the original LAN exposure finding.
|
|
||||||
func resolveBindHost() string {
|
|
||||||
if v := os.Getenv("BIND_ADDR"); v != "" {
|
|
||||||
return v
|
|
||||||
}
|
|
||||||
if middleware.IsDevModeFailOpen() {
|
|
||||||
return "127.0.0.1"
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
func findConfigsDir() string {
|
func findConfigsDir() string {
|
||||||
candidates := []string{
|
candidates := []string{
|
||||||
"workspace-configs-templates",
|
"workspace-configs-templates",
|
||||||
|
|||||||
@ -413,56 +413,11 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
|
|||||||
return http.StatusOK, respBody, nil
|
return http.StatusOK, respBody, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mock-runtime short-circuit. Workspaces with runtime='mock' have
|
|
||||||
// no container, no EC2, no URL — every reply is synthesised here
|
|
||||||
// from a small canned-variant pool. Built for the "200-workspace
|
|
||||||
// mock org" demo: a CEO/VPs/Managers/ICs hierarchy that renders
|
|
||||||
// at scale on the canvas without burning real LLM credits or
|
|
||||||
// provisioning 200 EC2 instances. See mock_runtime.go for the
|
|
||||||
// full rationale + reply shape contract.
|
|
||||||
//
|
|
||||||
// Position: AFTER poll-mode (mock isn't a delivery mode, it's a
|
|
||||||
// runtime; treating poll-set-on-mock as poll matches operator
|
|
||||||
// intent if anyone ever does that), BEFORE resolveAgentURL (mock
|
|
||||||
// has no URL — going through resolveAgentURL would 404 on the
|
|
||||||
// SELECT url since the row is provisioned as NULL).
|
|
||||||
if status, respBody, handled := h.handleMockA2A(ctx, workspaceID, callerID, body, a2aMethod, logActivity); handled {
|
|
||||||
return status, respBody, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
agentURL, proxyErr := h.resolveAgentURL(ctx, workspaceID)
|
agentURL, proxyErr := h.resolveAgentURL(ctx, workspaceID)
|
||||||
if proxyErr != nil {
|
if proxyErr != nil {
|
||||||
return 0, nil, proxyErr
|
return 0, nil, proxyErr
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pre-flight container-health check (#36). The dispatchA2A path below
|
|
||||||
// does Docker-DNS forwarding to `ws-<wsShort>:8000` and only catches a
|
|
||||||
// missing/dead container REACTIVELY via maybeMarkContainerDead in
|
|
||||||
// handleA2ADispatchError. That works but costs the caller a full
|
|
||||||
// network-timeout (2-30s) before the structured 503 surfaces.
|
|
||||||
//
|
|
||||||
// When we KNOW the workspace is container-backed (h.docker != nil + we
|
|
||||||
// rewrite to Docker-DNS form below), do a single proactive
|
|
||||||
// RunningContainerName lookup. If the container is genuinely missing,
|
|
||||||
// short-circuit with the same structured 503 + async restart that
|
|
||||||
// maybeMarkContainerDead would produce — but immediately, without the
|
|
||||||
// network round-trip.
|
|
||||||
//
|
|
||||||
// Three outcomes of provisioner.RunningContainerName(ctx, h.docker, id):
|
|
||||||
// ("ws-<id>", nil) → forward as today.
|
|
||||||
// ("", nil) → container is genuinely not running. Fast-503.
|
|
||||||
// ("", err) → transient daemon error. Fall through to optimistic
|
|
||||||
// forward — matches Provisioner.IsRunning's
|
|
||||||
// (true, err) "fail-soft as alive" contract.
|
|
||||||
//
|
|
||||||
// Same SSOT as findRunningContainer (#10/#12). See AST gate
|
|
||||||
// TestProxyA2A_RoutesThroughProvisionerSSOT.
|
|
||||||
if h.provisioner != nil && platformInDocker && strings.HasPrefix(agentURL, "http://"+provisioner.ContainerName(workspaceID)+":") {
|
|
||||||
if proxyErr := h.preflightContainerHealth(ctx, workspaceID); proxyErr != nil {
|
|
||||||
return 0, nil, proxyErr
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
resp, cancelFwd, err := h.dispatchA2A(ctx, workspaceID, agentURL, body, callerID)
|
resp, cancelFwd, err := h.dispatchA2A(ctx, workspaceID, agentURL, body, callerID)
|
||||||
if cancelFwd != nil {
|
if cancelFwd != nil {
|
||||||
|
|||||||
@ -198,60 +198,6 @@ func (h *WorkspaceHandler) maybeMarkContainerDead(ctx context.Context, workspace
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// preflightContainerHealth runs a proactive Provisioner.IsRunning check
|
|
||||||
// (#36) before dispatching the a2a forward. Routed through provisioner's
|
|
||||||
// SSOT IsRunning, which itself wraps RunningContainerName — same source
|
|
||||||
// as findRunningContainer in the plugins handler (#10/#12).
|
|
||||||
//
|
|
||||||
// Returns nil when the forward should proceed:
|
|
||||||
// - container is running, OR
|
|
||||||
// - daemon errored transiently (matches IsRunning's (true, err)
|
|
||||||
// "fail-soft as alive" contract — let the optimistic forward run
|
|
||||||
// and reactive maybeMarkContainerDead catch a real failure).
|
|
||||||
//
|
|
||||||
// Returns a structured 503 + triggers the same async restart that
|
|
||||||
// maybeMarkContainerDead would produce, when:
|
|
||||||
// - container is genuinely not running (NotFound / Exited / Created…).
|
|
||||||
//
|
|
||||||
// The point of running this BEFORE the forward is to save the caller
|
|
||||||
// 2-30s of network-timeout cost when the container is missing — a common
|
|
||||||
// shape post-EC2-replace (see molecule-controlplane#20 incident
|
|
||||||
// 2026-05-07) where the reconciler hasn't respawned the agent yet.
|
|
||||||
func (h *WorkspaceHandler) preflightContainerHealth(ctx context.Context, workspaceID string) *proxyA2AError {
|
|
||||||
running, err := h.provisioner.IsRunning(ctx, workspaceID)
|
|
||||||
if err != nil {
|
|
||||||
// Transient daemon error. Provisioner.IsRunning returns (true, err)
|
|
||||||
// in this case — fall through to the optimistic forward, reactive
|
|
||||||
// maybeMarkContainerDead handles a real failure later.
|
|
||||||
log.Printf("ProxyA2A preflight: IsRunning transient error for %s: %v (proceeding with forward)", workspaceID, err)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if running {
|
|
||||||
// Container is running — forward as today.
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
// Container is genuinely not running. Mark offline + trigger restart
|
|
||||||
// (same effect as maybeMarkContainerDead's branch), and return the
|
|
||||||
// structured 503 immediately so the caller skips the forward.
|
|
||||||
log.Printf("ProxyA2A preflight: container for %s is not running — marking offline and triggering restart (#36)", workspaceID)
|
|
||||||
if _, dbErr := db.DB.ExecContext(ctx,
|
|
||||||
`UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2 AND status NOT IN ('removed', 'provisioning')`,
|
|
||||||
models.StatusOffline, workspaceID); dbErr != nil {
|
|
||||||
log.Printf("ProxyA2A preflight: failed to mark workspace %s offline: %v", workspaceID, dbErr)
|
|
||||||
}
|
|
||||||
db.ClearWorkspaceKeys(ctx, workspaceID)
|
|
||||||
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOffline), workspaceID, map[string]interface{}{})
|
|
||||||
go h.RestartByID(workspaceID)
|
|
||||||
return &proxyA2AError{
|
|
||||||
Status: http.StatusServiceUnavailable,
|
|
||||||
Response: gin.H{
|
|
||||||
"error": "workspace container not running — restart triggered",
|
|
||||||
"restarting": true,
|
|
||||||
"preflight": true, // distinguishes from reactive containerDead path
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// logA2AFailure records a failed A2A attempt to activity_logs in a detached
|
// logA2AFailure records a failed A2A attempt to activity_logs in a detached
|
||||||
// goroutine (the request context may already be done by the time it runs).
|
// goroutine (the request context may already be done by the time it runs).
|
||||||
func (h *WorkspaceHandler) logA2AFailure(ctx context.Context, workspaceID, callerID string, body []byte, a2aMethod string, err error, durationMs int) {
|
func (h *WorkspaceHandler) logA2AFailure(ctx context.Context, workspaceID, callerID string, body []byte, a2aMethod string, err error, durationMs int) {
|
||||||
|
|||||||
@ -1,194 +0,0 @@
|
|||||||
package handlers
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"errors"
|
|
||||||
"go/ast"
|
|
||||||
"go/parser"
|
|
||||||
"go/token"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/DATA-DOG/go-sqlmock"
|
|
||||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
|
||||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
|
||||||
)
|
|
||||||
|
|
||||||
// preflightLocalProv is a controllable LocalProvisionerAPI stub for the
|
|
||||||
// preflight tests (#36). Other API methods panic to guard against tests
|
|
||||||
// that should be using a different stub.
|
|
||||||
type preflightLocalProv struct {
|
|
||||||
running bool
|
|
||||||
err error
|
|
||||||
calls int
|
|
||||||
calledWith []string
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *preflightLocalProv) IsRunning(_ context.Context, workspaceID string) (bool, error) {
|
|
||||||
p.calls++
|
|
||||||
p.calledWith = append(p.calledWith, workspaceID)
|
|
||||||
return p.running, p.err
|
|
||||||
}
|
|
||||||
func (p *preflightLocalProv) Start(_ context.Context, _ provisioner.WorkspaceConfig) (string, error) {
|
|
||||||
panic("preflightLocalProv: Start not implemented")
|
|
||||||
}
|
|
||||||
func (p *preflightLocalProv) Stop(_ context.Context, _ string) error {
|
|
||||||
panic("preflightLocalProv: Stop not implemented")
|
|
||||||
}
|
|
||||||
func (p *preflightLocalProv) ExecRead(_ context.Context, _, _ string) ([]byte, error) {
|
|
||||||
panic("preflightLocalProv: ExecRead not implemented")
|
|
||||||
}
|
|
||||||
func (p *preflightLocalProv) RemoveVolume(_ context.Context, _ string) error {
|
|
||||||
panic("preflightLocalProv: RemoveVolume not implemented")
|
|
||||||
}
|
|
||||||
func (p *preflightLocalProv) VolumeHasFile(_ context.Context, _, _ string) (bool, error) {
|
|
||||||
panic("preflightLocalProv: VolumeHasFile not implemented")
|
|
||||||
}
|
|
||||||
func (p *preflightLocalProv) WriteAuthTokenToVolume(_ context.Context, _, _ string) error {
|
|
||||||
panic("preflightLocalProv: WriteAuthTokenToVolume not implemented")
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestPreflight_ContainerRunning_ReturnsNil — IsRunning(true,nil): forward
|
|
||||||
// proceeds. preflight returns nil → caller continues to dispatchA2A.
|
|
||||||
func TestPreflight_ContainerRunning_ReturnsNil(t *testing.T) {
|
|
||||||
_ = setupTestDB(t)
|
|
||||||
stub := &preflightLocalProv{running: true, err: nil}
|
|
||||||
h := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
|
|
||||||
h.provisioner = stub
|
|
||||||
|
|
||||||
if err := h.preflightContainerHealth(context.Background(), "ws-running-123"); err != nil {
|
|
||||||
t.Fatalf("preflight should return nil when container running, got %+v", err)
|
|
||||||
}
|
|
||||||
if stub.calls != 1 {
|
|
||||||
t.Errorf("IsRunning should be called exactly once, got %d", stub.calls)
|
|
||||||
}
|
|
||||||
if len(stub.calledWith) != 1 || stub.calledWith[0] != "ws-running-123" {
|
|
||||||
t.Errorf("IsRunning should be called with workspace id, got %v", stub.calledWith)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestPreflight_ContainerNotRunning_StructuredFastFail — IsRunning(false,nil):
|
|
||||||
// preflight returns structured 503 with restarting=true + preflight=true, AND
|
|
||||||
// triggers the offline-flip + WORKSPACE_OFFLINE broadcast + async restart.
|
|
||||||
// This is the load-bearing case — saves the caller 2-30s of network timeout.
|
|
||||||
func TestPreflight_ContainerNotRunning_StructuredFastFail(t *testing.T) {
|
|
||||||
mock := setupTestDB(t)
|
|
||||||
_ = setupTestRedis(t)
|
|
||||||
stub := &preflightLocalProv{running: false, err: nil}
|
|
||||||
h := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
|
|
||||||
h.provisioner = stub
|
|
||||||
|
|
||||||
// Expect the offline-flip UPDATE.
|
|
||||||
mock.ExpectExec(`UPDATE workspaces SET status =`).
|
|
||||||
WithArgs(models.StatusOffline, "ws-dead-456").
|
|
||||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
|
||||||
// Broadcaster's INSERT INTO structure_events fires too — best-effort
|
|
||||||
// log entry for the WORKSPACE_OFFLINE event. Match permissively.
|
|
||||||
mock.ExpectExec(`INSERT INTO structure_events`).
|
|
||||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
|
||||||
|
|
||||||
proxyErr := h.preflightContainerHealth(context.Background(), "ws-dead-456")
|
|
||||||
if proxyErr == nil {
|
|
||||||
t.Fatal("preflight should return *proxyA2AError when container not running")
|
|
||||||
}
|
|
||||||
if proxyErr.Status != 503 {
|
|
||||||
t.Errorf("expected 503, got %d", proxyErr.Status)
|
|
||||||
}
|
|
||||||
if got := proxyErr.Response["restarting"]; got != true {
|
|
||||||
t.Errorf("response should mark restarting=true, got %v", got)
|
|
||||||
}
|
|
||||||
if got := proxyErr.Response["preflight"]; got != true {
|
|
||||||
t.Errorf("response should mark preflight=true so callers can distinguish from reactive containerDead, got %v", got)
|
|
||||||
}
|
|
||||||
if got := proxyErr.Response["error"]; got != "workspace container not running — restart triggered" {
|
|
||||||
t.Errorf("error message mismatch, got %q", got)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Note: broadcaster firing is exercised by the production path's
|
|
||||||
// h.broadcaster.RecordAndBroadcast call but not asserted here — the
|
|
||||||
// real *events.Broadcaster doesn't expose received events for inspection.
|
|
||||||
// The DB UPDATE expectation is sufficient to pin the offline-flip path.
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestPreflight_TransientError_FailsSoftAsAlive — IsRunning(true,err): the
|
|
||||||
// (true, err) "fail-soft" contract — preflight returns nil so the optimistic
|
|
||||||
// forward runs; reactive maybeMarkContainerDead handles a real failure later.
|
|
||||||
// This pin is critical: a flaky daemon must NOT trigger a restart cascade.
|
|
||||||
func TestPreflight_TransientError_FailsSoftAsAlive(t *testing.T) {
|
|
||||||
_ = setupTestDB(t)
|
|
||||||
stub := &preflightLocalProv{running: true, err: errors.New("docker daemon EOF")}
|
|
||||||
h := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
|
|
||||||
h.provisioner = stub
|
|
||||||
|
|
||||||
if err := h.preflightContainerHealth(context.Background(), "ws-flaky-789"); err != nil {
|
|
||||||
t.Fatalf("preflight should return nil on transient error (fail-soft), got %+v", err)
|
|
||||||
}
|
|
||||||
// No DB UPDATE expected — sqlmock would complain about unexpected calls
|
|
||||||
// at test cleanup if the offline-flip path fired.
|
|
||||||
}
|
|
||||||
|
|
||||||
// TestProxyA2A_Preflight_RoutesThroughProvisionerSSOT — AST gate (#36 mirror
|
|
||||||
// of #12's gate). Pins the invariant that preflightContainerHealth uses the
|
|
||||||
// SSOT Provisioner.IsRunning helper, NOT a parallel docker.ContainerInspect
|
|
||||||
// of its own.
|
|
||||||
//
|
|
||||||
// Mutation invariant: if a future PR replaces h.provisioner.IsRunning with
|
|
||||||
// a direct cli.ContainerInspect call, this test fails. That's the signal to
|
|
||||||
// either (a) extend Provisioner.IsRunning's contract OR (b) document why
|
|
||||||
// this call site needs to differ. Either way, the drift gets a reviewer's
|
|
||||||
// attention instead of shipping silently.
|
|
||||||
func TestProxyA2A_Preflight_RoutesThroughProvisionerSSOT(t *testing.T) {
|
|
||||||
fset := token.NewFileSet()
|
|
||||||
file, err := parser.ParseFile(fset, "a2a_proxy_helpers.go", nil, parser.ParseComments)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("parse a2a_proxy_helpers.go: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var fn *ast.FuncDecl
|
|
||||||
ast.Inspect(file, func(n ast.Node) bool {
|
|
||||||
f, ok := n.(*ast.FuncDecl)
|
|
||||||
if !ok || f.Name.Name != "preflightContainerHealth" {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
fn = f
|
|
||||||
return false
|
|
||||||
})
|
|
||||||
if fn == nil {
|
|
||||||
t.Fatal("preflightContainerHealth not found — was it renamed? update this gate or the SSOT routing assumption")
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
callsIsRunning bool
|
|
||||||
callsContainerInspectRaw bool
|
|
||||||
callsRunningContainerNameDirect bool
|
|
||||||
)
|
|
||||||
ast.Inspect(fn.Body, func(n ast.Node) bool {
|
|
||||||
call, ok := n.(*ast.CallExpr)
|
|
||||||
if !ok {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
sel, ok := call.Fun.(*ast.SelectorExpr)
|
|
||||||
if !ok {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
switch sel.Sel.Name {
|
|
||||||
case "IsRunning":
|
|
||||||
callsIsRunning = true
|
|
||||||
case "ContainerInspect":
|
|
||||||
callsContainerInspectRaw = true
|
|
||||||
case "RunningContainerName":
|
|
||||||
// Direct RunningContainerName is also acceptable SSOT — but
|
|
||||||
// preferring IsRunning keeps the (bool, error) contract that
|
|
||||||
// already exists in the helper API surface.
|
|
||||||
callsRunningContainerNameDirect = true
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
})
|
|
||||||
|
|
||||||
if !callsIsRunning && !callsRunningContainerNameDirect {
|
|
||||||
t.Errorf("preflightContainerHealth must call provisioner.IsRunning OR provisioner.RunningContainerName for the SSOT health check — see molecule-core#36. Found neither.")
|
|
||||||
}
|
|
||||||
if callsContainerInspectRaw {
|
|
||||||
t.Errorf("preflightContainerHealth carries a direct ContainerInspect call. This is the parallel-impl drift molecule-core#36 fixed. " +
|
|
||||||
"Either route through provisioner.IsRunning OR — if a new use case truly needs a different inspect — extend the helper's contract first and update this gate to allow the specific delta.")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user