Merge pull request #2386 from Molecule-AI/staging

staging → main: auto-promote cdef893
This commit is contained in:
github-actions[bot] 2026-04-30 16:19:26 +00:00 committed by GitHub
commit 319f85a4b4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 574 additions and 39 deletions

View File

@ -267,6 +267,32 @@ jobs:
echo "promote_pr_num=${PR_NUM}" >> "$GITHUB_OUTPUT"
id: promote_pr
# Mint a short-lived GitHub App installation token for the dispatch
# step below. We CANNOT use `secrets.GITHUB_TOKEN` to dispatch the
# downstream publish chain — workflow runs created by GITHUB_TOKEN
# do not fire `workflow_run` triggers on completion (the
# documented "no recursion" rule —
# https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow).
#
# Symptom this caused (root-caused on 2026-04-30): publish-image
# ran successfully twice (21313dc 14:41Z, 59dec57 15:21Z) but
# canary-verify and redeploy-tenants-on-main never chained,
# because the publish run's `triggering_actor` was
# `github-actions[bot]` (i.e. GITHUB_TOKEN). A manual dispatch
# earlier in the day with the operator's PAT (d850ec7 06:52Z) did
# chain — same workflow file, only the actor differed.
#
# An App token's triggering_actor is the App user (e.g.
# `molecule-ai[bot]`), which IS allowed to fire downstream
# workflow_run cascades.
- name: Mint App token for downstream dispatch
if: steps.promote_pr.outputs.promote_pr_num != ''
id: app-token
uses: actions/create-github-app-token@1b10c78c7865c340bc4f6099eb2f838309f1e8c3 # v3.1.1
with:
app-id: ${{ secrets.MOLECULE_AI_APP_ID }}
private-key: ${{ secrets.MOLECULE_AI_APP_PRIVATE_KEY }}
- name: Wait for promote merge, then dispatch publish + redeploy (#2357)
# GITHUB_TOKEN-initiated merges suppress downstream `push` events
# (https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow).
@ -276,18 +302,20 @@ jobs:
# tenants stay on stale code (issue #2357).
#
# Workaround: poll for the merge to land, then explicitly
# `gh workflow run` publish-workspace-server-image. workflow_dispatch
# is the documented exception to the GITHUB_TOKEN suppression rule —
# dispatch DOES create a new workflow run. canary-verify chains via
# workflow_run (no branch filter) and redeploys to fleet via the
# existing chain.
# `gh workflow run` publish-workspace-server-image. The dispatch
# MUST authenticate as the molecule-ai App (App token minted
# above) — not GITHUB_TOKEN — so that the resulting publish
# run's completion event can fire the workflow_run cascade
# into canary-verify + redeploy-tenants-on-main. See the prior
# step's comment for the GITHUB_TOKEN no-recursion details.
#
# Long-term fix: switch the auto-merge call above to a GitHub App
# token (actions/create-github-app-token) and remove this polling
# tail step. Tracked in #2357.
# Long-term fix: switch the auto-merge call above to use the
# same App token, so the merge's push event fires
# publish-workspace-server-image naturally and this polling tail
# becomes unnecessary. Tracked in #2357.
if: steps.promote_pr.outputs.promote_pr_num != ''
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GH_TOKEN: ${{ steps.app-token.outputs.token }}
REPO: ${{ github.repository }}
PR_NUM: ${{ steps.promote_pr.outputs.promote_pr_num }}
run: |
@ -318,17 +346,18 @@ jobs:
exit 0
fi
# Dispatch publish on main. workflow_dispatch via GITHUB_TOKEN
# IS allowed to create new workflow runs (per the linked docs).
# Dispatch publish on main using the App token. App-initiated
# workflow_dispatch DOES propagate the workflow_run cascade,
# unlike GITHUB_TOKEN-initiated dispatch.
# publish completes → canary-verify chains via workflow_run →
# redeploy-tenants-on-main chains via workflow_run + branches:[main].
if gh workflow run publish-workspace-server-image.yml \
--repo "$REPO" --ref main 2>&1; then
echo "::notice::Dispatched publish-workspace-server-image on ref=main — canary-verify and redeploy-tenants-on-main will chain via workflow_run."
echo "::notice::Dispatched publish-workspace-server-image on ref=main as molecule-ai App — canary-verify and redeploy-tenants-on-main will chain via workflow_run."
{
echo "## 🚀 Tenant redeploy chain dispatched"
echo
echo "- publish-workspace-server-image (workflow_dispatch on \`main\`)"
echo "- publish-workspace-server-image (workflow_dispatch on \`main\`, actor: \`molecule-ai[bot]\`)"
echo "- canary-verify will chain on completion"
echo "- redeploy-tenants-on-main will chain on canary green"
} >> "$GITHUB_STEP_SUMMARY"

View File

@ -0,0 +1,227 @@
package db_test
// Static drift gate: every workspaces.status literal written in the Go
// tree must exist in the workspace_status enum defined by the migrations.
//
// Why this exists: the `workspace_status` enum (migrations 043 + 046)
// shipped without 'awaiting_agent' even though application code wrote
// that value, and every UPDATE silently failed in production for five
// days before the gap surfaced (see 046_workspace_status_awaiting_agent.up.sql).
// The unit tests passed because sqlmock matches SQL by regex, not against
// a live enum constraint.
//
// Approach: extract every Go string literal that begins (after optional
// leading whitespace) with `UPDATE workspaces` or `INSERT INTO workspaces`.
// Read-side SQL and statements that merely reference workspaces are out
// of scope. For each such SQL fragment, pull the single-quoted status
// values out of `status =`, `status IN`, `THEN`, and `ELSE`. Every value
// must be in the union of CREATE TYPE + ALTER TYPE ADD VALUE across all
// migrations.
import (
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"testing"
)
// TestWorkspaceStatusEnum_NoLiteralDrift fails when a status literal
// harvested from the Go sources under workspace-server/internal is absent
// from the workspace_status enum assembled from workspace-server/migrations.
//
// Both inputs are required to be non-empty: an empty enum or an empty
// literal set means the parsing itself is broken, and the test aborts
// rather than passing vacuously.
func TestWorkspaceStatusEnum_NoLiteralDrift(t *testing.T) {
	t.Parallel()

	root := findRepoRoot(t)
	migrationsDir := filepath.Join(root, "workspace-server", "migrations")
	internalDir := filepath.Join(root, "workspace-server", "internal")

	allowed := loadWorkspaceStatusEnum(t, migrationsDir)
	if len(allowed) == 0 {
		t.Fatalf("could not parse workspace_status enum from %s — gate is non-functional", migrationsDir)
	}

	written := collectWorkspacesStatusLiterals(t, internalDir)
	if len(written) == 0 {
		t.Fatalf("found zero workspaces.status literals under %s — gate is non-functional", internalDir)
	}

	// Anything written by code but unknown to the enum is drift.
	var unknown []string
	for value := range written {
		if _, known := allowed[value]; !known {
			unknown = append(unknown, value)
		}
	}
	if len(unknown) == 0 {
		return
	}

	// Map iteration order is random; sort so the failure output is stable.
	sort.Strings(unknown)
	t.Errorf(
		"workspaces.status literal(s) %v are written by Go code but not in the workspace_status enum.\n"+
			"Add a migration `ALTER TYPE workspace_status ADD VALUE 'X';` (see 046 for shape).\n"+
			"Enum currently is: %v",
		unknown, sortedKeys(allowed),
	)
}
// loadWorkspaceStatusEnum scans every *.up.sql file in migrationsDir for
// either:
//
//	CREATE TYPE workspace_status AS ENUM ('a', 'b', ...)
//	ALTER TYPE workspace_status ADD VALUE [IF NOT EXISTS] 'X' [BEFORE|AFTER 'Y']
//
// and returns the union of every value the enum will hold after all
// migrations apply. The calling test is aborted via t.Fatalf if the
// directory cannot be globbed or a migration file cannot be read.
//
// SQL comments (`-- ...` and `/* ... */`) are blanked out before matching.
// Without that, a commented-out ALTER TYPE statement would contribute a
// phantom value, and a `)` or quoted word in an inline comment inside the
// ENUM list would truncate or pollute the parsed list. Single-quoted SQL
// literals are passed through untouched, so a `--` inside a literal is not
// mistaken for the start of a comment.
func loadWorkspaceStatusEnum(t *testing.T, migrationsDir string) map[string]struct{} {
	t.Helper()
	out := make(map[string]struct{})
	files, err := filepath.Glob(filepath.Join(migrationsDir, "*.up.sql"))
	if err != nil {
		t.Fatalf("glob migrations: %v", err)
	}
	sort.Strings(files)

	// One alternation so that whichever construct starts first wins: a
	// quoted literal (kept as-is) or a line/block comment (blanked).
	literalOrCommentRE := regexp.MustCompile(`(?s)'(?:[^']|'')*'|--[^\n]*|/\*.*?\*/`)
	createRE := regexp.MustCompile(`(?is)CREATE\s+TYPE\s+workspace_status\s+AS\s+ENUM\s*\(([^)]+)\)`)
	addValueRE := regexp.MustCompile(`(?i)ALTER\s+TYPE\s+workspace_status\s+ADD\s+VALUE(?:\s+IF\s+NOT\s+EXISTS)?\s+'([^']+)'`)
	literalRE := regexp.MustCompile(`'([^']+)'`)

	for _, f := range files {
		body, err := os.ReadFile(f)
		if err != nil {
			t.Fatalf("read %s: %v", f, err)
		}
		// Replace comments with a single space so tokens on either side
		// of a comment stay separated.
		sql := literalOrCommentRE.ReplaceAllStringFunc(string(body), func(m string) string {
			if m[0] == '\'' {
				return m
			}
			return " "
		})
		for _, m := range createRE.FindAllStringSubmatch(sql, -1) {
			for _, lit := range literalRE.FindAllStringSubmatch(m[1], -1) {
				out[lit[1]] = struct{}{}
			}
		}
		for _, m := range addValueRE.FindAllStringSubmatch(sql, -1) {
			out[m[1]] = struct{}{}
		}
	}
	return out
}
// collectWorkspacesStatusLiterals returns the set of status values that
// appear in write statements against `workspaces` anywhere in the
// non-test .go files below root.
//
// A Go string literal (raw or double-quoted) is treated as a write
// statement only when, after optional leading whitespace, it starts with
// `UPDATE workspaces` or `INSERT INTO workspaces`. Literals that do not
// start that way — SELECTs, or SQL against another table that merely
// mentions workspaces — are ignored.
//
// Any walk or read error aborts the calling test via t.Fatalf.
func collectWorkspacesStatusLiterals(t *testing.T, root string) map[string]struct{} {
	t.Helper()

	// Extractors for the two Go string-literal forms. Raw strings may span
	// lines; both patterns are applied to the whole file body.
	literalForms := []*regexp.Regexp{
		regexp.MustCompile("(?s)`([^`]*?)`"),
		regexp.MustCompile(`"((?:[^"\\]|\\.)*)"`),
	}

	// A fragment is in scope when either of these matches at its start
	// (case-insensitive, leading newline/indent allowed).
	writeScopes := []*regexp.Regexp{
		regexp.MustCompile(`(?is)^\s*UPDATE\s+workspaces\b`),
		regexp.MustCompile(`(?is)^\s*INSERT\s+INTO\s+workspaces\b`),
	}

	// Patterns whose first capture group is directly a status value:
	//   status = 'X' / status != 'X' / status <> 'X'
	//   THEN 'X'   (CASE arm)
	//   ELSE 'X'   (CASE default)
	singleValue := []*regexp.Regexp{
		regexp.MustCompile(`(?i)status\s*(?:=|!=|<>)\s*'([a-z_]+)'`),
		regexp.MustCompile(`(?i)THEN\s+'([a-z_]+)'`),
		regexp.MustCompile(`(?i)ELSE\s+'([a-z_]+)'`),
	}
	// status [NOT] IN ( ... ) captures the list body; the quoted words
	// inside it are then pulled out individually.
	inList := regexp.MustCompile(`(?i)status\s+(?:NOT\s+)?IN\s*\(([^)]*)\)`)
	quotedWord := regexp.MustCompile(`'([a-z_]+)'`)

	found := make(map[string]struct{})

	scan := func(sql string) {
		inScope := false
		for _, scope := range writeScopes {
			if scope.MatchString(sql) {
				inScope = true
				break
			}
		}
		if !inScope {
			return
		}
		for _, re := range singleValue {
			for _, hit := range re.FindAllStringSubmatch(sql, -1) {
				found[hit[1]] = struct{}{}
			}
		}
		for _, list := range inList.FindAllStringSubmatch(sql, -1) {
			for _, hit := range quotedWord.FindAllStringSubmatch(list[1], -1) {
				found[hit[1]] = struct{}{}
			}
		}
	}

	err := filepath.Walk(root, func(p string, fi os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case fi.IsDir(),
			!strings.HasSuffix(p, ".go"),
			strings.HasSuffix(p, "_test.go"):
			// Only production Go sources are scanned.
			return nil
		}
		src, readErr := os.ReadFile(p)
		if readErr != nil {
			return readErr
		}
		code := string(src)
		for _, form := range literalForms {
			for _, lit := range form.FindAllStringSubmatch(code, -1) {
				scan(lit[1])
			}
		}
		return nil
	})
	if err != nil {
		t.Fatalf("walk %s: %v", root, err)
	}
	return found
}
// findRepoRoot returns the nearest directory, starting at the current
// working directory and walking upward, that contains
// workspace-server/migrations. At most 8 directories are probed; the
// search also stops at the filesystem root.
//
// If no such directory is found, or the working directory cannot be
// determined, the calling test is aborted via t.Fatalf. The failure
// message names the directory the search started from, not the last
// ancestor probed, so the report points at where the test actually ran.
func findRepoRoot(t *testing.T) string {
	t.Helper()
	start, err := os.Getwd()
	if err != nil {
		t.Fatalf("getwd: %v", err)
	}
	dir := start
	for i := 0; i < 8; i++ {
		if _, err := os.Stat(filepath.Join(dir, "workspace-server", "migrations")); err == nil {
			return dir
		}
		parent := filepath.Dir(dir)
		if parent == dir {
			// filepath.Dir is a fixed point at the root: nowhere left to go.
			break
		}
		dir = parent
	}
	t.Fatalf("could not locate repo root with workspace-server/migrations from %s", start)
	return "" // unreachable: t.Fatalf does not return, but the compiler needs a result
}
// sortedKeys returns the keys of m as a newly allocated slice in ascending
// order. The result is non-nil even when m is empty or nil.
func sortedKeys(m map[string]struct{}) []string {
	keys := make([]string, len(m))
	i := 0
	for key := range m {
		keys[i] = key
		i++
	}
	sort.Strings(keys)
	return keys
}

View File

@ -259,24 +259,7 @@ func (h *ChatFilesHandler) Upload(c *gin.Context) {
req.ContentLength = c.Request.ContentLength
}
resp, err := h.httpClient.Do(req)
if err != nil {
log.Printf("chat_files Upload: forward to %s failed: %v", forwardURL, err)
c.JSON(http.StatusBadGateway, gin.H{"error": "workspace unreachable"})
return
}
defer resp.Body.Close()
// Stream response back. Copy headers we know are safe + the body.
if ct := resp.Header.Get("Content-Type"); ct != "" {
c.Header("Content-Type", ct)
}
c.Status(resp.StatusCode)
if _, err := io.Copy(c.Writer, resp.Body); err != nil {
// Mid-stream failure — too late to write a JSON error, just
// log so ops can correlate with the workspace's logs.
log.Printf("chat_files Upload: stream response back failed for %s: %v", workspaceID, err)
}
h.streamWorkspaceResponse(c, "upload", workspaceID, forwardURL, req, []string{"Content-Type"})
}
// Download handles GET /workspaces/:id/chat/download?path=<abs path>.
@ -351,27 +334,42 @@ func (h *ChatFilesHandler) Download(c *gin.Context) {
}
req.Header.Set("Authorization", "Bearer "+secret)
h.streamWorkspaceResponse(c, "download", workspaceID, forwardURL, req,
[]string{"Content-Type", "Content-Length", "Content-Disposition"})
}
// streamWorkspaceResponse executes the prepared forward request and
// streams the workspace's response back to the inbound caller.
// Forwards the named response headers verbatim. Centralizes the
// "do request → check err → defer close → copy headers → set status →
// io.Copy" tail that's identical between Upload and Download.
//
// op is the human-readable feature label ("upload"/"download") used
// in log messages so operators can distinguish which feature ran.
func (h *ChatFilesHandler) streamWorkspaceResponse(
c *gin.Context,
op, workspaceID, forwardURL string,
req *http.Request,
forwardHeaders []string,
) {
resp, err := h.httpClient.Do(req)
if err != nil {
log.Printf("chat_files Download: forward to %s failed: %v", forwardURL, err)
log.Printf("chat_files %s: forward to %s failed: %v", op, forwardURL, err)
c.JSON(http.StatusBadGateway, gin.H{"error": "workspace unreachable"})
return
}
defer resp.Body.Close()
// Stream response back, including the workspace's headers so the
// client gets the correct Content-Type + Content-Disposition (the
// workspace constructs them from the actual file's extension +
// basename — keeping that logic on the workspace side avoids a
// double-source-of-truth on filename encoding rules).
for _, hdr := range []string{"Content-Type", "Content-Length", "Content-Disposition"} {
for _, hdr := range forwardHeaders {
if v := resp.Header.Get(hdr); v != "" {
c.Header(hdr, v)
}
}
c.Status(resp.StatusCode)
if _, err := io.Copy(c.Writer, resp.Body); err != nil {
log.Printf("chat_files Download: stream response back failed for %s: %v", workspaceID, err)
// Mid-stream failure — too late to write a JSON error, just
// log so ops can correlate with the workspace's logs.
log.Printf("chat_files %s: stream response back failed for %s: %v", op, workspaceID, err)
}
}

View File

@ -0,0 +1,53 @@
-- 046_workspace_status_awaiting_agent.down.sql
--
-- Reverse 046_workspace_status_awaiting_agent.up.sql.
--
-- Postgres does NOT support DROP VALUE on an enum. The standard rollback
-- recipe is rename → recreate → cast → drop, which is intrusive (locks
-- workspaces with ACCESS EXCLUSIVE the same way migration 043 did). We
-- punt: only run this manually, and only if you're prepared to nuke and
-- recreate the type. Application code WILL fail if the value disappears
-- and any row currently has it; pre-flight with:
--
-- UPDATE workspaces SET status = 'offline'
-- WHERE status = 'awaiting_agent';
--
-- before running the recipe below.
-- Run the whole rollback in one transaction. lock_timeout makes any
-- statement that waits more than 5s for a lock fail instead of queueing
-- indefinitely.
BEGIN;
SET LOCAL lock_timeout = '5s';
-- Convert any existing awaiting_agent / hibernating rows to a value the
-- new enum will accept. 'offline' is the safest fallback for awaiting_agent
-- (operator can re-register to bring them back online); 'hibernated' is
-- the natural terminal of an in-flight 'hibernating'.
UPDATE workspaces SET status = 'offline' WHERE status = 'awaiting_agent';
UPDATE workspaces SET status = 'hibernated' WHERE status = 'hibernating';
-- Step 1: move the current type out of the way so its name can be reused.
ALTER TYPE workspace_status RENAME TO workspace_status_with_awaiting;
-- Step 2: recreate the enum without 'awaiting_agent' and 'hibernating'.
CREATE TYPE workspace_status AS ENUM (
'provisioning',
'online',
'offline',
'degraded',
'failed',
'removed',
'paused',
'hibernated'
);
-- Step 3: detach the column default before retyping; it is restored in step 5.
-- NOTE(review): presumably needed because the existing default is typed to
-- the renamed enum — confirm.
ALTER TABLE workspaces
ALTER COLUMN status DROP DEFAULT;
-- Step 4: point the column at the recreated type, casting each stored
-- value through text.
ALTER TABLE workspaces
ALTER COLUMN status TYPE workspace_status
USING status::text::workspace_status;
-- Step 5: restore the default, now expressed in the recreated type.
ALTER TABLE workspaces
ALTER COLUMN status SET DEFAULT 'provisioning'::workspace_status;
-- Step 6: remove the renamed type now that workspaces.status uses the
-- recreated one.
DROP TYPE workspace_status_with_awaiting;
COMMIT;

View File

@ -0,0 +1,60 @@
-- 046_workspace_status_awaiting_agent.up.sql
--
-- Add the missing 'awaiting_agent' and 'hibernating' values to the
-- workspace_status enum.
--
-- Migration 043 (2026-04-25) introduced the workspace_status enum but
-- omitted two values that application code had already been writing:
--
-- 'awaiting_agent' — handlers/workspace.go:333 (since 2026-04-24,
-- commit 1e8b5e01, "first-class BYO-compute"):
-- external workspaces created without a URL are
-- meant to park here until the agent registers.
-- 'hibernating' — handlers/workspace_restart.go:271-272 (since
-- 29_workspace_hibernation): intermediate state
-- between 'online' and the final 'hibernated';
-- acts as a DB-level claim while provisioner.Stop
-- runs.
--
-- Every UPDATE that tried to write either value failed with
-- `invalid input value for enum workspace_status: "..."`. The
-- consequences were silent because both call sites either dropped or
-- log-and-continued the error:
--
-- - workspace.go:333 discards the Exec result entirely. Canvas shows
-- `awaiting_agent` in the response, but the row stays in
-- 'provisioning' until first /registry/register.
-- - workspace_restart.go:277 logs and returns; hibernation simply
-- never happens. Idle workspaces consumed resources indefinitely
-- for five days before this gate flagged it.
--
-- 2026-04-30 PR #2382 ("default external runtime to poll-mode +
-- awaiting_agent") added two more silent-fail sites in
-- registry/liveness.go and registry/healthsweep.go. Both log + continue.
-- Result: liveness expiry no longer transitions runtime='external'
-- rows, and the heartbeat-staleness sweep is a no-op for them.
-- UI/canvas shows external workspaces stuck on 'online' or 'degraded'
-- indefinitely after the agent disconnects.
--
-- Tests across all four sites used sqlmock, which matches SQL by regex
-- but does not validate against the live enum constraint, so unit
-- tests passed despite the prod-only failures. See
-- feedback_mock_at_drifting_layer in operator memory.
--
-- This migration adds both enum values so all six call sites start
-- succeeding. IF NOT EXISTS makes the migration idempotent across
-- re-runs (RunMigrations in postgres.go re-applies migrations until
-- schema_migrations records them; a future operator-driven re-run is
-- harmless).
--
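-- On PostgreSQL 12+, ALTER TYPE ... ADD VALUE may run inside a transaction
-- block, but the new value cannot be used until that transaction commits;
-- older versions reject it inside a transaction block entirely. It does
-- NOT take a heavy lock on workspaces. Safe to run during normal traffic.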
--
-- A regression gate lives at internal/db/workspace_status_enum_drift_test.go:
-- it parses every UPDATE/INSERT against `workspaces` in the Go tree
-- and asserts every status literal is in the enum. That test would
-- have caught both omissions on the day they shipped.
ALTER TYPE workspace_status ADD VALUE IF NOT EXISTS 'awaiting_agent';
ALTER TYPE workspace_status ADD VALUE IF NOT EXISTS 'hibernating';

View File

@ -0,0 +1,54 @@
{
"functions": [
{
"is_abstract": false,
"is_async": false,
"name": "auth_headers",
"parameters": [],
"return_annotation": "dict[str, str]"
},
{
"is_abstract": false,
"is_async": false,
"name": "get_token",
"parameters": [],
"return_annotation": "str | None"
},
{
"is_abstract": false,
"is_async": false,
"name": "refresh_cache",
"parameters": [],
"return_annotation": "str | None"
},
{
"is_abstract": false,
"is_async": false,
"name": "save_token",
"parameters": [
{
"annotation": "str",
"has_default": false,
"kind": "POSITIONAL_OR_KEYWORD",
"name": "token"
}
],
"return_annotation": "None"
},
{
"is_abstract": false,
"is_async": false,
"name": "self_source_headers",
"parameters": [
{
"annotation": "str",
"has_default": false,
"kind": "POSITIONAL_OR_KEYWORD",
"name": "workspace_id"
}
],
"return_annotation": "dict[str, str]"
}
],
"module": "platform_auth"
}

View File

@ -0,0 +1,114 @@
"""platform_auth public-API signature snapshot — drift gate.
``platform_auth`` is the workspace's auth-token store. Every outbound
HTTP from the runtime heartbeat, registry/register, A2A delegation,
memory tool calls, chat uploads, temporal_workflow, molecule_ai_status
pulls credentials through one of these five module-level functions.
A grep of ``from platform_auth import`` across workspace/ shows it's
imported by 14+ files in the runtime hot path:
- main.py (boot + token issuance)
- heartbeat.py (every heartbeat loop fire)
- a2a_client.py (every A2A peer call)
- a2a_tools.py (delegate_task_async)
- consolidation.py
- events.py (canvas push)
- executor_helpers.py (3 sites)
- molecule_ai_status.py
- builtin_tools/memory.py (3 sites)
- builtin_tools/temporal_workflow.py (2 sites)
Renaming any of the five (e.g. ``auth_headers`` → ``bearer_headers``)
would make every one of those imports raise ``ImportError`` at boot —
the workspace fails to start with a confusing trace deep in
heartbeat init, not at the rename site.
Same drift class as the BaseAdapter signature snapshot (#2378, #2380),
skill_loader gate (#2381), and runtime_wedge gate (#2383). The
shared ``_signature_snapshot.py`` helpers do the heavy lifting; this
file just declares which functions are part of the contract.
"""
import sys
from pathlib import Path
import pytest
WORKSPACE_DIR = Path(__file__).parent.parent
if str(WORKSPACE_DIR) not in sys.path:
sys.path.insert(0, str(WORKSPACE_DIR))
from tests._signature_snapshot import ( # noqa: E402
build_module_functions_record,
compare_against_snapshot,
)
SNAPSHOT_PATH = Path(__file__).parent / "snapshots" / "platform_auth_signature.json"
def _build_full_snapshot() -> dict:
    """Build the signature record for the five contract functions.

    Only the functions that runtime + adapters call are pinned.
    ``clear_cache`` is intentionally NOT in the snapshot — it's a
    test-only helper. Callers in production code MUST NOT depend on it.
    """
    import platform_auth as module

    # The public contract, spelled out explicitly so that adding or
    # removing a name is always a deliberate edit here.
    contract = [
        "auth_headers",
        "self_source_headers",
        "get_token",
        "save_token",
        "refresh_cache",
    ]
    return build_module_functions_record(module, function_names=contract)
def test_platform_auth_signature_matches_snapshot():
    """Hand the freshly built signature record and the snapshot path to
    ``compare_against_snapshot``, which owns the comparison.
    """
    current = _build_full_snapshot()
    compare_against_snapshot(current, SNAPSHOT_PATH)
def test_snapshot_has_required_functions():
    """Defense-in-depth check on the stored snapshot file itself.

    Even if both source and snapshot are updated together, removing any
    of the five contract functions requires an explicit edit here. The
    required set is the documented public contract — every workspace
    runtime import path depends on these.

    Skips when the snapshot file has not been generated yet.
    """
    import json

    if not SNAPSHOT_PATH.exists():
        pytest.skip(f"{SNAPSHOT_PATH.name} not generated yet")

    entries = json.loads(SNAPSHOT_PATH.read_text())["functions"]
    recorded = set()
    for entry in entries:
        recorded.add(entry["name"])

    required = {
        # Every outbound httpx call merges this into headers
        "auth_headers",
        # A2A peer + self-message paths add X-Workspace-ID via this
        "self_source_headers",
        # main.py reads this on boot to decide register-vs-resume
        "get_token",
        # main.py persists the platform-issued token via this
        "save_token",
        # 401-retry path drops the in-process cache via this (#1877)
        "refresh_cache",
    }

    missing = required.difference(recorded)
    if missing:
        pytest.fail(
            f"platform_auth snapshot is missing required functions: {sorted(missing)}.\n"
            "Either restore them on platform_auth.py, OR coordinate runtime "
            "module + adapter updates AND remove the entry from `required` in "
            "this test with a justification."
        )

    # An entry flagged "missing" in the snapshot is reported as a failure
    # with the function's name.
    for fn in entries:
        if not fn.get("missing"):
            continue
        pytest.fail(
            f"platform_auth.{fn['name']} resolved as a non-function — "
            "either it was replaced by a different kind of attribute "
            "(class? module-level alias?) which existing direct calls "
            "would break, OR it was removed entirely."
        )