diff --git a/.gitea/workflows/harness-replays.yml b/.gitea/workflows/harness-replays.yml index a17a9c3a..c97a2153 100644 --- a/.gitea/workflows/harness-replays.yml +++ b/.gitea/workflows/harness-replays.yml @@ -269,6 +269,37 @@ jobs: if: needs.detect-changes.outputs.run == 'true' run: pip install -r tests/harness/requirements.txt + - name: Rebuild cp-stub from source (RC #11812 + RC #11815 narrow fix) + # Why this step exists: tests/harness/up.sh's `build` block is + # gated by `--rebuild` (no default rebuild), so without an + # explicit `docker compose build` the CI uses the cached cp-stub + # image — which was built BEFORE the new /cp/admin/orgs handler + # landed in tests/harness/cp-stub/main.go. Symptom: the + # canary-smoke-org-create-400-capture replay gets HTTP 501 from + # the catch-all (handler not implemented), even though the + # source has the handler. `--no-cache` ensures the rebuilt image + # pulls the current main.go from the checked-out branch. + # + # RC #11815 NARROW FIX: only cp-stub is rebuilt here. The + # tenants (tenant-alpha/tenant-beta) need SECRETS_ENCRYPTION_KEY + # which tests/harness/up.sh generates (with `openssl rand -base64 + # 32`) and exports — up.sh runs AFTER this pre-build step, so + # building the tenants here would fail with missing env. The + # tenants get built later by up.sh (the harness's own `build` + # block doesn't need --rebuild, so the cached tenant images are + # fine for the non-handler-bug surface). cp-stub is the only + # service with the stale-handler problem AND no env dep. + if: needs.detect-changes.outputs.run == 'true' + working-directory: tests/harness + env: + # docker compose validates the whole compose.yml even when only + # building cp-stub; tenant-alpha/beta require SECRETS_ENCRYPTION_KEY. + # up.sh generates the real key later. A placeholder is fine here + # because cp-stub does not use it. 
+ SECRETS_ENCRYPTION_KEY: rebuild-cp-stub-placeholder + run: | + docker compose -f compose.yml build --no-cache cp-stub + - name: Run all replays against the harness # run-all-replays.sh: boot via up.sh → seed via seed.sh → run # every replays/*.sh → tear down via down.sh on EXIT (trap). diff --git a/tests/harness/compose.yml b/tests/harness/compose.yml index 224066f6..9b783ec0 100644 --- a/tests/harness/compose.yml +++ b/tests/harness/compose.yml @@ -48,6 +48,16 @@ services: environment: PORT: "9090" CP_STUB_PEERS_MODE: "${CP_STUB_PEERS_MODE:-}" + # #2867 address-fix: replays run on the host (./run-all-replays.sh), + # not inside a harness-net container, so the cp-stub's port 9090 + # MUST be published on the host so replays can reach localhost:9090 (note: "9090:9090" binds all host interfaces, not only loopback). Without this, the + # canary-smoke-org-create-400-capture replay gets "could not + # resolve host cp-stub" / HTTP 000 (the harness job 501121 + # round-7 failure). Note: the cf-proxy (line ~212) already does + # this for port 8080; the cp-stub was the only service missing + # a host-side mapping. + ports: + - "9090:9090" networks: [harness-net] healthcheck: test: ["CMD-SHELL", "wget -q -O- http://localhost:9090/healthz || exit 1"] diff --git a/tests/harness/cp-stub/main.go b/tests/harness/cp-stub/main.go index e87c3ece..86e6a4f3 100644 --- a/tests/harness/cp-stub/main.go +++ b/tests/harness/cp-stub/main.go @@ -24,6 +24,7 @@ import ( "log" "net/http" "os" + "strings" "sync/atomic" ) @@ -66,6 +67,60 @@ func main() { }) }) + // /cp/admin/orgs — POST. Mirrors the real CP's orgs.go:267-295 + + // router.go:437 validation shape: org-create requires slug, name, + // and owner_user_id. The harness's canary-smoke-org-create-400-capture + // replay (tests/harness/replays/) posts a payload missing + // owner_user_id and asserts the stub returns 400 + a parseable JSON + // body naming the missing fields. 
This is the harness-capture path + // for the real core#2737 staging 400-body-loss (the staging script + // eats the body under set -e + admin_call; the harness proves the + // pattern works locally). + // + // Burn-down for #2864: registering this handler un-arms the + // canary-smoke-org-create-400-capture xfail. + mux.HandleFunc("/cp/admin/orgs", func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + writeJSON(w, 405, map[string]any{"error": "method not allowed; expected POST"}) + return + } + var payload struct { + Slug string `json:"slug"` + Name string `json:"name"` + OwnerUserID string `json:"owner_user_id"` + } + if err := json.NewDecoder(r.Body).Decode(&payload); err != nil { + writeJSON(w, 400, map[string]any{"error": "invalid JSON: " + err.Error()}) + return + } + var missing []string + if payload.Slug == "" { + missing = append(missing, "slug") + } + if payload.Name == "" { + missing = append(missing, "name") + } + if payload.OwnerUserID == "" { + missing = append(missing, "owner_user_id") + } + if len(missing) > 0 { + writeJSON(w, 400, map[string]any{ + "error": strings.Join(missing, ", ") + " are required", + "fields": missing, + }) + return + } + // If the payload is valid, return 201 (real CP behavior). The + // replay doesn't exercise this path — it specifically tests the + // 400 + body shape on a bad payload — but returning 201 keeps + // the stub honest for any future replay that wants to test + // the happy path. + writeJSON(w, 201, map[string]any{ + "ok": true, + "slug": payload.Slug, + }) + }) + // __stub/state — expose stub state (counters) so replay scripts can // assert the tenant actually reached us. Read-only. 
mux.HandleFunc("/__stub/state", func(w http.ResponseWriter, r *http.Request) { diff --git a/tests/harness/replays/canary-smoke-org-create-400-capture.sh b/tests/harness/replays/canary-smoke-org-create-400-capture.sh index 69dbb920..3addeb9e 100755 --- a/tests/harness/replays/canary-smoke-org-create-400-capture.sh +++ b/tests/harness/replays/canary-smoke-org-create-400-capture.sh @@ -1,24 +1,16 @@ #!/usr/bin/env bash -# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -# XFAIL — issue #2864 -# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -# This replay is currently marked xfail (expected to fail). The underlying -# issue is tracked at https://git.moleculesai.app/molecule-ai/molecule-core/issues/2864 -# Reason: cp-stub lacks /cp/admin/orgs route (404) + 400 body empty under set -e +# canary-smoke-org-create-400-capture — core#2737 staging 400-body-loss capture. # -# To un-xfail (when the underlying issue is fixed): -# 1. Remove the `exit 0` line below -# 2. Update the issue #2864 with a "fixed" comment + link to the fix PR -# 3. Verify the replay runs end-to-end with PASS in the local harness -# 4. The Harness Replays workflow will then surface the real pass signal +# Reproduces the staging SaaS smoke canary (test_staging_full_saas.sh:368-420) +# locally: POST /cp/admin/orgs with a known-bad payload (missing owner_user_id) +# and assert the response is 400 + a parseable JSON body naming the missing +# fields. The staging script's admin_call + set -e combo eats the body under +# the failure-shape path; this harness-capture proves the pattern works +# locally so the staging fix (per Researcher #101104) can mirror it. # -# Why we xfail (not skip, not fix): the underlying issues are out of scope -# for PR #2821 (which captures the canary failures) but block the green CI -# signal that the 2-genuine review needs. Tracking the work in the linked -# issue lets us burn down the xfails as separate PRs land. 
-# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -echo "[replay] __XFAIL__:#2864:cp-stub lacks /cp/admin/orgs route (404) + 400 body empty under set -e" -exit 0 +# Burn-down for #2864: was previously xfail'd (PR #2821 tracking issue); +# the cp-stub now implements /cp/admin/orgs (mirror of the real CP's +# orgs.go:267-295 validation shape), so this replay is re-armed. set -euo pipefail HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -73,13 +65,23 @@ echo "[replay] phase 2: POST /cp/admin/orgs with a known-bad payload (missing ow # shape. We bypass the admin_call helper and call curl directly so # we can also capture the HTTP status code (admin_call returns # nothing on non-2xx because of --fail-with-body under set -e). +# +# The cp-stub is called DIRECTLY (http://localhost:9090) — NOT through +# the cf-proxy/tenant-proxy chain. Reason: the tenant's cp-proxy +# allowlist intentionally blocks /cp/admin/* paths (security +# boundary, cp_proxy_test.go line 30: "cross-tenant admin list +# (lateral movement)") — admin operations don't traverse the +# tenant proxy in the production path either (real CP admin ops +# call the CP directly, not through the tenant's cf-proxy). This +# replay is a harness-capture of the cp-stub's 400+JSON shape; it +# is NOT a production-path E2E. The staging script (test_staging_full_saas.sh) +# exercises the production path separately. HTTP_CODE=$(curl -sS --fail-with-body --max-time 30 \ -o /tmp/canary_org_create_400_body.$$ \ -w "%{http_code}" \ - -H "Host: ${ALPHA_HOST}" \ -H "Authorization: Bearer ${ALPHA_ADMIN_TOKEN}" \ -H "Content-Type: application/json" \ - -X POST "$BASE/cp/admin/orgs" \ + -X POST "http://localhost:9090/cp/admin/orgs" \ -d "{\"slug\":\"$ORG_CREATE_400_CAPTURE_SLUG\",\"name\":\"replay-bad-org\"}" \ || true) # Reset the exit-code from the curl --fail-with-body so set -e