fix(harness#2864 burn-down): cp-stub implements /cp/admin/orgs; un-xfail canary-smoke-org-create-400-capture #2867

Merged
devops-engineer merged 6 commits from fix/2864-burn-down-org-create-400-capture into main 2026-06-14 18:39:54 +00:00
4 changed files with 118 additions and 20 deletions
+31
View File
@@ -269,6 +269,37 @@ jobs:
if: needs.detect-changes.outputs.run == 'true'
run: pip install -r tests/harness/requirements.txt
- name: Rebuild cp-stub from source (RC #11812 + RC #11815 narrow fix)
# Why this step exists: tests/harness/up.sh's `build` block is
# gated by `--rebuild` (no default rebuild), so without an
# explicit `docker compose build` the CI uses the cached cp-stub
# image — which was built BEFORE the new /cp/admin/orgs handler
# landed in tests/harness/cp-stub/main.go. Symptom: the
# canary-smoke-org-create-400-capture replay gets HTTP 501 from
# the catch-all (handler not implemented), even though the
# source has the handler. `--no-cache` ensures the rebuilt image
# pulls the current main.go from the checked-out branch.
#
# RC #11815 NARROW FIX: only cp-stub is rebuilt here. The
# tenants (tenant-alpha/tenant-beta) need SECRETS_ENCRYPTION_KEY
# which tests/harness/up.sh generates (with `openssl rand -base64
# 32`) and exports — up.sh runs AFTER this pre-build step, so
# building the tenants here would fail with missing env. The
# tenants get built later by up.sh (the harness's own `build`
# block doesn't need --rebuild, so the cached tenant images are
# fine for the non-handler-bug surface). cp-stub is the only
# service with the stale-handler problem AND no env dep.
if: needs.detect-changes.outputs.run == 'true'
working-directory: tests/harness
env:
# docker compose validates the whole compose.yml even when only
# building cp-stub; tenant-alpha/beta require SECRETS_ENCRYPTION_KEY.
# up.sh generates the real key later. A placeholder is fine here
# because cp-stub does not use it.
SECRETS_ENCRYPTION_KEY: rebuild-cp-stub-placeholder
run: |
docker compose -f compose.yml build --no-cache cp-stub
- name: Run all replays against the harness
# run-all-replays.sh: boot via up.sh → seed via seed.sh → run
# every replays/*.sh → tear down via down.sh on EXIT (trap).
+10
View File
@@ -48,6 +48,16 @@ services:
environment:
PORT: "9090"
CP_STUB_PEERS_MODE: "${CP_STUB_PEERS_MODE:-}"
# #2867 address-fix: replays run on the host (./run-all-replays.sh),
# not inside a harness-net container, so the cp-stub's port 9090
# MUST be published to the host loopback. Without this, the
# canary-smoke-org-create-400-capture replay gets "could not
# resolve host cp-stub" / HTTP 000 (the harness job 501121
# round-7 failure). Note: the cf-proxy (line ~212) already does
# this for port 8080; the cp-stub was the only service missing
# a host-side mapping.
#
# The mapping is pinned to 127.0.0.1 on purpose: a bare "9090:9090"
# publishes on every host interface, which would expose the stub's
# unauthenticated /cp/admin/* routes beyond the machine running the
# harness. The replay only ever calls http://localhost:9090.
ports:
- "127.0.0.1:9090:9090"
networks: [harness-net]
healthcheck:
test: ["CMD-SHELL", "wget -q -O- http://localhost:9090/healthz || exit 1"]
+55
View File
@@ -24,6 +24,7 @@ import (
"log"
"net/http"
"os"
"strings"
"sync/atomic"
)
@@ -66,6 +67,60 @@ func main() {
})
})
// /cp/admin/orgs — POST only. Mirrors the real CP's orgs.go:267-295 +
// router.go:437 validation shape: org-create requires slug, name,
// and owner_user_id. The harness's canary-smoke-org-create-400-capture
// replay (tests/harness/replays/) posts a payload missing
// owner_user_id and asserts the stub returns 400 + a parseable JSON
// body naming the missing fields. This is the harness-capture path
// for the real core#2737 staging 400-body-loss (the staging script
// eats the body under set -e + admin_call; the harness proves the
// pattern works locally).
//
// Responses:
//   - 405 + Allow: POST  for any method other than POST
//   - 400 {"error": ...} when the body is not decodable JSON
//   - 400 {"error": ..., "fields": [...]} when required fields are empty
//   - 201 {"ok": true, "slug": ...} for a complete payload
//
// Burn-down for #2864: registering this handler un-arms the
// canary-smoke-org-create-400-capture xfail.
mux.HandleFunc("/cp/admin/orgs", func(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		// RFC 9110 §15.5.6: a 405 response must list the methods the
		// resource supports in an Allow header. It has to be set before
		// writeJSON emits the status line.
		w.Header().Set("Allow", http.MethodPost)
		writeJSON(w, http.StatusMethodNotAllowed, map[string]any{"error": "method not allowed; expected POST"})
		return
	}

	var payload struct {
		Slug        string `json:"slug"`
		Name        string `json:"name"`
		OwnerUserID string `json:"owner_user_id"`
	}
	if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
		writeJSON(w, http.StatusBadRequest, map[string]any{"error": "invalid JSON: " + err.Error()})
		return
	}

	// Collect every empty required field (in a fixed order) so the
	// 400 body names all of them at once instead of one per round-trip.
	var missing []string
	if payload.Slug == "" {
		missing = append(missing, "slug")
	}
	if payload.Name == "" {
		missing = append(missing, "name")
	}
	if payload.OwnerUserID == "" {
		missing = append(missing, "owner_user_id")
	}
	if len(missing) > 0 {
		writeJSON(w, http.StatusBadRequest, map[string]any{
			"error":  strings.Join(missing, ", ") + " are required",
			"fields": missing,
		})
		return
	}

	// If the payload is valid, return 201 (real CP behavior). The
	// replay doesn't exercise this path — it specifically tests the
	// 400 + body shape on a bad payload — but returning 201 keeps
	// the stub honest for any future replay that wants to test
	// the happy path.
	writeJSON(w, http.StatusCreated, map[string]any{
		"ok":   true,
		"slug": payload.Slug,
	})
})
// __stub/state — expose stub state (counters) so replay scripts can
// assert the tenant actually reached us. Read-only.
mux.HandleFunc("/__stub/state", func(w http.ResponseWriter, r *http.Request) {
@@ -1,24 +1,16 @@
#!/usr/bin/env bash
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# XFAIL — issue #2864
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# This replay is currently marked xfail (expected to fail). The underlying
# issue is tracked at https://git.moleculesai.app/molecule-ai/molecule-core/issues/2864
# Reason: cp-stub lacks /cp/admin/orgs route (404) + 400 body empty under set -e
# canary-smoke-org-create-400-capture — core#2737 staging 400-body-loss capture.
#
# To un-xfail (when the underlying issue is fixed):
# 1. Remove the `exit 0` line below
# 2. Update the issue #2864 with a "fixed" comment + link to the fix PR
# 3. Verify the replay runs end-to-end with PASS in the local harness
# 4. The Harness Replays workflow will then surface the real pass signal
# Reproduces the staging SaaS smoke canary (test_staging_full_saas.sh:368-420)
# locally: POST /cp/admin/orgs with a known-bad payload (missing owner_user_id)
# and assert the response is 400 + a parseable JSON body naming the missing
# fields. The staging script's admin_call + set -e combo eats the body under
# the failure-shape path; this harness-capture proves the pattern works
# locally so the staging fix (per Researcher #101104) can mirror it.
#
# Why we xfail (not skip, not fix): the underlying issues are out of scope
# for PR #2821 (which captures the canary failures) but block the green CI
# signal that the 2-genuine review needs. Tracking the work in the linked
# issue lets us burn down the xfails as separate PRs land.
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
echo "[replay] __XFAIL__:#2864:cp-stub lacks /cp/admin/orgs route (404) + 400 body empty under set -e"
exit 0
# Burn-down for #2864: was previously xfail'd (PR #2821 tracking issue);
# the cp-stub now implements /cp/admin/orgs (mirror of the real CP's
# orgs.go:267-295 validation shape), so this replay is re-armed.
set -euo pipefail
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
@@ -73,13 +65,23 @@ echo "[replay] phase 2: POST /cp/admin/orgs with a known-bad payload (missing ow
# shape. We bypass the admin_call helper and call curl directly so
# we can also capture the HTTP status code (admin_call returns
# nothing on non-2xx because of --fail-with-body under set -e).
#
# The cp-stub is called DIRECTLY (http://localhost:9090) — NOT through
# the cf-proxy/tenant-proxy chain. Reason: the tenant's cp-proxy
# allowlist intentionally blocks /cp/admin/* paths (security
# boundary, cp_proxy_test.go line 30: "cross-tenant admin list
# (lateral movement)") — admin operations don't traverse the
# tenant proxy in the production path either (real CP admin ops
# call the CP directly, not through the tenant's cf-proxy). This
# replay is a harness-capture of the cp-stub's 400+JSON shape; it
# is NOT a production-path E2E. The staging script (test_staging_full_saas.sh)
# exercises the production path separately.
HTTP_CODE=$(curl -sS --fail-with-body --max-time 30 \
-o /tmp/canary_org_create_400_body.$$ \
-w "%{http_code}" \
-H "Host: ${ALPHA_HOST}" \
-H "Authorization: Bearer ${ALPHA_ADMIN_TOKEN}" \
-H "Content-Type: application/json" \
-X POST "$BASE/cp/admin/orgs" \
-X POST "http://localhost:9090/cp/admin/orgs" \
-d "{\"slug\":\"$ORG_CREATE_400_CAPTURE_SLUG\",\"name\":\"replay-bad-org\"}" \
|| true)
# Reset the exit-code from the curl --fail-with-body so set -e