fix(harness#2864 burn-down): cp-stub implements /cp/admin/orgs; un-xfail canary-smoke-org-create-400-capture #2867
@@ -269,6 +269,37 @@ jobs:
|
||||
if: needs.detect-changes.outputs.run == 'true'
|
||||
run: pip install -r tests/harness/requirements.txt
|
||||
|
||||
- name: Rebuild cp-stub from source (RC #11812 + RC #11815 narrow fix)
|
||||
# Why this step exists: tests/harness/up.sh's \`build\` block is
|
||||
# gated by \`--rebuild\` (no default rebuild), so without an
|
||||
# explicit \`docker compose build\` the CI uses the cached cp-stub
|
||||
# image — which was built BEFORE the new /cp/admin/orgs handler
|
||||
# landed in tests/harness/cp-stub/main.go. Symptom: the
|
||||
# canary-smoke-org-create-400-capture replay gets HTTP 501 from
|
||||
# the catch-all (handler not implemented), even though the
|
||||
# source has the handler. \`--no-cache\` ensures the rebuilt image
|
||||
# pulls the current main.go from the checked-out branch.
|
||||
#
|
||||
# RC #11815 NARROW FIX: only cp-stub is rebuilt here. The
|
||||
# tenants (tenant-alpha/tenant-beta) need SECRETS_ENCRYPTION_KEY
|
||||
# which tests/harness/up.sh generates (with \`openssl rand -base64
|
||||
# 32\`) and exports — up.sh runs AFTER this pre-build step, so
|
||||
# building the tenants here would fail with missing env. The
|
||||
# tenants are handled later by up.sh (its \`build\` block is skipped
|
||||
# without --rebuild, so the cached tenant images are reused; they are
|
||||
# fine for the non-handler-bug surface). cp-stub is the only
|
||||
# service with the stale-handler problem AND no env dep.
|
||||
if: needs.detect-changes.outputs.run == 'true'
|
||||
working-directory: tests/harness
|
||||
env:
|
||||
# docker compose validates the whole compose.yml even when only
|
||||
# building cp-stub; tenant-alpha/beta require SECRETS_ENCRYPTION_KEY.
|
||||
# up.sh generates the real key later. A placeholder is fine here
|
||||
# because cp-stub does not use it.
|
||||
SECRETS_ENCRYPTION_KEY: rebuild-cp-stub-placeholder
|
||||
run: |
|
||||
docker compose -f compose.yml build --no-cache cp-stub
|
||||
|
||||
- name: Run all replays against the harness
|
||||
# run-all-replays.sh: boot via up.sh → seed via seed.sh → run
|
||||
# every replays/*.sh → tear down via down.sh on EXIT (trap).
|
||||
|
||||
@@ -48,6 +48,16 @@ services:
|
||||
environment:
|
||||
PORT: "9090"
|
||||
CP_STUB_PEERS_MODE: "${CP_STUB_PEERS_MODE:-}"
|
||||
# #2867 address-fix: replays run on the host (./run-all-replays.sh),
|
||||
# not inside a harness-net container, so the cp-stub's port 9090
|
||||
# MUST be published to the host loopback. Without this, the
|
||||
# canary-smoke-org-create-400-capture replay gets "could not
|
||||
# resolve host cp-stub" / HTTP 000 (the harness job 501121
|
||||
# round-7 failure). Note: the cf-proxy (line ~212) already does
|
||||
# this for port 8080; the cp-stub was the only service missing
|
||||
# a host-side mapping.
|
||||
ports:
|
||||
- "9090:9090"
|
||||
networks: [harness-net]
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "wget -q -O- http://localhost:9090/healthz || exit 1"]
|
||||
|
||||
@@ -24,6 +24,7 @@ import (
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
@@ -66,6 +67,60 @@ func main() {
|
||||
})
|
||||
})
|
||||
|
||||
// /cp/admin/orgs — POST. Mirrors the real CP's orgs.go:267-295 +
|
||||
// router.go:437 validation shape: org-create requires slug, name,
|
||||
// and owner_user_id. The harness's canary-smoke-org-create-400-capture
|
||||
// replay (tests/harness/replays/) posts a payload missing
|
||||
// owner_user_id and asserts the stub returns 400 + a parseable JSON
|
||||
// body naming the missing fields. This is the harness-capture path
|
||||
// for the real core#2737 staging 400-body-loss (the staging script
|
||||
// eats the body under set -e + admin_call; the harness proves the
|
||||
// pattern works locally).
|
||||
//
|
||||
// Burn-down for #2864: registering this handler retires the
|
||||
// canary-smoke-org-create-400-capture xfail.
|
||||
mux.HandleFunc("/cp/admin/orgs", func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
writeJSON(w, 405, map[string]any{"error": "method not allowed; expected POST"})
|
||||
return
|
||||
}
|
||||
var payload struct {
|
||||
Slug string `json:"slug"`
|
||||
Name string `json:"name"`
|
||||
OwnerUserID string `json:"owner_user_id"`
|
||||
}
|
||||
if err := json.NewDecoder(r.Body).Decode(&payload); err != nil {
|
||||
writeJSON(w, 400, map[string]any{"error": "invalid JSON: " + err.Error()})
|
||||
return
|
||||
}
|
||||
var missing []string
|
||||
if payload.Slug == "" {
|
||||
missing = append(missing, "slug")
|
||||
}
|
||||
if payload.Name == "" {
|
||||
missing = append(missing, "name")
|
||||
}
|
||||
if payload.OwnerUserID == "" {
|
||||
missing = append(missing, "owner_user_id")
|
||||
}
|
||||
if len(missing) > 0 {
|
||||
writeJSON(w, 400, map[string]any{
|
||||
"error": strings.Join(missing, ", ") + " are required",
|
||||
"fields": missing,
|
||||
})
|
||||
return
|
||||
}
|
||||
// If the payload is valid, return 201 (real CP behavior). The
|
||||
// replay doesn't exercise this path — it specifically tests the
|
||||
// 400 + body shape on a bad payload — but returning 201 keeps
|
||||
// the stub honest for any future replay that wants to test
|
||||
// the happy path.
|
||||
writeJSON(w, 201, map[string]any{
|
||||
"ok": true,
|
||||
"slug": payload.Slug,
|
||||
})
|
||||
})
|
||||
|
||||
// __stub/state — expose stub state (counters) so replay scripts can
|
||||
// assert the tenant actually reached us. Read-only.
|
||||
mux.HandleFunc("/__stub/state", func(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
@@ -1,24 +1,16 @@
|
||||
#!/usr/bin/env bash
|
||||
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
# XFAIL — issue #2864
|
||||
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
# This replay is currently marked xfail (expected to fail). The underlying
|
||||
# issue is tracked at https://git.moleculesai.app/molecule-ai/molecule-core/issues/2864
|
||||
# Reason: cp-stub lacks /cp/admin/orgs route (404) + 400 body empty under set -e
|
||||
# canary-smoke-org-create-400-capture — core#2737 staging 400-body-loss capture.
|
||||
#
|
||||
# To un-xfail (when the underlying issue is fixed):
|
||||
# 1. Remove the `exit 0` line below
|
||||
# 2. Update the issue #2864 with a "fixed" comment + link to the fix PR
|
||||
# 3. Verify the replay runs end-to-end with PASS in the local harness
|
||||
# 4. The Harness Replays workflow will then surface the real pass signal
|
||||
# Reproduces the staging SaaS smoke canary (test_staging_full_saas.sh:368-420)
|
||||
# locally: POST /cp/admin/orgs with a known-bad payload (missing owner_user_id)
|
||||
# and assert the response is 400 + a parseable JSON body naming the missing
|
||||
# fields. The staging script's admin_call + set -e combo eats the body under
|
||||
# the failure-shape path; this harness-capture proves the pattern works
|
||||
# locally so the staging fix (per Researcher #101104) can mirror it.
|
||||
#
|
||||
# Why we xfail (not skip, not fix): the underlying issues are out of scope
|
||||
# for PR #2821 (which captures the canary failures) but block the green CI
|
||||
# signal that the 2-genuine review needs. Tracking the work in the linked
|
||||
# issue lets us burn down the xfails as separate PRs land.
|
||||
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
echo "[replay] __XFAIL__:#2864:cp-stub lacks /cp/admin/orgs route (404) + 400 body empty under set -e"
|
||||
exit 0
|
||||
# Burn-down for #2864: was previously xfail'd (PR #2821 tracking issue);
|
||||
# the cp-stub now implements /cp/admin/orgs (mirror of the real CP's
|
||||
# orgs.go:267-295 validation shape), so this replay is re-armed.
|
||||
|
||||
set -euo pipefail
|
||||
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
@@ -73,13 +65,23 @@ echo "[replay] phase 2: POST /cp/admin/orgs with a known-bad payload (missing ow
|
||||
# shape. We bypass the admin_call helper and call curl directly so
|
||||
# we can also capture the HTTP status code (admin_call returns
|
||||
# nothing on non-2xx because of --fail-with-body under set -e).
|
||||
#
|
||||
# The cp-stub is called DIRECTLY (http://localhost:9090) — NOT through
|
||||
# the cf-proxy/tenant-proxy chain. Reason: the tenant's cp-proxy
|
||||
# allowlist intentionally blocks /cp/admin/* paths (security
|
||||
# boundary, cp_proxy_test.go line 30: "cross-tenant admin list
|
||||
# (lateral movement)") — admin operations don't traverse the
|
||||
# tenant proxy in the production path either (real CP admin ops
|
||||
# call the CP directly, not through the tenant's cf-proxy). This
|
||||
# replay is a harness-capture of the cp-stub's 400+JSON shape; it
|
||||
# is NOT a production-path E2E. The staging script (test_staging_full_saas.sh)
|
||||
# exercises the production path separately.
|
||||
HTTP_CODE=$(curl -sS --fail-with-body --max-time 30 \
|
||||
-o /tmp/canary_org_create_400_body.$$ \
|
||||
-w "%{http_code}" \
|
||||
-H "Host: ${ALPHA_HOST}" \
|
||||
-H "Authorization: Bearer ${ALPHA_ADMIN_TOKEN}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-X POST "$BASE/cp/admin/orgs" \
|
||||
-X POST "http://localhost:9090/cp/admin/orgs" \
|
||||
-d "{\"slug\":\"$ORG_CREATE_400_CAPTURE_SLUG\",\"name\":\"replay-bad-org\"}" \
|
||||
|| true)
|
||||
# Reset the exit-code from the curl --fail-with-body so set -e
|
||||
|
||||
Reference in New Issue
Block a user