From aa2ae25ac85383dd67b801637e2dce75acb0a581 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sun, 14 Jun 2026 16:41:34 +0000 Subject: [PATCH 1/2] fix(e2e): #76 staging LLM preflight uses correct model slug + optional auth The preflight hard-coded the namespaced model slug `minimax/MiniMax-M2.7`, which the staging LLM proxy rejects, causing a false DEP-DOWN while the real E2E (bare slug) would succeed. It also sent no Authorization header, so proxies that require auth were mis-classified as down. Changes: - Default preflight model to the bare slug `MiniMax-M2.7`. - Add `E2E_LLM_PREFLIGHT_MODEL` override for lanes that need a different provider/model slug. - Add `E2E_LLM_PREFLIGHT_API_KEY` override; when set, sent as `Authorization: Bearer <key>`. - Add `-L` to curl so redirects from the proxy are followed. - Update unit tests to cover custom model and auth header. Fixes #76 --- tests/e2e/lib/llm_proxy_preflight.sh | 30 +++++++-- tests/e2e/test_llm_proxy_preflight_unit.sh | 78 +++++++++++++++++++++- 2 files changed, 99 insertions(+), 9 deletions(-) diff --git a/tests/e2e/lib/llm_proxy_preflight.sh b/tests/e2e/lib/llm_proxy_preflight.sh index 144709f5..9f493ea2 100755 --- a/tests/e2e/lib/llm_proxy_preflight.sh +++ b/tests/e2e/lib/llm_proxy_preflight.sh @@ -78,20 +78,38 @@ llm_proxy_preflight() { # name is a no-op for the liveness check (any model id that the proxy # will accept is fine; the proxy returns 200 + completion for healthy # provider keys and 5xx/timeout for outage conditions). + # + # #76 root cause: the hard-coded namespaced slug `minimax/MiniMax-M2.7` + # is rejected by the staging LLM proxy's model validation, so the + # preflight false-reds while the real E2E (which uses the bare slug + # `MiniMax-M2.7`) would succeed. Default to the bare slug and allow + # per-lane override via E2E_LLM_PREFLIGHT_MODEL. 
+ local model="${E2E_LLM_PREFLIGHT_MODEL:-MiniMax-M2.7}" + local api_key="${E2E_LLM_PREFLIGHT_API_KEY:-}" local body - body=$(cat <<'JSON' -{"model":"minimax/MiniMax-M2.7","max_tokens":1,"messages":[{"role":"user","content":"pong"}]} + body=$(cat </dev/null) || http_code="000" diff --git a/tests/e2e/test_llm_proxy_preflight_unit.sh b/tests/e2e/test_llm_proxy_preflight_unit.sh index 46c06f12..4bc83f5b 100755 --- a/tests/e2e/test_llm_proxy_preflight_unit.sh +++ b/tests/e2e/test_llm_proxy_preflight_unit.sh @@ -35,7 +35,7 @@ PY_SERVER_LOG=$(mktemp) PY_SERVER_PID= start_test_server() { - local mode="$1" # "ok" | "down" | "empty_200" + local mode="$1" # "ok" | "down" | "empty_200" | "echo" | "auth" # Pick a free port via socket binding; pass it explicitly to the server. local port port=$(python3 -c " @@ -51,6 +51,12 @@ mode = "$mode" port = $port class H(http.server.BaseHTTPRequestHandler): def do_POST(self): + length = int(self.headers.get('Content-Length', 0)) + raw = self.rfile.read(length).decode('utf-8', errors='replace') + try: + req = json.loads(raw) if raw else {} + except json.JSONDecodeError: + req = {} if mode == "down": self.send_error(503, "simulated outage") return @@ -60,8 +66,24 @@ class H(http.server.BaseHTTPRequestHandler): self.end_headers() self.wfile.write(b'{"error":"upstream silent"}') return - # ok - body = {"choices":[{"message":{"role":"assistant","content":"pong"}}]} + if mode == "auth": + auth = self.headers.get('Authorization', '') + if not auth.startswith('Bearer '): + self.send_response(401) + self.end_headers() + self.wfile.write(b'{"error":"missing auth"}') + return + # fall through to ok response + # ok / echo / auth-success: echo model back so tests can verify + # the request body was sent correctly. Also persist the full request + # to a well-known file for tests that need to inspect it. 
+ req_path = "/tmp/_llm_preflight_last_request.json" + try: + with open(req_path, "w") as fh: + json.dump(req, fh) + except Exception: + pass + body = {"choices":[{"message":{"role":"assistant","content":req.get("model","pong")}}]} payload = json.dumps(body).encode() self.send_response(200) self.send_header("Content-Type", "application/json") @@ -214,12 +236,62 @@ test_503() { return 0 } +# Test 6: custom model slug via E2E_LLM_PREFLIGHT_MODEL is sent in the request body. +test_model_override() { + PY_SERVER_PORT=0 + start_test_server "echo" + export E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" + export E2E_LLM_PREFLIGHT_MODEL="custom-model-42" + rm -f /tmp/_llm_preflight_last_request.json + local out rc + out=$(llm_proxy_preflight 2>&1) + rc=$? + unset E2E_LLM_PREFLIGHT_MODEL + stop_test_server + PY_SERVER_PID= + if [ "$rc" -ne 0 ]; then + echo "FAIL: test_model_override expected exit 0, got $rc" + echo " output: $out" + return 1 + fi + if ! python3 -c "import json; d=json.load(open('/tmp/_llm_preflight_last_request.json')); assert d.get('model')=='custom-model-42'; print('model ok')" 2>&1; then + echo "FAIL: test_model_override did not send the custom model in the request body" + echo " request file: $(cat /tmp/_llm_preflight_last_request.json 2>/dev/null || echo '')" + return 1 + fi + echo "PASS: test_model_override" + return 0 +} + +# Test 7: optional Authorization header is sent when E2E_LLM_PREFLIGHT_API_KEY is set. +test_auth_header() { + PY_SERVER_PORT=0 + start_test_server "auth" + export E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" + export E2E_LLM_PREFLIGHT_API_KEY="test-token-123" + local out rc + out=$(llm_proxy_preflight 2>&1) + rc=$? 
+ unset E2E_LLM_PREFLIGHT_API_KEY + stop_test_server + PY_SERVER_PID= + if [ "$rc" -ne 0 ]; then + echo "FAIL: test_auth_header expected exit 0, got $rc" + echo " output: $out" + return 1 + fi + echo "PASS: test_auth_header" + return 0 +} + failed=0 test_config_missing || failed=$((failed+1)) test_proxy_unreachable || failed=$((failed+1)) test_200_empty_body || failed=$((failed+1)) test_ok || failed=$((failed+1)) test_503 || failed=$((failed+1)) +test_model_override || failed=$((failed+1)) +test_auth_header || failed=$((failed+1)) if [ "$failed" -gt 0 ]; then echo "FAILED: $failed test(s)" -- 2.52.0 From 2234b4ace188c2b949d94a7e7f923138260d31e3 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Sun, 14 Jun 2026 17:17:09 +0000 Subject: [PATCH 2/2] fix(e2e): #76 staging LLM preflight treats any non-5xx HTTP response as UP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The preflight was classifying the staging LLM proxy's 401 response to an unauthenticated probe as DEP-DOWN, causing fleet-wide false staging-down incidents since 2026-06-13. Adopt Option 1 from the driver brief: the preflight only needs to prove REACHABILITY. Any non-5xx HTTP response (including 401/403/404) means the proxy is up; only transport failures (connection refused, timeout) or 5xx server errors classify as DEP-DOWN. Changes: - Reclassify non-5xx HTTP responses as preflight OK. - Remove the optional Authorization header path (no credential needed). - Update unit tests: 401 now passes, 5xx/unreachable still fail. 
Fixes #76 🤖 Generated with [Claude Code](https://claude.com/claude-code) --- tests/e2e/lib/llm_proxy_preflight.sh | 65 +++++------- tests/e2e/test_llm_proxy_preflight_unit.sh | 113 ++++----------------- 2 files changed, 43 insertions(+), 135 deletions(-) diff --git a/tests/e2e/lib/llm_proxy_preflight.sh b/tests/e2e/lib/llm_proxy_preflight.sh index 9f493ea2..e767d3c5 100755 --- a/tests/e2e/lib/llm_proxy_preflight.sh +++ b/tests/e2e/lib/llm_proxy_preflight.sh @@ -35,14 +35,23 @@ # # STATUS CODES # ============ -# 0 preflight OK (the proxy answered a cheap completion cleanly) -# 70 DEP-DOWN:staging-llm (proxy unreachable, slow, or auth-failed) +# 0 preflight OK (the proxy is reachable and returned an HTTP response) +# 70 DEP-DOWN:staging-llm (proxy unreachable, slow, or returned a 5xx) # 71 E2E_LLM_PROXY_URL not set and the URL could not be derived # # Why distinct exit codes: the redgate-reporter and the workflow's notify # step can use them to differentiate "infrastructure down" from "config # missing" (the latter is operator error and should not dedup against # live dependency outages). +# +# SEMANTICS NOTE (#76 root cause, 2026-06-13): +# The preflight sends an UNauthenticated probe. A healthy staging LLM proxy +# that requires auth correctly returns 401. Previously any non-200 status +# (including 401) was classified as DEP-DOWN, causing fleet-wide false +# staging-down incidents. The preflight only needs to prove REACHABILITY: +# any HTTP response (including 401/403/404) means the proxy is up. Only +# transport failures (connection refused, timeout) or 5xx server errors +# classify as DEP-DOWN. # e2e_llm_proxy_preflight # Source the lib's `llm_proxy_preflight` function. Returns 0 on success, @@ -74,47 +83,32 @@ llm_proxy_preflight() { return 71 fi - # Cheap completion: minimal token count, no streaming. 
The exact model - # name is a no-op for the liveness check (any model id that the proxy - # will accept is fine; the proxy returns 200 + completion for healthy - # provider keys and 5xx/timeout for outage conditions). - # - # #76 root cause: the hard-coded namespaced slug `minimax/MiniMax-M2.7` - # is rejected by the staging LLM proxy's model validation, so the - # preflight false-reds while the real E2E (which uses the bare slug - # `MiniMax-M2.7`) would succeed. Default to the bare slug and allow - # per-lane override via E2E_LLM_PREFLIGHT_MODEL. - local model="${E2E_LLM_PREFLIGHT_MODEL:-MiniMax-M2.7}" - local api_key="${E2E_LLM_PREFLIGHT_API_KEY:-}" + # Cheap, auth-less reachability probe: minimal token count, no streaming. + # The model name is a no-op for reachability; the bare slug avoids a + # provider-specific 400 on proxies that validate model IDs. local body - body=$(cat </dev/null) || http_code="000" - if [ "$http_code" != "200" ]; then + # #76 semantics fix: the preflight only needs to prove the proxy is + # reachable and speaking HTTP. An auth-required proxy returns 401; a + # mis-routed probe returns 404 — both mean the proxy is UP. Only + # transport failures (http_code=000) or 5xx server errors mean DOWN. + if [ "$http_code" = "000" ] || [[ "$http_code" == 5* ]]; then # NOTE: the prefix `DEP-DOWN:staging-llm` is the SSOT that the # redgate-reporter parses for dedup. Do not edit without coordinating # with the redgate-reporter's parser in molecule-ci. @@ -122,14 +116,5 @@ JSON return 70 fi - # Even on 200, sanity-check the response shape — an LLM proxy that - # returns 200 with an empty/malformed body is itself a class of outage - # (the 2026-06-12 incident had a few minutes of 200 + empty body for - # one of the affected providers). - if ! 
grep -q '"choices"' "$tmpfile" 2>/dev/null; then - echo "::error::DEP-DOWN:staging-llm preflight failed: 200 with malformed body: $(head -c 500 "$tmpfile" 2>/dev/null)" - return 70 - fi - return 0 } diff --git a/tests/e2e/test_llm_proxy_preflight_unit.sh b/tests/e2e/test_llm_proxy_preflight_unit.sh index 4bc83f5b..7aa90ce4 100755 --- a/tests/e2e/test_llm_proxy_preflight_unit.sh +++ b/tests/e2e/test_llm_proxy_preflight_unit.sh @@ -5,11 +5,11 @@ # 1. Config-missing path (exit 71) when E2E_LLM_PROXY_URL is unset AND # MOLECULE_CP_URL is unset. # 2. DEP-DOWN path (exit 70) when the proxy URL is unreachable. -# 3. DEP-DOWN path (exit 70) when the proxy returns 200 with a -# malformed body (the 2026-06-12 incident's "200 with empty body" -# class of outage — see lib doc). -# 4. Happy path (exit 0) when the proxy returns 200 with a normal -# completion body containing "choices". +# 3. DEP-DOWN path (exit 70) when the proxy returns 5xx. +# 4. Happy path (exit 0) when the proxy returns any HTTP response, +# including 401 (the #76 semantics fix: an unauthenticated probe +# against an auth-required proxy must NOT be classified as +# dependency-down). # 5. The error message starts with the `DEP-DOWN:staging-llm` prefix # that the redgate-reporter parses for dedup. # @@ -35,7 +35,7 @@ PY_SERVER_LOG=$(mktemp) PY_SERVER_PID= start_test_server() { - local mode="$1" # "ok" | "down" | "empty_200" | "echo" | "auth" + local mode="$1" # "ok" | "down" | "unauth" # Pick a free port via socket binding; pass it explicitly to the server. 
local port port=$(python3 -c " @@ -51,39 +51,17 @@ mode = "$mode" port = $port class H(http.server.BaseHTTPRequestHandler): def do_POST(self): - length = int(self.headers.get('Content-Length', 0)) - raw = self.rfile.read(length).decode('utf-8', errors='replace') - try: - req = json.loads(raw) if raw else {} - except json.JSONDecodeError: - req = {} if mode == "down": self.send_error(503, "simulated outage") return - if mode == "empty_200": - self.send_response(200) + if mode == "unauth": + self.send_response(401) self.send_header("Content-Type", "application/json") self.end_headers() - self.wfile.write(b'{"error":"upstream silent"}') + self.wfile.write(b'{"error":"unauthorized"}') return - if mode == "auth": - auth = self.headers.get('Authorization', '') - if not auth.startswith('Bearer '): - self.send_response(401) - self.end_headers() - self.wfile.write(b'{"error":"missing auth"}') - return - # fall through to ok response - # ok / echo / auth-success: echo model back so tests can verify - # the request body was sent correctly. Also persist the full request - # to a well-known file for tests that need to inspect it. - req_path = "/tmp/_llm_preflight_last_request.json" - try: - with open(req_path, "w") as fh: - json.dump(req, fh) - except Exception: - pass - body = {"choices":[{"message":{"role":"assistant","content":req.get("model","pong")}}]} + # ok + body = {"choices":[{"message":{"role":"assistant","content":"pong"}}]} payload = json.dumps(body).encode() self.send_response(200) self.send_header("Content-Type", "application/json") @@ -162,10 +140,10 @@ test_proxy_unreachable() { return 0 } -# Test 3: proxy returns 200 with malformed body → exit 70. -test_200_empty_body() { +# Test 3: proxy returns 401 (auth required) → exit 0 (#76 semantics fix). 
+test_401_reachable() { PY_SERVER_PORT=0 - start_test_server "empty_200" + start_test_server "unauth" # E2E_LLM_PROXY_URL is read by the sourced llm_proxy_preflight helper # (lib/llm_proxy_preflight.sh) via ${E2E_LLM_PROXY_URL:-}. Export it # here so shellcheck doesn't false-positive SC2034 (appears unused) when @@ -174,19 +152,14 @@ test_200_empty_body() { local out rc out=$(llm_proxy_preflight 2>&1) rc=$? - if [ "$rc" -ne 70 ]; then - echo "FAIL: test_200_empty_body expected exit 70, got $rc" - echo " output: $out" - return 1 - fi - if ! echo "$out" | grep -q "DEP-DOWN:staging-llm"; then - echo "FAIL: test_200_empty_body output missing DEP-DOWN:staging-llm prefix" + if [ "$rc" -ne 0 ]; then + echo "FAIL: test_401_reachable expected exit 0, got $rc" echo " output: $out" return 1 fi stop_test_server PY_SERVER_PID= - echo "PASS: test_200_empty_body" + echo "PASS: test_401_reachable" return 0 } @@ -236,62 +209,12 @@ test_503() { return 0 } -# Test 6: custom model slug via E2E_LLM_PREFLIGHT_MODEL is sent in the request body. -test_model_override() { - PY_SERVER_PORT=0 - start_test_server "echo" - export E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" - export E2E_LLM_PREFLIGHT_MODEL="custom-model-42" - rm -f /tmp/_llm_preflight_last_request.json - local out rc - out=$(llm_proxy_preflight 2>&1) - rc=$? - unset E2E_LLM_PREFLIGHT_MODEL - stop_test_server - PY_SERVER_PID= - if [ "$rc" -ne 0 ]; then - echo "FAIL: test_model_override expected exit 0, got $rc" - echo " output: $out" - return 1 - fi - if ! 
python3 -c "import json; d=json.load(open('/tmp/_llm_preflight_last_request.json')); assert d.get('model')=='custom-model-42'; print('model ok')" 2>&1; then - echo "FAIL: test_model_override did not send the custom model in the request body" - echo " request file: $(cat /tmp/_llm_preflight_last_request.json 2>/dev/null || echo '')" - return 1 - fi - echo "PASS: test_model_override" - return 0 -} - -# Test 7: optional Authorization header is sent when E2E_LLM_PREFLIGHT_API_KEY is set. -test_auth_header() { - PY_SERVER_PORT=0 - start_test_server "auth" - export E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" - export E2E_LLM_PREFLIGHT_API_KEY="test-token-123" - local out rc - out=$(llm_proxy_preflight 2>&1) - rc=$? - unset E2E_LLM_PREFLIGHT_API_KEY - stop_test_server - PY_SERVER_PID= - if [ "$rc" -ne 0 ]; then - echo "FAIL: test_auth_header expected exit 0, got $rc" - echo " output: $out" - return 1 - fi - echo "PASS: test_auth_header" - return 0 -} - failed=0 test_config_missing || failed=$((failed+1)) test_proxy_unreachable || failed=$((failed+1)) -test_200_empty_body || failed=$((failed+1)) +test_401_reachable || failed=$((failed+1)) test_ok || failed=$((failed+1)) test_503 || failed=$((failed+1)) -test_model_override || failed=$((failed+1)) -test_auth_header || failed=$((failed+1)) if [ "$failed" -gt 0 ]; then echo "FAILED: $failed test(s)" -- 2.52.0