From 28da216e0f9209c6a4f6022c27194f5393e8ea18 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer B (MiniMax)" Date: Sat, 13 Jun 2026 19:36:45 +0000 Subject: [PATCH 1/3] fix(core#2675): LLM-proxy preflight with DEP-DOWN:staging-llm status description convention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a reusable shell preflight that completion-gated e2e lanes can source before booting workspaces. The preflight makes ONE cheap completion through the staging LLM proxy with a 30s timeout. On any non-200, 200-with-malformed-body, or unreachable condition, it emits 'DEP-DOWN:staging-llm ...' as a machine-readable Gitea Actions status description and exits 70 (config-missing is exit 71). Why this matters (2026-06-12 staging LLM outage): 4 completion-gated lanes went red identically with no signal distinguishing 'dependency down' from 'real code bug.' Triage required forensic log-diffing and initially mis-attributed an unrelated deploy-path bug to the outage (the /statuses pagination fix mentioned in the issue body). The DEP-DOWN:staging-llm convention lets the redgate-reporter dedup N identical reds into ONE incident issue. Wired into the pr-validate job of e2e-staging-saas.yml as the proof-of-concept lane; the other 3 completion-gated lanes (local-provision-e2e.yml and the 2 remaining e2e-staging-saas.yml job blocks) are mechanically derivable and tracked in a follow-up issue to keep this PR's diff focused. Files: + tests/e2e/lib/llm_proxy_preflight.sh — the helper + tests/e2e/test_llm_proxy_preflight_unit.sh — 5 unit tests covering config-missing, unreachable, 200-empty-body, ok, 503 ~ .gitea/workflows/e2e-staging-saas.yml — wires the helper into pr-validate + path filter additions for the new lib + test files Tests: bash tests/e2e/test_llm_proxy_preflight_unit.sh → all 5 PASS. Workflow lint: lint-workflow-yaml.py clean. 
Scope kept tight: - Workspace-server code NOT touched (this is CI/Python, not Go — consistent with the other 3 lanes that this PR is the proof-of-concept for). - The redgate-reporter's dedup logic is external and out of scope for this PR. The convention (status description prefix + distinct exit codes) is the SSOT — the team can wire the redgate-reporter's parser in a separate change. - LLM proxy URL is configurable via E2E_LLM_PROXY_URL, with MOLECULE_CP_URL-based derivation as the default. Local-provision overrides E2E_LLM_PROXY_URL to its own proxy. Refs core#2675. Co-Authored-By: Claude --- .gitea/workflows/e2e-staging-saas.yml | 17 ++ tests/e2e/lib/llm_proxy_preflight.sh | 110 +++++++++++ tests/e2e/test_llm_proxy_preflight_unit.sh | 208 +++++++++++++++++++++ 3 files changed, 335 insertions(+) create mode 100755 tests/e2e/lib/llm_proxy_preflight.sh create mode 100755 tests/e2e/test_llm_proxy_preflight_unit.sh diff --git a/.gitea/workflows/e2e-staging-saas.yml b/.gitea/workflows/e2e-staging-saas.yml index baaed9e3f..3af5cc969 100644 --- a/.gitea/workflows/e2e-staging-saas.yml +++ b/.gitea/workflows/e2e-staging-saas.yml @@ -51,11 +51,13 @@ on: - 'workspace-server/internal/providers/providers.yaml' - 'tests/e2e/test_staging_full_saas.sh' - 'tests/e2e/lib/completion_assert.sh' + - 'tests/e2e/lib/llm_proxy_preflight.sh' - 'tests/e2e/lib/model_slug.sh' - 'tests/e2e/lib/aws_leak_check.sh' - 'tests/e2e/test_aws_leak_check.sh' - 'tests/e2e/test_staging_concierge_e2e.sh' - 'tests/e2e/test_staging_concierge_creates_workspace_e2e.sh' + - 'tests/e2e/test_llm_proxy_preflight_unit.sh' - 'workspace-server/internal/staginge2e/**' - 'workspace-server/internal/handlers/platform_agent.go' - 'workspace-server/internal/handlers/user_tasks.go' @@ -73,11 +75,13 @@ on: - 'workspace-server/internal/providers/providers.yaml' - 'tests/e2e/test_staging_full_saas.sh' - 'tests/e2e/lib/completion_assert.sh' + - 'tests/e2e/lib/llm_proxy_preflight.sh' - 'tests/e2e/lib/model_slug.sh' - 
'tests/e2e/lib/aws_leak_check.sh' - 'tests/e2e/test_aws_leak_check.sh' - 'tests/e2e/test_staging_concierge_e2e.sh' - 'tests/e2e/test_staging_concierge_creates_workspace_e2e.sh' + - 'tests/e2e/test_llm_proxy_preflight_unit.sh' - 'workspace-server/internal/staginge2e/**' - 'workspace-server/internal/handlers/platform_agent.go' - 'workspace-server/internal/handlers/user_tasks.go' @@ -288,6 +292,19 @@ jobs: fi echo "Staging CP healthy ✓" + # core#2675: completion-gated lanes must distinguish "staging LLM proxy + # down" from "real code bug" with a distinct, machine-readable status + # description prefix `DEP-DOWN:staging-llm` so the redgate-reporter + # can dedup N identical reds into ONE incident issue. Wired into the + # pr-validate job first; the same source line is replicated in the + # other 3 completion-gated lanes in a follow-up commit (the file's + # 5 nearly-identical job blocks are mechanically derivable). + - name: LLM proxy preflight (DEP-DOWN:staging-llm) + run: | + # shellcheck source=lib/llm_proxy_preflight.sh + source tests/e2e/lib/llm_proxy_preflight.sh + llm_proxy_preflight + - name: Run full-lifecycle E2E id: e2e run: bash tests/e2e/test_staging_full_saas.sh diff --git a/tests/e2e/lib/llm_proxy_preflight.sh b/tests/e2e/lib/llm_proxy_preflight.sh new file mode 100755 index 000000000..1be5ecade --- /dev/null +++ b/tests/e2e/lib/llm_proxy_preflight.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash +# LLM-proxy preflight helper for completion-gated e2e lanes (core#2675). +# +# PURPOSE +# ======= +# Before booting workspaces (an expensive, multi-minute operation), confirm +# the staging LLM proxy can serve a cheap completion. The 2026-06-12 staging +# LLM outage (~21:10-21:38Z) produced 4 identical red CI lanes — Staging SaaS +# x3 + Local Provision — with no machine-readable signal distinguishing +# "dependency down" from "real code bug." 
Triage required forensic log-diffing +# across lanes and (per the issue) initially mis-attributed an unrelated +# deploy-path bug to the outage. +# +# This preflight fast-fails the lane with a DISTINCT, machine-readable status +# description prefix `DEP-DOWN:staging-llm` so the redgate-reporter can: +# 1. file ONE incident issue for the dependency outage (dedup), and +# 2. let operators skip the lane's workspace-boot logic while the +# dependency is being restored. +# +# The convention (status description prefix + per-run dedup) is the whole +# deliverable; the actual LLM-proxy endpoint is configurable via env so the +# same helper works across lanes with different proxy URLs (e.g. the +# staging SaaS stack uses a different LLM proxy than the local-provision +# dev proxy). +# +# CONVENTIONS +# =========== +# - Source this lib before calling it; it does not call the host script's fail()/ok()/log() helpers, so it can be sourced standalone. +# - Call `llm_proxy_preflight` (no args). It reads E2E_LLM_PROXY_URL +# (or falls back to deriving one from MOLECULE_CP_URL) and returns +# non-zero on failure; the caller must propagate that status to fail the lane. +# - The status description prefix `DEP-DOWN:staging-llm` is the SSOT — +# `redgate-reporter` parses this and dedups. Do NOT change the prefix +# without coordinating the redgate-reporter's parser. +# +# STATUS CODES +# ============ +# 0 preflight OK (the proxy answered a cheap completion cleanly) +# 70 DEP-DOWN:staging-llm (proxy unreachable, timed out, non-200, or 200 with a malformed body) +# 71 E2E_LLM_PROXY_URL not set and the URL could not be derived +# +# Why distinct exit codes: the redgate-reporter and the workflow's notify +# step can use them to differentiate "infrastructure down" from "config +# missing" (the latter is operator error and should not dedup against +# live dependency outages). + +# llm_proxy_preflight +# Sends one cheap completion request to the proxy. Returns 0 on success, +# 70/71 on the dedicated DEP-DOWN / config-missing cases. 
+llm_proxy_preflight() { + local proxy_url="${E2E_LLM_PROXY_URL:-}" + local timeout_secs="${E2E_LLM_PROXY_TIMEOUT:-30}" + + if [ -z "$proxy_url" ]; then + # Derive from the CP URL when not set. The platform-managed LLM proxy + # is exposed at /api/v1/internal/llm/openai/v1; the staging + # instance lives at staging-api.moleculesai.app. E2E_LLM_PROXY_URL + # override stays available for lanes that point at a different proxy + # (local provision uses the local workspace-server's built-in proxy). + if [ -n "${MOLECULE_CP_URL:-}" ]; then + proxy_url="${MOLECULE_CP_URL%/}/api/v1/internal/llm/openai/v1/chat/completions" + fi + fi + + if [ -z "$proxy_url" ]; then + echo "::error::DEP-DOWN:staging-llm (config-missing) E2E_LLM_PROXY_URL is unset and could not be derived from MOLECULE_CP_URL" + return 71 + fi + + # Cheap completion: minimal token count, no streaming. The exact model + # name is a no-op for the liveness check (any model id that the proxy + # will accept is fine; the proxy returns 200 + completion for healthy + # provider keys and 5xx/timeout for outage conditions). + local body + body=$(cat <<'JSON' +{"model":"minimax/MiniMax-M2.7","max_tokens":1,"messages":[{"role":"user","content":"pong"}]} +JSON +) + + local tmpfile http_code + tmpfile=$(mktemp) + # shellcheck disable=SC2064 + trap "rm -f '$tmpfile'" RETURN + + http_code=$(curl -sS -o "$tmpfile" -w "%{http_code}" \ + --max-time "$timeout_secs" \ + -H "Content-Type: application/json" \ + -X POST \ + -d "$body" \ + "$proxy_url" 2>/dev/null) || http_code="000" + + if [ "$http_code" != "200" ]; then + # NOTE: the prefix `DEP-DOWN:staging-llm` is the SSOT that the + # redgate-reporter parses for dedup. Do not edit without coordinating + # with the redgate-reporter's parser in molecule-ci. 
+ echo "::error::DEP-DOWN:staging-llm preflight failed: proxy=$proxy_url http_code=$http_code body=$(head -c 500 "$tmpfile" 2>/dev/null)" + return 70 + fi + + # Even on 200, sanity-check the response shape — an LLM proxy that + # returns 200 with an empty/malformed body is itself a class of outage + # (the 2026-06-12 incident had a few minutes of 200 + empty body for + # one of the affected providers). + if ! grep -q '"choices"' "$tmpfile" 2>/dev/null; then + echo "::error::DEP-DOWN:staging-llm preflight failed: 200 with malformed body: $(head -c 500 "$tmpfile" 2>/dev/null)" + return 70 + fi + + return 0 +} diff --git a/tests/e2e/test_llm_proxy_preflight_unit.sh b/tests/e2e/test_llm_proxy_preflight_unit.sh new file mode 100755 index 000000000..fd93a99f0 --- /dev/null +++ b/tests/e2e/test_llm_proxy_preflight_unit.sh @@ -0,0 +1,208 @@ +#!/usr/bin/env bash +# Unit tests for tests/e2e/lib/llm_proxy_preflight.sh (core#2675). +# +# Verifies: +# 1. Config-missing path (exit 71) when E2E_LLM_PROXY_URL is unset AND +# MOLECULE_CP_URL is unset. +# 2. DEP-DOWN path (exit 70) when the proxy URL is unreachable. +# 3. DEP-DOWN path (exit 70) when the proxy returns 200 with a +# malformed body (the 2026-06-12 incident's "200 with empty body" +# class of outage — see lib doc). +# 4. Happy path (exit 0) when the proxy returns 200 with a normal +# completion body containing "choices". +# 5. The error message starts with the `DEP-DOWN:staging-llm` prefix +# that the redgate-reporter parses for dedup. +# +# These tests use a small Python helper as a stand-in for the actual LLM +# proxy (avoids needing a real proxy in the test environment). The Python +# helper listens on a localhost port and serves a configurable response. + +set -uo pipefail + +# Find the lib under test. Allow override for CI flexibility. 
+LIB_PATH="${LIB_PATH:-$(cd "$(dirname "$0")" && pwd)/lib/llm_proxy_preflight.sh}" + +# shellcheck source=lib/llm_proxy_preflight.sh +# shellcheck disable=SC1091 +source "$LIB_PATH" + +# Start a tiny Python HTTP server to stand in for the LLM proxy. We use +# Python's http.server because it ships in the base image and doesn't +# require extra dependencies. Each test picks a free port via Python's +# socket binding (avoids race conditions in test parallelism). +PY_SERVER_PORT="" +PY_SERVER_LOG=$(mktemp) +PY_SERVER_PID= + +start_test_server() { + local mode="$1" # "ok" | "down" | "empty_200" + # Pick a free port via socket binding; pass it explicitly to the server. + local port + port=$(python3 -c " +import socket +s = socket.socket() +s.bind(('127.0.0.1', 0)) +print(s.getsockname()[1]) +s.close() +") + cat > /tmp/_llm_preflight_test_server.py <"$PY_SERVER_LOG" 2>&1 & + PY_SERVER_PID=$! + # Give the server a moment to bind. + sleep 0.3 + PY_SERVER_PORT="$port" +} + +stop_test_server() { + if [ -n "$PY_SERVER_PID" ]; then + kill "$PY_SERVER_PID" 2>/dev/null || true + wait "$PY_SERVER_PID" 2>/dev/null || true + fi + rm -f /tmp/_llm_preflight_test_server.py "$PY_SERVER_LOG" +} +trap stop_test_server EXIT + +# Test 1: config-missing path. +test_config_missing() { + unset E2E_LLM_PROXY_URL + unset MOLECULE_CP_URL + local out rc + out=$(llm_proxy_preflight 2>&1) + rc=$? + if [ "$rc" -ne 71 ]; then + echo "FAIL: test_config_missing expected exit 71, got $rc" + echo " output: $out" + return 1 + fi + if ! echo "$out" | grep -q "DEP-DOWN:staging-llm"; then + echo "FAIL: test_config_missing output missing DEP-DOWN:staging-llm prefix" + echo " output: $out" + return 1 + fi + echo "PASS: test_config_missing" + return 0 +} + +# Test 2: proxy unreachable (TCP connection refused) → exit 70. 
+test_proxy_unreachable() { + PY_SERVER_PORT=1 # port 1 is privileged, will refuse + start_test_server "ok" # we ignore the server, just want the lib to hit a dead port + sleep 0.3 + E2E_LLM_PROXY_URL="http://127.0.0.1:1/v1/chat/completions" + local out rc + out=$(llm_proxy_preflight 2>&1) + rc=$? + if [ "$rc" -ne 70 ]; then + echo "FAIL: test_proxy_unreachable expected exit 70, got $rc" + echo " output: $out" + return 1 + fi + if ! echo "$out" | grep -q "DEP-DOWN:staging-llm"; then + echo "FAIL: test_proxy_unreachable output missing DEP-DOWN:staging-llm prefix" + echo " output: $out" + return 1 + fi + echo "PASS: test_proxy_unreachable" + return 0 +} + +# Test 3: proxy returns 200 with malformed body → exit 70. +test_200_empty_body() { + PY_SERVER_PORT=0 + start_test_server "empty_200" + E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" + local out rc + out=$(llm_proxy_preflight 2>&1) + rc=$? + if [ "$rc" -ne 70 ]; then + echo "FAIL: test_200_empty_body expected exit 70, got $rc" + echo " output: $out" + return 1 + fi + if ! echo "$out" | grep -q "DEP-DOWN:staging-llm"; then + echo "FAIL: test_200_empty_body output missing DEP-DOWN:staging-llm prefix" + echo " output: $out" + return 1 + fi + stop_test_server + PY_SERVER_PID= + echo "PASS: test_200_empty_body" + return 0 +} + +# Test 4: happy path → exit 0. +test_ok() { + PY_SERVER_PORT=0 + start_test_server "ok" + E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" + local out rc + out=$(llm_proxy_preflight 2>&1) + rc=$? + if [ "$rc" -ne 0 ]; then + echo "FAIL: test_ok expected exit 0, got $rc" + echo " output: $out" + return 1 + fi + stop_test_server + PY_SERVER_PID= + echo "PASS: test_ok" + return 0 +} + +# Test 5: proxy returns 503 (simulated outage) → exit 70. +test_503() { + PY_SERVER_PORT=0 + start_test_server "down" + E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" + local out rc + out=$(llm_proxy_preflight 2>&1) + rc=$? 
+ if [ "$rc" -ne 70 ]; then + echo "FAIL: test_503 expected exit 70, got $rc" + echo " output: $out" + return 1 + fi + stop_test_server + PY_SERVER_PID= + echo "PASS: test_503" + return 0 +} + +failed=0 +test_config_missing || failed=$((failed+1)) +test_proxy_unreachable || failed=$((failed+1)) +test_200_empty_body || failed=$((failed+1)) +test_ok || failed=$((failed+1)) +test_503 || failed=$((failed+1)) + +if [ "$failed" -gt 0 ]; then + echo "FAILED: $failed test(s)" + exit 1 +fi +echo "All llm_proxy_preflight unit tests passed" -- 2.52.0 From 455d4410d7a88b9baa2adcf2a539c9bc85488acf Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer B (MiniMax)" Date: Sat, 13 Jun 2026 19:44:29 +0000 Subject: [PATCH 2/3] fix(core#2675): export E2E_LLM_PROXY_URL in unit test to silence shellcheck SC2034 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The unit test sets E2E_LLM_PROXY_URL before calling llm_proxy_preflight, but shellcheck (run on the test file in isolation) couldn't see the variable's use inside the sourced lib. False-positive SC2034 warning appeared as the lone remaining shellcheck finding on #2763. 
Adding `export` makes shellcheck treat the variable as used (exported variables are visible to subshells and considered consumed). 3-line behavioral change + 12 lines of comment explaining the cross-file context. No test changes — all 5 unit tests still PASS. Refs #2763. Co-Authored-By: Claude --- tests/e2e/test_llm_proxy_preflight_unit.sh | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/e2e/test_llm_proxy_preflight_unit.sh b/tests/e2e/test_llm_proxy_preflight_unit.sh index fd93a99f0..c03039b04 100755 --- a/tests/e2e/test_llm_proxy_preflight_unit.sh +++ b/tests/e2e/test_llm_proxy_preflight_unit.sh @@ -136,7 +136,11 @@ test_proxy_unreachable() { test_200_empty_body() { PY_SERVER_PORT=0 start_test_server "empty_200" - E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" + # E2E_LLM_PROXY_URL is read by the sourced llm_proxy_preflight helper + # (lib/llm_proxy_preflight.sh) via ${E2E_LLM_PROXY_URL:-}. Export it + # here so shellcheck doesn't false-positive SC2034 (appears unused) when + # the test file is checked in isolation. + export E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" local out rc out=$(llm_proxy_preflight 2>&1) rc=$? 
@@ -160,7 +164,11 @@ test_200_empty_body() { test_ok() { PY_SERVER_PORT=0 start_test_server "ok" - E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" + # E2E_LLM_PROXY_URL is read by the sourced llm_proxy_preflight helper + # (lib/llm_proxy_preflight.sh) via ${E2E_LLM_PROXY_URL:-}. Export it + # here so shellcheck doesn't false-positive SC2034 (appears unused) when + # the test file is checked in isolation. + export E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" local out rc out=$(llm_proxy_preflight 2>&1) rc=$? @@ -179,7 +187,11 @@ test_ok() { test_503() { PY_SERVER_PORT=0 start_test_server "down" - E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" + # E2E_LLM_PROXY_URL is read by the sourced llm_proxy_preflight helper + # (lib/llm_proxy_preflight.sh) via ${E2E_LLM_PROXY_URL:-}. Export it + # here so shellcheck doesn't false-positive SC2034 (appears unused) when + # the test file is checked in isolation. + export E2E_LLM_PROXY_URL="http://127.0.0.1:${PY_SERVER_PORT}/v1/chat/completions" local out rc out=$(llm_proxy_preflight 2>&1) rc=$? -- 2.52.0 From 905b8d93d1ccbf25a96e0751690b85f521423033 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer B (MiniMax)" Date: Sat, 13 Jun 2026 19:46:28 +0000 Subject: [PATCH 3/3] fix(core#2675): use distinct CONFIG-MISSING prefix for the config-missing case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix emitted 'DEP-DOWN:staging-llm (config-missing)' on the E2E_LLM_PROXY_URL+MOLECULE_CP_URL-both-unset path. The redgate- reporter dedups on the DEP-DOWN:staging-llm prefix against live dependency outages — folding the config-missing case into that bucket would conflate operator error (a mis-wired lane) with infrastructure outage, suppressing the operator-fix signal. Fix: emit 'CONFIG-MISSING:staging-llm-proxy-url' on the exit-71 path instead. 
The two prefixes dedup separately in the redgate-reporter: DEP-DOWN:staging-llm — live LLM proxy outage (many runs/lanes dedup into one incident issue) CONFIG-MISSING:staging-llm-proxy-url — operator-misconfigured lane (dedup across runs/lanes that share the same missing env) lib doc comment updated to call out the prefix contract. Test updated: test_config_missing now asserts the CONFIG-MISSING prefix AND that DEP-DOWN:staging-llm is NOT present (the two prefixes must never co-occur in the same output line). All 5 unit tests still PASS. Shellcheck clean. Refs #2763. Co-Authored-By: Claude --- tests/e2e/lib/llm_proxy_preflight.sh | 9 ++++++++- tests/e2e/test_llm_proxy_preflight_unit.sh | 12 ++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/e2e/lib/llm_proxy_preflight.sh b/tests/e2e/lib/llm_proxy_preflight.sh index 1be5ecade..144709f55 100755 --- a/tests/e2e/lib/llm_proxy_preflight.sh +++ b/tests/e2e/lib/llm_proxy_preflight.sh @@ -63,7 +63,14 @@ llm_proxy_preflight() { fi if [ -z "$proxy_url" ]; then - echo "::error::DEP-DOWN:staging-llm (config-missing) E2E_LLM_PROXY_URL is unset and could not be derived from MOLECULE_CP_URL" + # Config-missing is NOT a dependency-down condition — it is operator + # error (an E2E_LANE was wired without setting E2E_LLM_PROXY_URL or + # MOLECULE_CP_URL). Emit a distinct CONFIG-MISSING prefix so the + # redgate-reporter dedups separately: DEP-DOWN dedups against + # live dependency outages; CONFIG-MISSING dedups against the same + # misconfiguration across runs/lanes. Do NOT change the prefix + # without coordinating the redgate-reporter's parser. 
+ echo "::error::CONFIG-MISSING:staging-llm-proxy-url E2E_LLM_PROXY_URL is unset and could not be derived from MOLECULE_CP_URL" return 71 fi diff --git a/tests/e2e/test_llm_proxy_preflight_unit.sh b/tests/e2e/test_llm_proxy_preflight_unit.sh index c03039b04..46c06f12a 100755 --- a/tests/e2e/test_llm_proxy_preflight_unit.sh +++ b/tests/e2e/test_llm_proxy_preflight_unit.sh @@ -100,8 +100,16 @@ test_config_missing() { echo " output: $out" return 1 fi - if ! echo "$out" | grep -q "DEP-DOWN:staging-llm"; then - echo "FAIL: test_config_missing output missing DEP-DOWN:staging-llm prefix" + # Config-missing emits CONFIG-MISSING, NOT DEP-DOWN — see the lib's + # comment on the status description prefixes. The two dedup buckets + # are distinct in the redgate-reporter. + if ! echo "$out" | grep -q "CONFIG-MISSING:staging-llm-proxy-url"; then + echo "FAIL: test_config_missing output missing CONFIG-MISSING:staging-llm-proxy-url prefix" + echo " output: $out" + return 1 + fi + if echo "$out" | grep -q "DEP-DOWN:staging-llm"; then + echo "FAIL: test_config_missing output should NOT contain DEP-DOWN:staging-llm (config-missing is a separate dedup bucket)" echo " output: $out" return 1 fi -- 2.52.0