f7e2976324
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 9s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 7s
Check migration collisions / Migration version collision check (pull_request) Successful in 10s
CI / Detect changes (pull_request) Successful in 7s
CI / Python Lint & Test (pull_request) Successful in 5s
E2E API Smoke Test / detect-changes (pull_request) Successful in 7s
E2E Chat / detect-changes (pull_request) Successful in 7s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Successful in 5s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 10s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Has been skipped
Handlers Postgres Integration / detect-changes (pull_request) Successful in 6s
Harness Replays / detect-changes (pull_request) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 33s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 50s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 8s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 9s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 58s
gate-check-v3 / gate-check (pull_request) Successful in 4s
qa-review / approved (pull_request) Successful in 3s
security-review / approved (pull_request) Successful in 3s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 4s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 4s
Ops Scripts Tests / Ops scripts (unittest) (pull_request) Successful in 1m6s
E2E Staging External Runtime / E2E Staging External Runtime (pull_request) Successful in 5m25s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 20s
E2E Chat / E2E Chat (pull_request) Successful in 33s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 11s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m58s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m44s
Harness Replays / Harness Replays (pull_request) Successful in 6s
CI / Platform (Go) (pull_request) Successful in 6m9s
CI / Canvas (Next.js) (pull_request) Successful in 7m41s
CI / all-required (pull_request) Successful in 32m0s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
audit-force-merge / audit (pull_request) Successful in 32s
98 lines
4.4 KiB
Bash
Executable File
98 lines
4.4 KiB
Bash
Executable File
#!/usr/bin/env bash
# Regression test for tests/e2e/lib/model_slug.sh.
#
# PR #2571 fixed a synth-E2E masking bug where MODEL_SLUG was hardcoded
# to "openai/gpt-4o" (slash-form). Without this regression test, dropping
# any branch of the case (or flipping a slug format) would silently revert
# behavior — the E2E only fails as "Could not resolve authentication method"
# at the very first message, after a successful tenant + workspace provision.
#
# Each branch must FAIL the test if the dispatch behavior changes, not
# just produce some non-empty string.

# Note that `-e` is not set, so a non-zero status from an individual
# command does not stop the script.
set -uo pipefail

# Resolve to the lib relative to this test file so the test runs from
# any cwd (CI, local invocation, repo root).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Fail fast with one clear message when the library cannot be read,
# instead of continuing and reporting every case as a failure.
if [[ ! -r "$SCRIPT_DIR/lib/model_slug.sh" ]]; then
  echo "FATAL: cannot read $SCRIPT_DIR/lib/model_slug.sh" >&2
  exit 1
fi

# shellcheck source=tests/e2e/lib/model_slug.sh
source "$SCRIPT_DIR/lib/model_slug.sh"

# Every case below calls pick_model_slug; make sure sourcing defined it.
if ! declare -F pick_model_slug >/dev/null; then
  echo "FATAL: $SCRIPT_DIR/lib/model_slug.sh did not define pick_model_slug" >&2
  exit 1
fi

# Running tallies of passed and failed assertions.
PASS=0
FAIL=0
|
|
|
|
#######################################
# Compare an actual value with the expected one and record the outcome.
# Globals:
#   PASS (incremented on match), FAIL (incremented on mismatch)
# Arguments:
#   $1 - label printed with the result
#   $2 - value obtained
#   $3 - value expected
# Outputs:
#   " ✓ <label>" on stdout when the values match; otherwise
#   " ✗ <label>: got=<got> want=<want>" on stderr, with both values
#   shell-quoted (%q) so whitespace and empty strings stay visible.
#######################################
assert_eq() {
  local label="$1" got="$2" want="$3"
  if [[ "$got" == "$want" ]]; then
    echo " ✓ $label"
    PASS=$((PASS + 1))
  else
    # A single printf does the %q quoting directly, with no nested
    # command substitutions.
    printf ' ✗ %s: got=%q want=%q\n' "$label" "$got" "$want" >&2
    FAIL=$((FAIL + 1))
  fi
}
|
|
|
|
#######################################
# Call pick_model_slug for one runtime with the model-selection
# environment scrubbed, and assert the slug it prints.
# Arguments:
#   $1 - label printed with the result
#   $2 - runtime name passed to pick_model_slug (may be empty)
#   $3 - expected slug
# Outputs:
#   Whatever assert_eq prints for the comparison.
#######################################
run_test() {
  local label="$1" runtime="$2" want="$3"
  local got
  # Pin per-test isolation. The command substitution runs in a subshell,
  # so the unset never touches the caller's environment. Besides the
  # E2E_MODEL_SLUG override, the provider keys are cleared as well: the
  # key-specific cases in this file show they steer the claude-code
  # branch, so a key inherited from the caller env would otherwise
  # poison the default-dispatch expectations.
  got=$(
    unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY E2E_ANTHROPIC_API_KEY
    pick_model_slug "$runtime"
  )
  assert_eq "$label" "$got" "$want"
}
|
|
|
|
echo "Test: pick_model_slug — per-runtime dispatch"
echo

# ── Per-runtime branches (the load-bearing ones for synth-E2E) ──
run_test "hermes → slash-form (derive-provider.sh contract)" hermes "openai/gpt-4o"
run_test "codex → slash-form fallback" codex "openai/gpt-4o"
run_test "claude-code → OAuth/default alias" claude-code "sonnet"

# claude-code with provider keys. Each case runs in a command-substitution
# subshell, unsets the override (and the key it is not testing), and sets
# the key under test only for the pick_model_slug call itself.
got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; E2E_MINIMAX_API_KEY="mx-test" pick_model_slug claude-code)
assert_eq "claude-code + MiniMax key → MiniMax model" "$got" "MiniMax-M2"

got=$(unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY; E2E_ANTHROPIC_API_KEY="sk-ant-test" pick_model_slug claude-code)
assert_eq "claude-code + Anthropic API key → Anthropic API model" "$got" "claude-sonnet-4-6"

got=$(unset E2E_MODEL_SLUG; E2E_MINIMAX_API_KEY="mx-priority" E2E_ANTHROPIC_API_KEY="sk-ant-loser" pick_model_slug claude-code)
assert_eq "claude-code + both keys → MiniMax priority" "$got" "MiniMax-M2"

# ── Fallback for unknown runtime ──
# Picks slash-form (hermes-shaped) since hermes is the historical
# default and most third-party runtimes behave hermes-like. Pinning
# this so a future "smarter" fallback (e.g., empty string, error) is
# a deliberate choice, not silent drift.
run_test "unknown runtime → slash-form fallback" gemini "openai/gpt-4o"
run_test "empty runtime → slash-form fallback" "" "openai/gpt-4o"
|
|
|
|
# ── Override via E2E_MODEL_SLUG ──
# When the operator sets E2E_MODEL_SLUG, the per-runtime dispatch is
# bypassed. Used during workflow_dispatch to A/B specific slugs.
echo
echo "Test: pick_model_slug — E2E_MODEL_SLUG override"
echo

# The override is set only for the single pick_model_slug call inside the
# command substitution, so it cannot leak into later cases.
got=$(E2E_MODEL_SLUG="anthropic:claude-opus-4-7" pick_model_slug codex)
assert_eq "override beats codex default" "$got" "anthropic:claude-opus-4-7"

got=$(E2E_MODEL_SLUG="custom/whatever" pick_model_slug hermes)
assert_eq "override beats hermes default" "$got" "custom/whatever"

got=$(E2E_MODEL_SLUG="some-bare-id" pick_model_slug claude-code)
assert_eq "override beats claude-code default" "$got" "some-bare-id"

# Empty-string override does NOT activate (falls through to dispatch).
# This is the historical bash idiom: -n "" → false → no override. Pin
# it because changing this behavior (e.g. via -v test) would silently
# break the dispatch when an operator passes "" to clear an inherited
# env var.
got=$(E2E_MODEL_SLUG="" pick_model_slug codex)
assert_eq "empty-string override falls through to dispatch" "$got" "openai/gpt-4o"
|
|
|
|
# ── Summary ──
# Print the tallies collected by assert_eq.
echo
echo "─────────────────────────────────────────────────"
echo "PASSED: $PASS"
echo "FAILED: $FAIL"
echo "─────────────────────────────────────────────────"

# This is the last command shown, so its status becomes the script's exit
# status: zero only when no assertion failed.
[ "$FAIL" -eq 0 ]
|