diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a642e1a3..f2512cb9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -272,6 +272,14 @@ jobs: find tests/e2e infra/scripts -type f -name '*.sh' -print0 \ | xargs -0 shellcheck --severity=warning + - if: needs.changes.outputs.scripts == 'true' + name: Lint cleanup-trap hygiene (RFC #2873) + # Asserts every shell E2E test that calls `mktemp` also installs + # an EXIT trap. Catches the /tmp-leak class — a missing trap + # silently leaks scratch into CI runners (~10-100KB per run). + # See tests/e2e/lint_cleanup_traps.sh for the rule + fix pattern. + run: bash tests/e2e/lint_cleanup_traps.sh + - if: needs.changes.outputs.scripts == 'true' name: Run E2E bash unit tests (no live infra) # Pure-bash unit tests for E2E helper libs (lib/*.sh). These pin diff --git a/tests/e2e/lint_cleanup_traps.sh b/tests/e2e/lint_cleanup_traps.sh new file mode 100755 index 00000000..a20d9a6f --- /dev/null +++ b/tests/e2e/lint_cleanup_traps.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# lint_cleanup_traps.sh — regression gate for the OSS-shape program's +# "all E2E tests must have proper cleanup" bar (RFC #2873). +# +# Asserts: every shell file under tests/e2e/ that calls `mktemp` ALSO +# installs an `EXIT` trap somewhere in the file. The trap is the +# minimum-viable guarantee that scratch files won't leak when an +# assertion or curl exits the script non-zero. +# +# Why this lints (instead of the test runner enforcing): shell scripts +# can't easily be wrapped by an outer harness without breaking the +# `WSID=… ./test_x.sh` invocation contract. Static gate is the cheap +# defense. +# +# Usage: +# tests/e2e/lint_cleanup_traps.sh +# +# Exits non-zero if any test_*.sh has unmatched mktemp/trap. CI invokes +# it from the existing Shellcheck (E2E scripts) workflow. + +set -euo pipefail + +cd "$(dirname "$0")" + +violations=0 +for f in test_*.sh; do + if grep -qE '\bmktemp\b' "$f"; then + if ! grep -qE 'trap[[:space:]]+.*EXIT' "$f"; then + echo "::error file=tests/e2e/$f::has 'mktemp' but no 'trap … EXIT' — scratch will leak when test exits non-zero. Pattern: TMPDIR_E2E=\$(mktemp -d -t prefix-XXX); trap 'rm -rf \"\$TMPDIR_E2E\"' EXIT INT TERM" + violations=$((violations + 1)) + fi + fi +done + +if [ "$violations" -gt 0 ]; then + echo "::error::$violations shell E2E file(s) leak scratch on early exit. See above." + exit 1 +fi + +echo "✓ all $(grep -lE '\bmktemp\b' test_*.sh | wc -l | tr -d ' ') shell E2E files with mktemp also install an EXIT trap" diff --git a/tests/e2e/test_chat_attachments_e2e.sh b/tests/e2e/test_chat_attachments_e2e.sh index 76f46706..3d352f05 100755 --- a/tests/e2e/test_chat_attachments_e2e.sh +++ b/tests/e2e/test_chat_attachments_e2e.sh @@ -22,6 +22,13 @@ set -euo pipefail WSID="${WSID:?WSID= required}" BASE="${BASE:-http://localhost:8080}" +# Per-run scratch dir collected under one trap so every mktemp leak path +# (assertion failure, SIGINT, exit non-zero) is plugged. Pre-fix this test +# created a /tmp/hermes-e2e-XXXXXX.txt and never deleted it — ~10 KB × +# every CI run leaked into the runner. RFC #2873 cleanup-hygiene PR. +TMPDIR_E2E=$(mktemp -d -t chat-attachments-e2e-XXXXXX) +trap 'rm -rf "$TMPDIR_E2E"' EXIT INT TERM + log() { printf "\n=== %s ===\n" "$*"; } log "Preflight: workspace online?" @@ -29,7 +36,9 @@ STATUS=$(curl -s "$BASE/workspaces/$WSID" | python3 -c 'import json,sys;print(js [ "$STATUS" = "online" ] || { echo "workspace not online ($STATUS)"; exit 1; } log "Step 1 — Upload a text file via /chat/uploads" -TEST_FILE=$(mktemp -t hermes-e2e-XXXXXX.txt) +# `mktemp ` is portable across BSD (macOS) + GNU; -p is +# GNU-only and breaks local dev runs on Mac. +TEST_FILE=$(mktemp "$TMPDIR_E2E/hermes-e2e-XXXXXX.txt") echo "secret code: $(openssl rand -hex 4)-$(openssl rand -hex 4)" > "$TEST_FILE" EXPECTED=$(cat "$TEST_FILE" | awk '{print $NF}') UPLOAD=$(curl -s -X POST "$BASE/workspaces/$WSID/chat/uploads" -F "files=@$TEST_FILE") diff --git a/tests/e2e/test_chat_attachments_multiruntime_e2e.sh b/tests/e2e/test_chat_attachments_multiruntime_e2e.sh index 9601132e..3d93e186 100755 --- a/tests/e2e/test_chat_attachments_multiruntime_e2e.sh +++ b/tests/e2e/test_chat_attachments_multiruntime_e2e.sh @@ -24,6 +24,15 @@ set -uo pipefail BASE="${BASE:-http://localhost:8080}" fails=0 +# Per-run scratch dir collected under one trap so every per-runtime +# round_trip mktemp leak path (assertion failure, SIGINT, exit +# non-zero, function early-return between mktemp and rm) is plugged. +# Pre-fix, round_trip's `rm -f "$test_file"` only fired on the success +# path inside the function — every test_failure path before the rm +# leaked the scratch into /tmp permanently. RFC #2873 cleanup-hygiene PR. +TMPDIR_E2E=$(mktemp -d -t mr-attachments-e2e-XXXXXX) +trap 'rm -rf "$TMPDIR_E2E"' EXIT INT TERM + has_patch_in_container() { local container="$1" # Signal that platform helpers are available AND wired into the @@ -74,12 +83,16 @@ print(f"executor: claude-code monkey-patch active ({name})") round_trip() { local label="$1" wsid="$2" local test_file expected upload uri payload reply reply_text - test_file=$(mktemp -t e2e-mr-XXXX.txt) + # Scratch goes under TMPDIR_E2E; the script-level trap rm -rf's the + # whole dir on exit, so per-file rm calls are unnecessary AND make + # error paths leak when forgotten. + # `mktemp ` is portable across BSD (macOS) + GNU; -p is GNU-only. + test_file=$(mktemp "$TMPDIR_E2E/e2e-mr-${label}-XXXX.txt") expected="secret-$(openssl rand -hex 6)" echo "$expected" > "$test_file" upload=$(curl -s -X POST "$BASE/workspaces/$wsid/chat/uploads" -F "files=@$test_file") uri=$(echo "$upload" | python3 -c 'import json,sys;print(json.load(sys.stdin)["files"][0]["uri"])' 2>/dev/null) - [ -z "$uri" ] && { echo "FAIL $label: upload returned no URI: $upload"; rm -f "$test_file"; return 1; } + [ -z "$uri" ] && { echo "FAIL $label: upload returned no URI: $upload"; return 1; } payload=$(URI="$uri" python3 -c ' import json, os uri = os.environ["URI"] @@ -103,7 +116,8 @@ try: except Exception as exc: print(f"(parse failed: {exc})") ' 2>&1) - rm -f "$test_file" + # $test_file lives under TMPDIR_E2E; the script-level trap rm -rf's + # the dir on exit, covering every return path including SIGINT. if echo "$reply_text" | grep -qF "$expected"; then echo "PASS $label round-trip: agent quoted $expected" diff --git a/tests/e2e/test_notify_attachments_e2e.sh b/tests/e2e/test_notify_attachments_e2e.sh index 6957c7e3..2a5f12a7 100755 --- a/tests/e2e/test_notify_attachments_e2e.sh +++ b/tests/e2e/test_notify_attachments_e2e.sh @@ -29,11 +29,20 @@ FAIL=0 WSID="" cleanup() { + # Workspace teardown — best-effort, ignore errors so an unrelated CP + # outage doesn't shadow a real test failure. if [ -n "$WSID" ]; then curl -s -X DELETE "$BASE/workspaces/$WSID?confirm=true" > /dev/null || true fi + # /tmp scratch — pre-fix only ran on success path (the unconditional + # rm at the bottom of the script). Trap-based path lets the file leak + # whenever the script exits non-zero before reaching the rm. RFC #2873 + # cleanup-hygiene PR. + if [ -n "${TMPF:-}" ]; then + rm -f "$TMPF" + fi } -trap cleanup EXIT +trap cleanup EXIT INT TERM assert() { local label="$1" @@ -230,7 +239,8 @@ for r in rows: assert "stored URI matches uploaded URI" "$STORED_URI" "$URI" fi -rm -f "$TMPF" +# $TMPF cleanup happens via the trap-cleanup function above — covers +# both the success path and any early exit / SIGINT. echo "" echo "=== Results: $PASS passed, $FAIL failed ===" diff --git a/workspace-server/internal/handlers/org_import.go b/workspace-server/internal/handlers/org_import.go index 40a67604..70151e09 100644 --- a/workspace-server/internal/handlers/org_import.go +++ b/workspace-server/internal/handlers/org_import.go @@ -51,11 +51,10 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX model = defaults.Model } if model == "" { - if runtime == "claude-code" { - model = "sonnet" - } else { - model = "anthropic:claude-opus-4-7" - } + // SSOT: per-runtime defaults live in models/runtime_defaults.go + // (see RFC #2873). Consolidated from a duplicate of the same + // branch in workspace_provision.go. + model = models.DefaultModel(runtime) } tier := ws.Tier if tier == 0 { diff --git a/workspace-server/internal/handlers/workspace_provision.go b/workspace-server/internal/handlers/workspace_provision.go index 7734aff0..981ee5da 100644 --- a/workspace-server/internal/handlers/workspace_provision.go +++ b/workspace-server/internal/handlers/workspace_provision.go @@ -534,11 +534,10 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model // Generate a minimal config.yaml model := payload.Model if model == "" { - if runtime == "claude-code" { - model = "sonnet" - } else { - model = "anthropic:claude-opus-4-7" - } + // SSOT: per-runtime defaults live in models/runtime_defaults.go + // (see RFC #2873). Was previously duplicated here AND in + // org_import.go; consolidating prevents silent drift. + model = models.DefaultModel(runtime) } // Sanitize name/role/model for YAML safety — always double-quote so diff --git a/workspace-server/internal/models/runtime_defaults.go b/workspace-server/internal/models/runtime_defaults.go new file mode 100644 index 00000000..320586e8 --- /dev/null +++ b/workspace-server/internal/models/runtime_defaults.go @@ -0,0 +1,39 @@ +package models + +// runtime_defaults.go — single source of truth for per-runtime defaults +// the platform applies when the operator/agent didn't supply a value. +// +// Why this lives in models/ (not handlers/): default selection is a +// pure data fact about the runtime, not handler logic. Multiple +// callers (Create-workspace handler, org-import handler, future +// auto-provision paths) need the same answer; concentrating the +// rule here means one edit when a runtime's default changes. +// +// Related work (RFC #2873): this is the seed for a future +// `RuntimeConfig` interface that will also expose `ProvisioningTimeout()`, +// `CapabilitiesSupported()`, and other per-runtime facts. For now the +// surface is one helper — extracted from the duplicate branch in +// workspace_provision.go:537 and org_import.go:54 that diverged silently +// during refactors before this consolidation. + +// DefaultModel returns the model slug to use when a workspace is +// created without an explicit model and the runtime can't infer one +// from its own config. +// +// - claude-code: "sonnet" — Anthropic's CLI accepts the short +// name and resolves it via the operator's anthropic-oauth or +// ANTHROPIC_API_KEY chain. +// - everything else (hermes, langgraph, crewai, autogen, deepagents, +// codex, openclaw, gemini-cli, external, ""): a fully-qualified +// vendor:model slug that the universal MODEL_PROVIDER chain in +// molecule-core PR #247 can route via per-vendor required_env. +// +// The function never returns an empty string; an unknown runtime +// gets the universal default rather than failing closed (matches the +// pre-refactor behavior — both call sites used the same fallback). +func DefaultModel(runtime string) string { + if runtime == "claude-code" { + return "sonnet" + } + return "anthropic:claude-opus-4-7" +} diff --git a/workspace-server/internal/models/runtime_defaults_test.go b/workspace-server/internal/models/runtime_defaults_test.go new file mode 100644 index 00000000..bab673ac --- /dev/null +++ b/workspace-server/internal/models/runtime_defaults_test.go @@ -0,0 +1,62 @@ +package models + +import "testing" + +// TestDefaultModel pins the contract: known runtimes return their +// expected default; unknowns and the empty string fall through to the +// universal default. Add new runtimes here as `case` entries — pre-fix +// adding a runtime required two source edits + an audit; post-SSOT it +// requires one entry in DefaultModel + one assertion here. +func TestDefaultModel(t *testing.T) { + cases := []struct { + runtime string + want string + }{ + // Known runtimes. + {"claude-code", "sonnet"}, + + // Universal fallback for everything else. Each runtime is named + // explicitly so a future drift (e.g., adding a hermes-specific + // branch) shows up as a failure on the runtime that drifted, not + // as a generic "unknown" failure. + {"hermes", "anthropic:claude-opus-4-7"}, + {"langgraph", "anthropic:claude-opus-4-7"}, + {"crewai", "anthropic:claude-opus-4-7"}, + {"autogen", "anthropic:claude-opus-4-7"}, + {"deepagents", "anthropic:claude-opus-4-7"}, + {"codex", "anthropic:claude-opus-4-7"}, + {"openclaw", "anthropic:claude-opus-4-7"}, + {"gemini-cli", "anthropic:claude-opus-4-7"}, + {"external", "anthropic:claude-opus-4-7"}, + + // Unknown / empty — fall through to universal default rather + // than failing closed. Pre-refactor both call sites also fell + // through; pinning the existing behavior, not changing it. + {"", "anthropic:claude-opus-4-7"}, + {"some-future-runtime", "anthropic:claude-opus-4-7"}, + {"CLAUDE-CODE", "anthropic:claude-opus-4-7"}, // case-sensitive — matches prior behavior + } + for _, tc := range cases { + t.Run(tc.runtime, func(t *testing.T) { + got := DefaultModel(tc.runtime) + if got != tc.want { + t.Errorf("DefaultModel(%q) = %q, want %q", tc.runtime, got, tc.want) + } + }) + } +} + +// TestDefaultModel_NeverEmpty — invariant: no input produces an empty +// string. The handlers that consume this would write empty into +// config.yaml, which the runtime then can't dispatch — pinning the +// non-empty contract here protects against a future "return early on +// unknown runtime" change that would silently break workspace creation. +func TestDefaultModel_NeverEmpty(t *testing.T) { + for _, runtime := range []string{ + "", "claude-code", "hermes", "unknown-runtime", + } { + if got := DefaultModel(runtime); got == "" { + t.Errorf("DefaultModel(%q) returned empty string", runtime) + } + } +}