Merge pull request #2878 from Molecule-AI/staging

staging → main: auto-promote 89ee8e4
This commit is contained in:
molecule-ai[bot] 2026-05-05 11:35:36 +00:00 committed by GitHub
commit 58253f0673
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 196 additions and 16 deletions

View File

@ -272,6 +272,14 @@ jobs:
find tests/e2e infra/scripts -type f -name '*.sh' -print0 \
| xargs -0 shellcheck --severity=warning
- if: needs.changes.outputs.scripts == 'true'
name: Lint cleanup-trap hygiene (RFC #2873)
# Asserts every shell E2E test that calls `mktemp` also installs
# an EXIT trap. Catches the /tmp-leak class — a missing trap
# silently leaks scratch into CI runners (~10-100KB per run).
# See tests/e2e/lint_cleanup_traps.sh for the rule + fix pattern.
run: bash tests/e2e/lint_cleanup_traps.sh
- if: needs.changes.outputs.scripts == 'true'
name: Run E2E bash unit tests (no live infra)
# Pure-bash unit tests for E2E helper libs (lib/*.sh). These pin

40
tests/e2e/lint_cleanup_traps.sh Executable file
View File

@ -0,0 +1,40 @@
#!/usr/bin/env bash
# lint_cleanup_traps.sh — regression gate for the OSS-shape program's
# "all E2E tests must have proper cleanup" bar (RFC #2873).
#
# Asserts: every shell file under tests/e2e/ that calls `mktemp` ALSO
# installs an `EXIT` trap somewhere in the file. The trap is the
# minimum-viable guarantee that scratch files won't leak when an
# assertion or curl exits the script non-zero.
#
# Why this lints (instead of the test runner enforcing): shell scripts
# can't easily be wrapped by an outer harness without breaking the
# `WSID=… ./test_x.sh` invocation contract. Static gate is the cheap
# defense.
#
# Usage:
# tests/e2e/lint_cleanup_traps.sh
#
# Exits non-zero if any test_*.sh has unmatched mktemp/trap. CI invokes
# it from the existing Shellcheck (E2E scripts) workflow.
set -euo pipefail
cd "$(dirname "$0")"
violations=0
for f in test_*.sh; do
if grep -qE '\bmktemp\b' "$f"; then
if ! grep -qE 'trap[[:space:]]+.*EXIT' "$f"; then
echo "::error file=tests/e2e/$f::has 'mktemp' but no 'trap … EXIT' — scratch will leak when test exits non-zero. Pattern: TMPDIR_E2E=\$(mktemp -d -t prefix-XXX); trap 'rm -rf \"\$TMPDIR_E2E\"' EXIT INT TERM"
violations=$((violations + 1))
fi
fi
done
if [ "$violations" -gt 0 ]; then
echo "::error::$violations shell E2E file(s) leak scratch on early exit. See above."
exit 1
fi
echo "✓ all $(grep -lE '\bmktemp\b' test_*.sh | wc -l | tr -d ' ') shell E2E files with mktemp also install an EXIT trap"

View File

@ -22,6 +22,13 @@ set -euo pipefail
WSID="${WSID:?WSID=<workspace-id> required}"
BASE="${BASE:-http://localhost:8080}"
# Per-run scratch dir collected under one trap so every mktemp leak path
# (assertion failure, SIGINT, exit non-zero) is plugged. Pre-fix this test
# created a /tmp/hermes-e2e-XXXXXX.txt and never deleted it — ~10 KB ×
# every CI run leaked into the runner. RFC #2873 cleanup-hygiene PR.
TMPDIR_E2E=$(mktemp -d -t chat-attachments-e2e-XXXXXX)
trap 'rm -rf "$TMPDIR_E2E"' EXIT INT TERM
log() { printf "\n=== %s ===\n" "$*"; }
log "Preflight: workspace online?"
@ -29,7 +36,9 @@ STATUS=$(curl -s "$BASE/workspaces/$WSID" | python3 -c 'import json,sys;print(js
[ "$STATUS" = "online" ] || { echo "workspace not online ($STATUS)"; exit 1; }
log "Step 1 — Upload a text file via /chat/uploads"
TEST_FILE=$(mktemp -t hermes-e2e-XXXXXX.txt)
# `mktemp <full-template>` is portable across BSD (macOS) + GNU; -p is
# GNU-only and breaks local dev runs on Mac.
TEST_FILE=$(mktemp "$TMPDIR_E2E/hermes-e2e-XXXXXX.txt")
echo "secret code: $(openssl rand -hex 4)-$(openssl rand -hex 4)" > "$TEST_FILE"
EXPECTED=$(cat "$TEST_FILE" | awk '{print $NF}')
UPLOAD=$(curl -s -X POST "$BASE/workspaces/$WSID/chat/uploads" -F "files=@$TEST_FILE")

View File

@ -24,6 +24,15 @@ set -uo pipefail
BASE="${BASE:-http://localhost:8080}"
fails=0
# Per-run scratch dir collected under one trap so every per-runtime
# round_trip mktemp leak path (assertion failure, SIGINT, exit
# non-zero, function early-return between mktemp and rm) is plugged.
# Pre-fix, round_trip's `rm -f "$test_file"` only fired on the success
# path inside the function — every test_failure path before the rm
# leaked the scratch into /tmp permanently. RFC #2873 cleanup-hygiene PR.
TMPDIR_E2E=$(mktemp -d -t mr-attachments-e2e-XXXXXX)
trap 'rm -rf "$TMPDIR_E2E"' EXIT INT TERM
has_patch_in_container() {
local container="$1"
# Signal that platform helpers are available AND wired into the
@ -74,12 +83,16 @@ print(f"executor: claude-code monkey-patch active ({name})")
round_trip() {
local label="$1" wsid="$2"
local test_file expected upload uri payload reply reply_text
test_file=$(mktemp -t e2e-mr-XXXX.txt)
# Scratch goes under TMPDIR_E2E; the script-level trap rm -rf's the
# whole dir on exit, so per-file rm calls are unnecessary AND make
# error paths leak when forgotten.
# `mktemp <full-template>` is portable across BSD (macOS) + GNU; -p is GNU-only.
test_file=$(mktemp "$TMPDIR_E2E/e2e-mr-${label}-XXXX.txt")
expected="secret-$(openssl rand -hex 6)"
echo "$expected" > "$test_file"
upload=$(curl -s -X POST "$BASE/workspaces/$wsid/chat/uploads" -F "files=@$test_file")
uri=$(echo "$upload" | python3 -c 'import json,sys;print(json.load(sys.stdin)["files"][0]["uri"])' 2>/dev/null)
[ -z "$uri" ] && { echo "FAIL $label: upload returned no URI: $upload"; rm -f "$test_file"; return 1; }
[ -z "$uri" ] && { echo "FAIL $label: upload returned no URI: $upload"; return 1; }
payload=$(URI="$uri" python3 -c '
import json, os
uri = os.environ["URI"]
@ -103,7 +116,8 @@ try:
except Exception as exc:
print(f"(parse failed: {exc})")
' 2>&1)
rm -f "$test_file"
# $test_file lives under TMPDIR_E2E; the script-level trap rm -rf's
# the dir on exit, covering every return path including SIGINT.
if echo "$reply_text" | grep -qF "$expected"; then
echo "PASS $label round-trip: agent quoted $expected"

View File

@ -29,11 +29,20 @@ FAIL=0
WSID=""
cleanup() {
# Workspace teardown — best-effort, ignore errors so an unrelated CP
# outage doesn't shadow a real test failure.
if [ -n "$WSID" ]; then
curl -s -X DELETE "$BASE/workspaces/$WSID?confirm=true" > /dev/null || true
fi
# /tmp scratch — pre-fix only ran on success path (the unconditional
# rm at the bottom of the script). Trap-based path lets the file leak
# whenever the script exits non-zero before reaching the rm. RFC #2873
# cleanup-hygiene PR.
if [ -n "${TMPF:-}" ]; then
rm -f "$TMPF"
fi
}
trap cleanup EXIT
trap cleanup EXIT INT TERM
assert() {
local label="$1"
@ -230,7 +239,8 @@ for r in rows:
assert "stored URI matches uploaded URI" "$STORED_URI" "$URI"
fi
rm -f "$TMPF"
# $TMPF cleanup happens via the trap-cleanup function above — covers
# both the success path and any early exit / SIGINT.
echo ""
echo "=== Results: $PASS passed, $FAIL failed ==="

View File

@ -51,11 +51,10 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
model = defaults.Model
}
if model == "" {
if runtime == "claude-code" {
model = "sonnet"
} else {
model = "anthropic:claude-opus-4-7"
}
// SSOT: per-runtime defaults live in models/runtime_defaults.go
// (see RFC #2873). Consolidated from a duplicate of the same
// branch in workspace_provision.go.
model = models.DefaultModel(runtime)
}
tier := ws.Tier
if tier == 0 {

View File

@ -534,11 +534,10 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
// Generate a minimal config.yaml
model := payload.Model
if model == "" {
if runtime == "claude-code" {
model = "sonnet"
} else {
model = "anthropic:claude-opus-4-7"
}
// SSOT: per-runtime defaults live in models/runtime_defaults.go
// (see RFC #2873). Was previously duplicated here AND in
// org_import.go; consolidating prevents silent drift.
model = models.DefaultModel(runtime)
}
// Sanitize name/role/model for YAML safety — always double-quote so

View File

@ -0,0 +1,39 @@
package models
// runtime_defaults.go — single source of truth for per-runtime defaults
// the platform applies when the operator/agent didn't supply a value.
//
// Why this lives in models/ (not handlers/): default selection is a
// pure data fact about the runtime, not handler logic. Multiple
// callers (Create-workspace handler, org-import handler, future
// auto-provision paths) need the same answer; concentrating the
// rule here means one edit when a runtime's default changes.
//
// Related work (RFC #2873): this is the seed for a future
// `RuntimeConfig` interface that will also expose `ProvisioningTimeout()`,
// `CapabilitiesSupported()`, and other per-runtime facts. For now the
// surface is one helper — extracted from the duplicate branch in
// workspace_provision.go:537 and org_import.go:54 that diverged silently
// during refactors before this consolidation.
// DefaultModel returns the model slug to use when a workspace is
// created without an explicit model and the runtime can't infer one
// from its own config.
//
// - claude-code: "sonnet" — Anthropic's CLI accepts the short
// name and resolves it via the operator's anthropic-oauth or
// ANTHROPIC_API_KEY chain.
// - everything else (hermes, langgraph, crewai, autogen, deepagents,
// codex, openclaw, gemini-cli, external, ""): a fully-qualified
// vendor:model slug that the universal MODEL_PROVIDER chain in
// molecule-core PR #247 can route via per-vendor required_env.
//
// The function never returns an empty string; an unknown runtime
// gets the universal default rather than failing closed (matches the
// pre-refactor behavior — both call sites used the same fallback).
func DefaultModel(runtime string) string {
if runtime == "claude-code" {
return "sonnet"
}
return "anthropic:claude-opus-4-7"
}

View File

@ -0,0 +1,62 @@
package models
import "testing"
// TestDefaultModel pins the contract: known runtimes return their
// expected default; unknowns and the empty string fall through to the
// universal default. Add new runtimes here as `case` entries — pre-fix
// adding a runtime required two source edits + an audit; post-SSOT it
// requires one entry in DefaultModel + one assertion here.
func TestDefaultModel(t *testing.T) {
cases := []struct {
runtime string
want string
}{
// Known runtimes.
{"claude-code", "sonnet"},
// Universal fallback for everything else. Each runtime is named
// explicitly so a future drift (e.g., adding a hermes-specific
// branch) shows up as a failure on the runtime that drifted, not
// as a generic "unknown" failure.
{"hermes", "anthropic:claude-opus-4-7"},
{"langgraph", "anthropic:claude-opus-4-7"},
{"crewai", "anthropic:claude-opus-4-7"},
{"autogen", "anthropic:claude-opus-4-7"},
{"deepagents", "anthropic:claude-opus-4-7"},
{"codex", "anthropic:claude-opus-4-7"},
{"openclaw", "anthropic:claude-opus-4-7"},
{"gemini-cli", "anthropic:claude-opus-4-7"},
{"external", "anthropic:claude-opus-4-7"},
// Unknown / empty — fall through to universal default rather
// than failing closed. Pre-refactor both call sites also fell
// through; pinning the existing behavior, not changing it.
{"", "anthropic:claude-opus-4-7"},
{"some-future-runtime", "anthropic:claude-opus-4-7"},
{"CLAUDE-CODE", "anthropic:claude-opus-4-7"}, // case-sensitive — matches prior behavior
}
for _, tc := range cases {
t.Run(tc.runtime, func(t *testing.T) {
got := DefaultModel(tc.runtime)
if got != tc.want {
t.Errorf("DefaultModel(%q) = %q, want %q", tc.runtime, got, tc.want)
}
})
}
}
// TestDefaultModel_NeverEmpty — invariant: no input produces an empty
// string. The handlers that consume this would write empty into
// config.yaml, which the runtime then can't dispatch — pinning the
// non-empty contract here protects against a future "return early on
// unknown runtime" change that would silently break workspace creation.
func TestDefaultModel_NeverEmpty(t *testing.T) {
for _, runtime := range []string{
"", "claude-code", "hermes", "unknown-runtime",
} {
if got := DefaultModel(runtime); got == "" {
t.Errorf("DefaultModel(%q) returned empty string", runtime)
}
}
}