forked from molecule-ai/molecule-core
Merge pull request #2878 from Molecule-AI/staging
staging → main: auto-promote 89ee8e4
This commit is contained in:
commit
58253f0673
8
.github/workflows/ci.yml
vendored
8
.github/workflows/ci.yml
vendored
@ -272,6 +272,14 @@ jobs:
|
||||
find tests/e2e infra/scripts -type f -name '*.sh' -print0 \
|
||||
| xargs -0 shellcheck --severity=warning
|
||||
|
||||
- if: needs.changes.outputs.scripts == 'true'
|
||||
name: Lint cleanup-trap hygiene (RFC #2873)
|
||||
# Asserts every shell E2E test that calls `mktemp` also installs
|
||||
# an EXIT trap. Catches the /tmp-leak class — a missing trap
|
||||
# silently leaks scratch into CI runners (~10-100KB per run).
|
||||
# See tests/e2e/lint_cleanup_traps.sh for the rule + fix pattern.
|
||||
run: bash tests/e2e/lint_cleanup_traps.sh
|
||||
|
||||
- if: needs.changes.outputs.scripts == 'true'
|
||||
name: Run E2E bash unit tests (no live infra)
|
||||
# Pure-bash unit tests for E2E helper libs (lib/*.sh). These pin
|
||||
|
||||
40
tests/e2e/lint_cleanup_traps.sh
Executable file
40
tests/e2e/lint_cleanup_traps.sh
Executable file
@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env bash
|
||||
# lint_cleanup_traps.sh — regression gate for the OSS-shape program's
|
||||
# "all E2E tests must have proper cleanup" bar (RFC #2873).
|
||||
#
|
||||
# Asserts: every shell file under tests/e2e/ that calls `mktemp` ALSO
|
||||
# installs an `EXIT` trap somewhere in the file. The trap is the
|
||||
# minimum-viable guarantee that scratch files won't leak when an
|
||||
# assertion or curl exits the script non-zero.
|
||||
#
|
||||
# Why this lints (instead of the test runner enforcing): shell scripts
|
||||
# can't easily be wrapped by an outer harness without breaking the
|
||||
# `WSID=… ./test_x.sh` invocation contract. Static gate is the cheap
|
||||
# defense.
|
||||
#
|
||||
# Usage:
|
||||
# tests/e2e/lint_cleanup_traps.sh
|
||||
#
|
||||
# Exits non-zero if any test_*.sh has unmatched mktemp/trap. CI invokes
|
||||
# it from the existing Shellcheck (E2E scripts) workflow.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
cd "$(dirname "$0")"
|
||||
|
||||
violations=0
|
||||
for f in test_*.sh; do
|
||||
if grep -qE '\bmktemp\b' "$f"; then
|
||||
if ! grep -qE 'trap[[:space:]]+.*EXIT' "$f"; then
|
||||
echo "::error file=tests/e2e/$f::has 'mktemp' but no 'trap … EXIT' — scratch will leak when test exits non-zero. Pattern: TMPDIR_E2E=\$(mktemp -d -t prefix-XXX); trap 'rm -rf \"\$TMPDIR_E2E\"' EXIT INT TERM"
|
||||
violations=$((violations + 1))
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$violations" -gt 0 ]; then
|
||||
echo "::error::$violations shell E2E file(s) leak scratch on early exit. See above."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ all $(grep -lE '\bmktemp\b' test_*.sh | wc -l | tr -d ' ') shell E2E files with mktemp also install an EXIT trap"
|
||||
@ -22,6 +22,13 @@ set -euo pipefail
|
||||
WSID="${WSID:?WSID=<workspace-id> required}"
|
||||
BASE="${BASE:-http://localhost:8080}"
|
||||
|
||||
# Per-run scratch dir collected under one trap so every mktemp leak path
|
||||
# (assertion failure, SIGINT, exit non-zero) is plugged. Pre-fix this test
|
||||
# created a /tmp/hermes-e2e-XXXXXX.txt and never deleted it — ~10 KB ×
|
||||
# every CI run leaked into the runner. RFC #2873 cleanup-hygiene PR.
|
||||
TMPDIR_E2E=$(mktemp -d -t chat-attachments-e2e-XXXXXX)
|
||||
trap 'rm -rf "$TMPDIR_E2E"' EXIT INT TERM
|
||||
|
||||
log() { printf "\n=== %s ===\n" "$*"; }
|
||||
|
||||
log "Preflight: workspace online?"
|
||||
@ -29,7 +36,9 @@ STATUS=$(curl -s "$BASE/workspaces/$WSID" | python3 -c 'import json,sys;print(js
|
||||
[ "$STATUS" = "online" ] || { echo "workspace not online ($STATUS)"; exit 1; }
|
||||
|
||||
log "Step 1 — Upload a text file via /chat/uploads"
|
||||
TEST_FILE=$(mktemp -t hermes-e2e-XXXXXX.txt)
|
||||
# `mktemp <full-template>` is portable across BSD (macOS) + GNU; -p is
|
||||
# GNU-only and breaks local dev runs on Mac.
|
||||
TEST_FILE=$(mktemp "$TMPDIR_E2E/hermes-e2e-XXXXXX.txt")
|
||||
echo "secret code: $(openssl rand -hex 4)-$(openssl rand -hex 4)" > "$TEST_FILE"
|
||||
EXPECTED=$(cat "$TEST_FILE" | awk '{print $NF}')
|
||||
UPLOAD=$(curl -s -X POST "$BASE/workspaces/$WSID/chat/uploads" -F "files=@$TEST_FILE")
|
||||
|
||||
@ -24,6 +24,15 @@ set -uo pipefail
|
||||
BASE="${BASE:-http://localhost:8080}"
|
||||
fails=0
|
||||
|
||||
# Per-run scratch dir collected under one trap so every per-runtime
|
||||
# round_trip mktemp leak path (assertion failure, SIGINT, exit
|
||||
# non-zero, function early-return between mktemp and rm) is plugged.
|
||||
# Pre-fix, round_trip's `rm -f "$test_file"` only fired on the success
|
||||
# path inside the function — every test_failure path before the rm
|
||||
# leaked the scratch into /tmp permanently. RFC #2873 cleanup-hygiene PR.
|
||||
TMPDIR_E2E=$(mktemp -d -t mr-attachments-e2e-XXXXXX)
|
||||
trap 'rm -rf "$TMPDIR_E2E"' EXIT INT TERM
|
||||
|
||||
has_patch_in_container() {
|
||||
local container="$1"
|
||||
# Signal that platform helpers are available AND wired into the
|
||||
@ -74,12 +83,16 @@ print(f"executor: claude-code monkey-patch active ({name})")
|
||||
round_trip() {
|
||||
local label="$1" wsid="$2"
|
||||
local test_file expected upload uri payload reply reply_text
|
||||
test_file=$(mktemp -t e2e-mr-XXXX.txt)
|
||||
# Scratch goes under TMPDIR_E2E; the script-level trap rm -rf's the
|
||||
# whole dir on exit, so per-file rm calls are unnecessary AND make
|
||||
# error paths leak when forgotten.
|
||||
# `mktemp <full-template>` is portable across BSD (macOS) + GNU; -p is GNU-only.
|
||||
test_file=$(mktemp "$TMPDIR_E2E/e2e-mr-${label}-XXXX.txt")
|
||||
expected="secret-$(openssl rand -hex 6)"
|
||||
echo "$expected" > "$test_file"
|
||||
upload=$(curl -s -X POST "$BASE/workspaces/$wsid/chat/uploads" -F "files=@$test_file")
|
||||
uri=$(echo "$upload" | python3 -c 'import json,sys;print(json.load(sys.stdin)["files"][0]["uri"])' 2>/dev/null)
|
||||
[ -z "$uri" ] && { echo "FAIL $label: upload returned no URI: $upload"; rm -f "$test_file"; return 1; }
|
||||
[ -z "$uri" ] && { echo "FAIL $label: upload returned no URI: $upload"; return 1; }
|
||||
payload=$(URI="$uri" python3 -c '
|
||||
import json, os
|
||||
uri = os.environ["URI"]
|
||||
@ -103,7 +116,8 @@ try:
|
||||
except Exception as exc:
|
||||
print(f"(parse failed: {exc})")
|
||||
' 2>&1)
|
||||
rm -f "$test_file"
|
||||
# $test_file lives under TMPDIR_E2E; the script-level trap rm -rf's
|
||||
# the dir on exit, covering every return path including SIGINT.
|
||||
|
||||
if echo "$reply_text" | grep -qF "$expected"; then
|
||||
echo "PASS $label round-trip: agent quoted $expected"
|
||||
|
||||
@ -29,11 +29,20 @@ FAIL=0
|
||||
WSID=""
|
||||
|
||||
cleanup() {
|
||||
# Workspace teardown — best-effort, ignore errors so an unrelated CP
|
||||
# outage doesn't shadow a real test failure.
|
||||
if [ -n "$WSID" ]; then
|
||||
curl -s -X DELETE "$BASE/workspaces/$WSID?confirm=true" > /dev/null || true
|
||||
fi
|
||||
# /tmp scratch — pre-fix only ran on success path (the unconditional
|
||||
# rm at the bottom of the script). Trap-based path lets the file leak
|
||||
# whenever the script exits non-zero before reaching the rm. RFC #2873
|
||||
# cleanup-hygiene PR.
|
||||
if [ -n "${TMPF:-}" ]; then
|
||||
rm -f "$TMPF"
|
||||
fi
|
||||
}
|
||||
trap cleanup EXIT
|
||||
trap cleanup EXIT INT TERM
|
||||
|
||||
assert() {
|
||||
local label="$1"
|
||||
@ -230,7 +239,8 @@ for r in rows:
|
||||
assert "stored URI matches uploaded URI" "$STORED_URI" "$URI"
|
||||
fi
|
||||
|
||||
rm -f "$TMPF"
|
||||
# $TMPF cleanup happens via the trap-cleanup function above — covers
|
||||
# both the success path and any early exit / SIGINT.
|
||||
|
||||
echo ""
|
||||
echo "=== Results: $PASS passed, $FAIL failed ==="
|
||||
|
||||
@ -51,11 +51,10 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
model = defaults.Model
|
||||
}
|
||||
if model == "" {
|
||||
if runtime == "claude-code" {
|
||||
model = "sonnet"
|
||||
} else {
|
||||
model = "anthropic:claude-opus-4-7"
|
||||
}
|
||||
// SSOT: per-runtime defaults live in models/runtime_defaults.go
|
||||
// (see RFC #2873). Consolidated from a duplicate of the same
|
||||
// branch in workspace_provision.go.
|
||||
model = models.DefaultModel(runtime)
|
||||
}
|
||||
tier := ws.Tier
|
||||
if tier == 0 {
|
||||
|
||||
@ -534,11 +534,10 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
|
||||
// Generate a minimal config.yaml
|
||||
model := payload.Model
|
||||
if model == "" {
|
||||
if runtime == "claude-code" {
|
||||
model = "sonnet"
|
||||
} else {
|
||||
model = "anthropic:claude-opus-4-7"
|
||||
}
|
||||
// SSOT: per-runtime defaults live in models/runtime_defaults.go
|
||||
// (see RFC #2873). Was previously duplicated here AND in
|
||||
// org_import.go; consolidating prevents silent drift.
|
||||
model = models.DefaultModel(runtime)
|
||||
}
|
||||
|
||||
// Sanitize name/role/model for YAML safety — always double-quote so
|
||||
|
||||
39
workspace-server/internal/models/runtime_defaults.go
Normal file
39
workspace-server/internal/models/runtime_defaults.go
Normal file
@ -0,0 +1,39 @@
|
||||
package models
|
||||
|
||||
// runtime_defaults.go — single source of truth for per-runtime defaults
|
||||
// the platform applies when the operator/agent didn't supply a value.
|
||||
//
|
||||
// Why this lives in models/ (not handlers/): default selection is a
|
||||
// pure data fact about the runtime, not handler logic. Multiple
|
||||
// callers (Create-workspace handler, org-import handler, future
|
||||
// auto-provision paths) need the same answer; concentrating the
|
||||
// rule here means one edit when a runtime's default changes.
|
||||
//
|
||||
// Related work (RFC #2873): this is the seed for a future
|
||||
// `RuntimeConfig` interface that will also expose `ProvisioningTimeout()`,
|
||||
// `CapabilitiesSupported()`, and other per-runtime facts. For now the
|
||||
// surface is one helper — extracted from the duplicate branch in
|
||||
// workspace_provision.go:537 and org_import.go:54 that diverged silently
|
||||
// during refactors before this consolidation.
|
||||
|
||||
// DefaultModel returns the model slug to use when a workspace is
|
||||
// created without an explicit model and the runtime can't infer one
|
||||
// from its own config.
|
||||
//
|
||||
// - claude-code: "sonnet" — Anthropic's CLI accepts the short
|
||||
// name and resolves it via the operator's anthropic-oauth or
|
||||
// ANTHROPIC_API_KEY chain.
|
||||
// - everything else (hermes, langgraph, crewai, autogen, deepagents,
|
||||
// codex, openclaw, gemini-cli, external, ""): a fully-qualified
|
||||
// vendor:model slug that the universal MODEL_PROVIDER chain in
|
||||
// molecule-core PR #247 can route via per-vendor required_env.
|
||||
//
|
||||
// The function never returns an empty string; an unknown runtime
|
||||
// gets the universal default rather than failing closed (matches the
|
||||
// pre-refactor behavior — both call sites used the same fallback).
|
||||
func DefaultModel(runtime string) string {
|
||||
if runtime == "claude-code" {
|
||||
return "sonnet"
|
||||
}
|
||||
return "anthropic:claude-opus-4-7"
|
||||
}
|
||||
62
workspace-server/internal/models/runtime_defaults_test.go
Normal file
62
workspace-server/internal/models/runtime_defaults_test.go
Normal file
@ -0,0 +1,62 @@
|
||||
package models
|
||||
|
||||
import "testing"
|
||||
|
||||
// TestDefaultModel pins the contract: known runtimes return their
|
||||
// expected default; unknowns and the empty string fall through to the
|
||||
// universal default. Add new runtimes here as `case` entries — pre-fix
|
||||
// adding a runtime required two source edits + an audit; post-SSOT it
|
||||
// requires one entry in DefaultModel + one assertion here.
|
||||
func TestDefaultModel(t *testing.T) {
|
||||
cases := []struct {
|
||||
runtime string
|
||||
want string
|
||||
}{
|
||||
// Known runtimes.
|
||||
{"claude-code", "sonnet"},
|
||||
|
||||
// Universal fallback for everything else. Each runtime is named
|
||||
// explicitly so a future drift (e.g., adding a hermes-specific
|
||||
// branch) shows up as a failure on the runtime that drifted, not
|
||||
// as a generic "unknown" failure.
|
||||
{"hermes", "anthropic:claude-opus-4-7"},
|
||||
{"langgraph", "anthropic:claude-opus-4-7"},
|
||||
{"crewai", "anthropic:claude-opus-4-7"},
|
||||
{"autogen", "anthropic:claude-opus-4-7"},
|
||||
{"deepagents", "anthropic:claude-opus-4-7"},
|
||||
{"codex", "anthropic:claude-opus-4-7"},
|
||||
{"openclaw", "anthropic:claude-opus-4-7"},
|
||||
{"gemini-cli", "anthropic:claude-opus-4-7"},
|
||||
{"external", "anthropic:claude-opus-4-7"},
|
||||
|
||||
// Unknown / empty — fall through to universal default rather
|
||||
// than failing closed. Pre-refactor both call sites also fell
|
||||
// through; pinning the existing behavior, not changing it.
|
||||
{"", "anthropic:claude-opus-4-7"},
|
||||
{"some-future-runtime", "anthropic:claude-opus-4-7"},
|
||||
{"CLAUDE-CODE", "anthropic:claude-opus-4-7"}, // case-sensitive — matches prior behavior
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.runtime, func(t *testing.T) {
|
||||
got := DefaultModel(tc.runtime)
|
||||
if got != tc.want {
|
||||
t.Errorf("DefaultModel(%q) = %q, want %q", tc.runtime, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestDefaultModel_NeverEmpty — invariant: no input produces an empty
|
||||
// string. The handlers that consume this would write empty into
|
||||
// config.yaml, which the runtime then can't dispatch — pinning the
|
||||
// non-empty contract here protects against a future "return early on
|
||||
// unknown runtime" change that would silently break workspace creation.
|
||||
func TestDefaultModel_NeverEmpty(t *testing.T) {
|
||||
for _, runtime := range []string{
|
||||
"", "claude-code", "hermes", "unknown-runtime",
|
||||
} {
|
||||
if got := DefaultModel(runtime); got == "" {
|
||||
t.Errorf("DefaultModel(%q) returned empty string", runtime)
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user