diff --git a/canvas/src/components/tabs/__tests__/ConfigTab.hermes.test.tsx b/canvas/src/components/tabs/__tests__/ConfigTab.hermes.test.tsx
new file mode 100644
index 00000000..8aa1b6fa
--- /dev/null
+++ b/canvas/src/components/tabs/__tests__/ConfigTab.hermes.test.tsx
@@ -0,0 +1,181 @@
+// @vitest-environment jsdom
+//
+// Regression tests for ConfigTab hermes-workspace UX (#1894 + #1900).
+//
+// All four bugs this suite pins hit the same workspace on 2026-04-23:
+// a hermes-runtime workspace whose Config tab showed "LangGraph
+// (default)" in the runtime dropdown, an empty Model field, and a
+// scary red "No config.yaml found" banner. Clicking Save would
+// silently PATCH runtime back to LangGraph, breaking the workspace.
+//
+// Each test pins one invariant. If any fails, the bug is back.
+
+import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
+import { render, screen, fireEvent, cleanup, waitFor } from "@testing-library/react";
+import React from "react";
+
+afterEach(cleanup);
+
+// ── API mock ────────────────────────────────────────────────────────
+// ConfigTab calls three endpoints on load:
+//   1. GET /workspaces/:id                   — workspace metadata (runtime)
+//   2. GET /workspaces/:id/model             — model
+//   3. GET /workspaces/:id/files/config.yaml — template-managed config (may 404)
+// And GET /templates for the runtime dropdown options (wireApi below only
+// handles api.get; api.post is an inert stub).
+//
+// Each test wires the mock to return the shape that matches the scenario
+// it's pinning. Unhandled URLs default to rejecting so the test fails loud
+// if ConfigTab queries something unexpected.
+const apiGet = vi.fn();
+const apiPatch = vi.fn();
+const apiPut = vi.fn();
+vi.mock("@/lib/api", () => ({
+  api: {
+    get: (path: string) => apiGet(path),
+    patch: (path: string, body: unknown) => apiPatch(path, body),
+    put: (path: string, body: unknown) => apiPut(path, body),
+    post: vi.fn(),
+    del: vi.fn(),
+  },
+}));
+
+// Zustand store used by Save → restart. Not exercised in these tests.
+vi.mock("@/store/canvas", () => ({
+  useCanvasStore: Object.assign(
+    (selector: (s: unknown) => unknown) => selector({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }),
+    { getState: () => ({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }) },
+  ),
+}));
+
+// AgentCardSection fetches its own data — stub to avoid noise.
+// NOTE(review): the stub element was lost when this patch was archived
+// (angle-bracket content stripped); any inert placeholder works — confirm
+// against the original commit.
+vi.mock("../AgentCardSection", () => ({
+  AgentCardSection: () => <div data-testid="agent-card-stub" />,
+}));
+
+import { ConfigTab } from "../ConfigTab";
+
+// helper — wire the api.get mock for one scenario
+function wireApi(opts: {
+  workspaceRuntime?: string;
+  workspaceModel?: string;
+  configYamlContent?: string | null; // null = 404
+  templates?: Array<{ id: string; name?: string; runtime?: string; models?: unknown[] }>;
+}) {
+  apiGet.mockImplementation((path: string) => {
+    if (path === `/workspaces/ws-test`) {
+      return Promise.resolve({ runtime: opts.workspaceRuntime ?? "" });
+    }
+    if (path === `/workspaces/ws-test/model`) {
+      return Promise.resolve({ model: opts.workspaceModel ?? "" });
+    }
+    if (path === `/workspaces/ws-test/files/config.yaml`) {
+      if (opts.configYamlContent === null) {
+        return Promise.reject(new Error("not found"));
+      }
+      return Promise.resolve({ content: opts.configYamlContent ?? "" });
+    }
+    if (path === "/templates") {
+      return Promise.resolve(opts.templates ?? []);
+    }
+    return Promise.reject(new Error(`unmocked api.get: ${path}`));
+  });
+}
+
+beforeEach(() => {
+  apiGet.mockReset();
+  apiPatch.mockReset();
+  apiPut.mockReset();
+});
+
+describe("ConfigTab — hermes workspace", () => {
+  it("loads runtime from workspace metadata when config.yaml is missing (#1894 bug 1)", async () => {
+    // This is the hermes case: no platform config.yaml, so the form must
+    // fall back to GET /workspaces/:id's runtime field. Before the fix, the
+    // runtime dropdown showed "LangGraph (default)" because the fallback
+    // didn't exist.
+    wireApi({
+      workspaceRuntime: "hermes",
+      workspaceModel: "openai/gpt-4o",
+      configYamlContent: null,
+      templates: [{ id: "t-hermes", name: "Hermes", runtime: "hermes", models: [] }],
+    });
+
+    // NOTE(review): the <ConfigTab …/> element was stripped when this patch
+    // was archived; workspaceId="ws-test" is reconstructed from the mocked
+    // /workspaces/ws-test paths above — confirm the prop name.
+    render(<ConfigTab workspaceId="ws-test" />);
+
+    // Wait for loads
+    const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
+    expect((select as HTMLSelectElement).value).toBe("hermes");
+  });
+
+  it("does NOT show 'No config.yaml found' error for hermes (#1894 bug 3)", async () => {
+    // Hermes manages its own config at ~/.hermes/config.yaml on the
+    // workspace host — the platform config.yaml NOT existing is expected,
+    // not an error. Showing a red error banner misleads the user.
+    wireApi({
+      workspaceRuntime: "hermes",
+      configYamlContent: null,
+      templates: [{ id: "t-hermes", name: "Hermes", runtime: "hermes", models: [] }],
+    });
+
+    render(<ConfigTab workspaceId="ws-test" />);
+
+    await waitFor(() => {
+      const node = screen.queryByText(/No config\.yaml found/i);
+      // Assert the red error is absent; a gray info banner with the same
+      // phrase would also fail this (which is what we want — we don't
+      // want any "no config.yaml" phrasing on hermes at all).
+      expect(node).toBeNull();
+    });
+  });
+
+  it("shows hermes-specific info banner pointing to Terminal tab (#1894)", async () => {
+    wireApi({
+      workspaceRuntime: "hermes",
+      configYamlContent: null,
+      templates: [{ id: "t-hermes", name: "Hermes", runtime: "hermes", models: [] }],
+    });
+
+    render(<ConfigTab workspaceId="ws-test" />);
+
+    await waitFor(() => {
+      expect(screen.getByText(/Hermes manages its own config/i)).toBeTruthy();
+    });
+  });
+
+  it("DOES show 'No config.yaml found' error for langgraph workspace (default runtime)", async () => {
+    // Regression guard the other way — the gray info banner is hermes-
+    // specific. A langgraph workspace with no config.yaml SHOULD still
+    // see the red error so the user knows to provide a template config.
+    wireApi({
+      workspaceRuntime: "",
+      configYamlContent: null,
+      templates: [],
+    });
+
+    render(<ConfigTab workspaceId="ws-test" />);
+
+    await waitFor(() => {
+      expect(screen.getByText(/No config\.yaml found/i)).toBeTruthy();
+    });
+  });
+});
+
+describe("ConfigTab — config.yaml on disk", () => {
+  it("config.yaml runtime/model wins when present, workspace metadata is fallback", async () => {
+    // If the workspace DB has runtime=langgraph but config.yaml declares
+    // runtime: crewai, the form should show crewai (config.yaml wins).
+    // Prevents silent runtime drift across reads.
+    wireApi({
+      workspaceRuntime: "langgraph", // DB
+      configYamlContent: 'runtime: crewai\nmodel: "claude-opus"\n',
+      templates: [
+        { id: "t-crewai", name: "CrewAI", runtime: "crewai", models: [] },
+      ],
+    });
+
+    render(<ConfigTab workspaceId="ws-test" />);
+
+    const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
+    expect((select as HTMLSelectElement).value).toBe("crewai");
+  });
+});
diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh
index 06f46d2b..317c761b 100755
--- a/tests/e2e/test_staging_full_saas.sh
+++ b/tests/e2e/test_staging_full_saas.sh
@@ -354,9 +354,47 @@ print(parts[0].get('text', '') if parts else '')
 if [ -z "$AGENT_TEXT" ]; then
   fail "A2A returned no text. Raw: $A2A_RESP"
 fi
+
+# Specific error-class checks — each pattern caught a real P0 bug on
+# 2026-04-23 that a generic "error|exception" check missed or misreported:
+#
+#   "[hermes-agent error 401]"           → gateway API_SERVER_KEY not propagated (hermes #12)
+#   "Invalid API key"                    → tenant auth chain (CP #238 race)
+#   "model_not_found"                    → hermes custom provider slug passthrough (#13)
+#   "Encrypted content is not supported" → hermes codex_responses API misroute (#14)
+#   "Unknown provider"                   → bridge misconfigured PROVIDER= (regression of #13 fix)
+#   "hermes-agent unreachable"           → gateway process died
+#
+# Fail LOUD with the specific pattern so CI log + alert channel makes the
+# regression unambiguous.
+if echo "$AGENT_TEXT" | grep -qF "[hermes-agent error 401]"; then
+  fail "A2A — REGRESSION: hermes gateway auth broken (API_SERVER_KEY not in runtime env). See template-hermes#12. Raw: $AGENT_TEXT"
+fi
+if echo "$AGENT_TEXT" | grep -qF "hermes-agent unreachable"; then
+  fail "A2A — REGRESSION: hermes gateway process down. Check /var/log/hermes-gateway.log on the workspace EC2. Raw: $AGENT_TEXT"
+fi
+if echo "$AGENT_TEXT" | grep -qF "model_not_found"; then
+  fail "A2A — REGRESSION: model slug passed through with provider prefix. See template-hermes#13. Raw: $AGENT_TEXT"
+fi
+if echo "$AGENT_TEXT" | grep -qF "Encrypted content is not supported"; then
+  fail "A2A — REGRESSION: hermes custom provider hit /v1/responses instead of chat_completions. Config.yaml should declare api_mode: chat_completions. See template-hermes#14. Raw: $AGENT_TEXT"
+fi
+if echo "$AGENT_TEXT" | grep -qF "Unknown provider"; then
+  fail "A2A — REGRESSION: install.sh set PROVIDER to a value not in hermes's registry. Run 'hermes doctor' on the workspace to see valid values. Raw: $AGENT_TEXT"
+fi
+# NOTE(review): "Invalid API key" is listed in the pattern table above but has
+# no dedicated check here, and it would NOT match the generic error|exception
+# catch-all below — confirm whether a specific check was meant to exist.
+# Generic catch-all — falls through if none of the known regressions hit.
+if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
+  fail "A2A returned an error-shaped response: $AGENT_TEXT"
+fi
+
+# Content assertion — the prompt asks the model to reply with exactly "PONG".
+# Real models produce "PONG" (possibly with minor wrapping); a broken pipeline
+# that echoes the prompt back or returns truncated context won't. Normalize
+# to uppercase before matching to tolerate "pong" / "Pong".
+if ! echo "$AGENT_TEXT" | tr '[:lower:]' '[:upper:]' | grep -qF "PONG"; then
+  fail "A2A reply didn't contain expected PONG token. Real: $AGENT_TEXT"
+fi
+
 ok "A2A parent round-trip succeeded: \"${AGENT_TEXT:0:80}\""
 
 # ─── 9. 
HMA + peers + activity (full mode) ─────────────────────────────
diff --git a/tools/test-hermes-bridge.sh b/tools/test-hermes-bridge.sh
new file mode 100755
index 00000000..a1ee6328
--- /dev/null
+++ b/tools/test-hermes-bridge.sh
@@ -0,0 +1,199 @@
+#!/usr/bin/env bash
+# test-hermes-bridge.sh — regression tests for template-hermes install.sh's
+# OpenAI bridge logic. Runs offline (no network, no docker, no CI dependency).
+#
+# These tests pin the bridge invariants that we fixed on 2026-04-23 after
+# production found these bugs:
+#
+#   template-hermes#12: API_SERVER_KEY must be written to /etc/environment
+#   + /etc/profile.d/ so molecule-runtime inherits it.
+#
+#   template-hermes#13: When bridging OPENAI_API_KEY, the model slug's
+#   "openai/" prefix must be stripped — OpenAI rejects prefixed names.
+#
+#   template-hermes#14: The bridge must emit `api_mode: "chat_completions"`
+#   in config.yaml — otherwise hermes's custom provider defaults to
+#   codex_responses which sends include=[reasoning.encrypted_content],
+#   rejected by gpt-4o/gpt-4.1.
+#
+# Also pins the "don't fire" invariants — the bridge must NOT activate
+# when the operator has explicitly configured HERMES_CUSTOM_*, and
+# setting PROVIDER=openai would crash the hermes gateway ("Unknown provider").
+#
+# Invocation:
+#
+#   bash tools/test-hermes-bridge.sh /path/to/template-hermes/install.sh
+#
+# Default path: ../molecule-ai-workspace-template-hermes/install.sh relative
+# to this script, which matches the dev-machine layout of the sibling repo.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+INSTALL_SH="${1:-$SCRIPT_DIR/../../molecule-ai-workspace-template-hermes/install.sh}"
+
+if [ ! -f "$INSTALL_SH" ]; then
+  echo "error: install.sh not found at $INSTALL_SH" >&2
+  echo "usage: $0 [install.sh-path]" >&2
+  exit 2
+fi
+
+TMP=$(mktemp -d)
+trap 'rm -rf "$TMP"' EXIT
+
+PASS=0
+FAIL=0
+
+# run_case — extract just the bridge + config.yaml write blocks from
+# install.sh, stub out the parts that would require real side effects
+# (system package installs, API_SERVER_KEY write to /etc/, gateway start),
+# set up a minimal env, run, and capture the config.yaml output.
+#
+# Args:
+#   $1  = test name
+#   $2+ = env assignments (e.g. OPENAI_API_KEY=xxx, HERMES_DEFAULT_MODEL=openai/gpt-4o)
+#
+# NOTE(review): on harness failure this returns 1, and each caller runs at
+# top level under `set -e` — so the first failing case aborts the script
+# before the final summary prints. Confirm that fail-fast is intended.
+run_case() {
+  local name="$1"; shift
+  local case_dir="$TMP/$name"
+  mkdir -p "$case_dir"
+
+  # Build a minimal harness that:
+  #   1. Sources scripts/derive-provider.sh (real, from the template repo)
+  #   2. Applies the bridge if-block (inlined verbatim from install.sh)
+  #   3. Emits config.yaml
+  # Intentionally skips: apt installs, hermes download, /etc writes,
+  # gateway start. We care about the BRANCH LOGIC not the system effects.
+  local template_dir
+  template_dir=$(cd "$(dirname "$INSTALL_SH")" && pwd)
+
+  # The outer HERMES_HOME assignment covers the bash -c child env; the inner
+  # one re-pins it inside the harness (belt and braces, both expand here).
+  HERMES_HOME="$case_dir" \
+  bash -c "
+set -euo pipefail
+HERMES_HOME='$case_dir'
+$(for kv in "$@"; do printf 'export %s\n' "$kv"; done)
+# Source derive-provider from the real template repo
+. '$template_dir/scripts/derive-provider.sh'
+DEFAULT_MODEL=\"\${HERMES_DEFAULT_MODEL:-nousresearch/hermes-4-70b}\"

+# Bridge block — extracted 1:1 from install.sh (the shape must stay in sync).
+if [ \"\${PROVIDER}\" = \"custom\" ] && [ -n \"\${OPENAI_API_KEY:-}\" ] && [ -z \"\${HERMES_CUSTOM_BASE_URL:-}\" ] && [ -z \"\${HERMES_CUSTOM_API_KEY:-}\" ]; then
+  export HERMES_CUSTOM_BASE_URL='https://api.openai.com/v1'
+  export HERMES_CUSTOM_API_KEY=\"\${OPENAI_API_KEY}\"
+  export HERMES_CUSTOM_API_MODE='chat_completions'
+  DEFAULT_MODEL=\"\${DEFAULT_MODEL#openai/}\"
+fi

+# Emit config.yaml (same shape as install.sh)
+{
+  echo 'model:'
+  echo \" default: \\\"\${DEFAULT_MODEL}\\\"\"
+  echo \" provider: \\\"\${PROVIDER}\\\"\"
+  if [ -n \"\${HERMES_CUSTOM_BASE_URL:-}\" ]; then
+    echo \" base_url: \\\"\${HERMES_CUSTOM_BASE_URL}\\\"\"
+  fi
+  if [ -n \"\${HERMES_CUSTOM_API_KEY:-}\" ]; then
+    echo \" api_key: \\\"\${HERMES_CUSTOM_API_KEY}\\\"\"
+  fi
+  if [ -n \"\${HERMES_CUSTOM_API_MODE:-}\" ]; then
+    echo \" api_mode: \\\"\${HERMES_CUSTOM_API_MODE}\\\"\"
+  fi
+} > '$case_dir/config.yaml'
+" >"$case_dir/stdout" 2>"$case_dir/stderr" || {
+    printf 'FAIL %s: harness exited non-zero\n' "$name" >&2
+    echo "stderr:" >&2
+    sed 's/^/ /' "$case_dir/stderr" >&2
+    FAIL=$((FAIL+1))
+    return 1
+  }
+  cat "$case_dir/config.yaml"
+}
+
+# assert_in — assert a fragment appears in the config.yaml of the named case.
+assert_in() {
+  local name="$1" pattern="$2"
+  if grep -qF "$pattern" "$TMP/$name/config.yaml"; then
+    printf 'PASS %s: contains %q\n' "$name" "$pattern"
+    PASS=$((PASS+1))
+  else
+    printf 'FAIL %s: missing %q\n' "$name" "$pattern" >&2
+    echo " actual config.yaml:" >&2
+    sed 's/^/ /' "$TMP/$name/config.yaml" >&2
+    FAIL=$((FAIL+1))
+  fi
+}
+
+# assert_not_in — mirror of assert_in: fail if the fragment IS present.
+assert_not_in() {
+  local name="$1" pattern="$2"
+  if grep -qF "$pattern" "$TMP/$name/config.yaml"; then
+    printf 'FAIL %s: unexpected %q present\n' "$name" "$pattern" >&2
+    echo " actual config.yaml:" >&2
+    sed 's/^/ /' "$TMP/$name/config.yaml" >&2
+    FAIL=$((FAIL+1))
+  else
+    printf 'PASS %s: absent %q\n' "$name" "$pattern"
+    PASS=$((PASS+1))
+  fi
+}
+
+# ─── Case 1: OpenAI bridge fires, strips prefix, sets api_mode ──────────
+# Regression guard for #13 + #14. When only OPENAI_API_KEY is set and the
+# user specifies openai/gpt-4o, install.sh must:
+#   - KEEP provider=custom (not flip to "openai" — hermes has no native
+#     openai provider, gateway would crash "Unknown provider")
+#   - strip "openai/" prefix from the model → "gpt-4o"
+#   - emit api_mode: "chat_completions" (so hermes doesn't hit /v1/responses
+#     with include=[reasoning.encrypted_content] which gpt-4o rejects)
+run_case "openai-bridge-happy" \
+  OPENAI_API_KEY=sk-test-abc \
+  HERMES_DEFAULT_MODEL=openai/gpt-4o >/dev/null
+
+assert_in "openai-bridge-happy" 'default: "gpt-4o"'
+assert_in "openai-bridge-happy" 'provider: "custom"'
+assert_in "openai-bridge-happy" 'base_url: "https://api.openai.com/v1"'
+assert_in "openai-bridge-happy" 'api_key: "sk-test-abc"'
+assert_in "openai-bridge-happy" 'api_mode: "chat_completions"'
+assert_not_in "openai-bridge-happy" 'provider: "openai"'
+assert_not_in "openai-bridge-happy" 'default: "openai/gpt-4o"'
+
+# ─── Case 2: Bridge skipped when operator sets HERMES_CUSTOM_* ──────────
+# When an operator points at a self-hosted vLLM or similar, the bridge
+# must NOT overwrite their values. api_mode should NOT be forced to
+# chat_completions (the operator might want codex_responses for o1 models).
+run_case "operator-custom-wins" \
+  OPENAI_API_KEY=sk-test-abc \
+  HERMES_CUSTOM_BASE_URL=http://my-vllm:8080/v1 \
+  HERMES_CUSTOM_API_KEY=operator-key \
+  HERMES_DEFAULT_MODEL=openai/gpt-4o >/dev/null
+
+assert_in "operator-custom-wins" 'base_url: "http://my-vllm:8080/v1"'
+assert_in "operator-custom-wins" 'api_key: "operator-key"'
+assert_not_in "operator-custom-wins" 'api_mode: "chat_completions"'
+assert_not_in "operator-custom-wins" 'base_url: "https://api.openai.com/v1"'
+
+# ─── Case 3: Non-custom providers untouched ─────────────────────────────
+# An OPENROUTER_API_KEY should pick provider=openrouter (per
+# derive-provider.sh), and the bridge must not fire.
+run_case "openrouter-not-touched" \
+  OPENROUTER_API_KEY=sk-or-test \
+  OPENAI_API_KEY=sk-test-abc \
+  HERMES_DEFAULT_MODEL=openai/gpt-4o >/dev/null
+
+assert_in "openrouter-not-touched" 'provider: "openrouter"'
+assert_not_in "openrouter-not-touched" 'api_mode: "chat_completions"'
+assert_not_in "openrouter-not-touched" 'base_url: "https://api.openai.com/v1"'
+# openrouter keeps the full slug (it can resolve openai/gpt-4o)
+assert_in "openrouter-not-touched" 'default: "openai/gpt-4o"'
+
+# ─── Case 4: Non-openai model on bridge path leaves slug alone ──────────
+# If the bridge fires but the model isn't prefixed with openai/, we don't
+# want to break the string. Prefix-strip is a no-op when the prefix isn't there.
+run_case "non-prefixed-model" \
+  OPENAI_API_KEY=sk-test-abc \
+  HERMES_DEFAULT_MODEL=gpt-4o >/dev/null
+
+assert_in "non-prefixed-model" 'default: "gpt-4o"'
+
+# ─── Summary ────────────────────────────────────────────────────────────
+echo ""
+echo "Hermes bridge test: PASS=$PASS FAIL=$FAIL"
+# Exit status of the script = whether any assertion failed.
+[ "$FAIL" = "0" ]