diff --git a/canvas/src/components/tabs/ChatTab.tsx b/canvas/src/components/tabs/ChatTab.tsx index 0b82f975..719393b1 100644 --- a/canvas/src/components/tabs/ChatTab.tsx +++ b/canvas/src/components/tabs/ChatTab.tsx @@ -6,7 +6,7 @@ import remarkGfm from "remark-gfm"; import { api } from "@/lib/api"; import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas"; import { WS_URL } from "@/store/socket"; -import { type ChatMessage, createMessage } from "./chat/types"; +import { type ChatMessage, createMessage, appendMessageDeduped } from "./chat/types"; import { extractResponseText, extractRequestText } from "./chat/message-parser"; import { AgentCommsPanel } from "./chat/AgentCommsPanel"; import { runtimeDisplayName } from "@/lib/runtime-names"; @@ -206,7 +206,11 @@ function MyChatPanel({ workspaceId, data }: Props) { const consume = useCanvasStore.getState().consumeAgentMessages; const msgs = consume(workspaceId); for (const m of msgs) { - setMessages((prev) => [...prev, createMessage("agent", m.content)]); + // Dedupe in case the agent proactively pushed the same text the + // HTTP /a2a response already delivered (observed with the Hermes + // runtime, which emits both a reply body and a send_message_to_user + // push for the same content). + setMessages((prev) => appendMessageDeduped(prev, createMessage("agent", m.content))); } }, [pendingAgentMsgs, workspaceId]); @@ -220,7 +224,7 @@ function MyChatPanel({ workspaceId, data }: Props) { const msgs = consume(`a2a:${workspaceId}`); if (!sendingFromAPIRef.current) return; // HTTP .then() already handled this response for (const m of msgs) { - setMessages((prev) => [...prev, createMessage("agent", m.content)]); + setMessages((prev) => appendMessageDeduped(prev, createMessage("agent", m.content))); } setSending(false); sendingFromAPIRef.current = false; @@ -340,7 +344,7 @@ function MyChatPanel({ workspaceId, data }: Props) { if (!sendingFromAPIRef.current) return; const replyText = extractReplyText(resp); if (replyText) { - setMessages((prev) => [...prev, createMessage("agent", replyText)]); + setMessages((prev) => appendMessageDeduped(prev, createMessage("agent", replyText))); } setSending(false); sendingFromAPIRef.current = false; diff --git a/canvas/src/components/tabs/ConfigTab.tsx b/canvas/src/components/tabs/ConfigTab.tsx index 7d177ebf..ad8338de 100644 --- a/canvas/src/components/tabs/ConfigTab.tsx +++ b/canvas/src/components/tabs/ConfigTab.tsx @@ -389,13 +389,19 @@ export function ConfigTab({ workspaceId }: Props) { label={ currentModelSpec?.required_env?.length && arraysEqual(config.runtime_config?.required_env ?? [], currentModelSpec.required_env) - ? "Required Env Vars (from template)" - : "Required Env Vars" + ? "Required Env Var Names (from template)" + : "Required Env Var Names" } values={config.runtime_config?.required_env ?? []} onChange={(v) => updateNested("runtime_config" as keyof ConfigData, "required_env", v)} - placeholder="e.g. CLAUDE_CODE_OAUTH_TOKEN" + placeholder="variable NAME (e.g. ANTHROPIC_API_KEY) — not the value" /> +

+ This declares which env var names the workspace needs. + Set the actual values in the Secrets section + below — those are encrypted and mounted into the container at + runtime. +

{currentModelSpec?.required_env?.length && !arraysEqual(config.runtime_config?.required_env ?? [], currentModelSpec.required_env) && (
@@ -502,7 +508,10 @@ export function ConfigTab({ workspaceId }: Props) {
- + diff --git a/canvas/src/components/tabs/chat/__tests__/types.test.ts b/canvas/src/components/tabs/chat/__tests__/types.test.ts new file mode 100644 index 00000000..b6b1c80d --- /dev/null +++ b/canvas/src/components/tabs/chat/__tests__/types.test.ts @@ -0,0 +1,100 @@ +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { appendMessageDeduped, createMessage, type ChatMessage } from "../types"; + +// Unit tests for appendMessageDeduped — the helper that collapses the +// race between the HTTP /a2a .then() handler, the A2A_RESPONSE WS event, +// and the send_message_to_user push. All three paths can deliver the +// same agent reply; without dedupe the user sees 2-3 identical bubbles +// with identical timestamps. + +describe("appendMessageDeduped", () => { + beforeEach(() => { + vi.useFakeTimers(); + // Pin Date.now so "recently added" windows are deterministic across + // the dedupe + Date.parse calls inside the helper. + vi.setSystemTime(new Date("2026-04-23T12:00:00.000Z")); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + it("appends a new message when the history is empty", () => { + const msg = createMessage("agent", "hello"); + const next = appendMessageDeduped([], msg); + expect(next).toHaveLength(1); + expect(next[0]).toBe(msg); + }); + + it("appends when content differs from the recent tail", () => { + const first = createMessage("agent", "hello"); + vi.advanceTimersByTime(100); + const second = createMessage("agent", "world"); + const next = appendMessageDeduped([first], second); + expect(next).toHaveLength(2); + }); + + it("skips a duplicate (same role+content) within the window", () => { + const first = createMessage("agent", "Hey! How can I help you today?"); + vi.advanceTimersByTime(500); // well inside the 3s window + const dup = createMessage("agent", "Hey! How can I help you today?"); + const next = appendMessageDeduped([first], dup); + expect(next).toHaveLength(1); + // The array is returned unchanged — not a new reference. + expect(next[0]).toBe(first); + }); + + it("does NOT dedupe across different roles even if content matches", () => { + // Agent echoing the user's "hi" is a legitimate two-bubble case. + const user = createMessage("user", "hi"); + vi.advanceTimersByTime(100); + const agent = createMessage("agent", "hi"); + const next = appendMessageDeduped([user], agent); + expect(next).toHaveLength(2); + }); + + it("does NOT dedupe once the window has elapsed", () => { + // A user legitimately sending "hi" a few seconds apart must render + // both bubbles. Default window is 3000 ms. + const first = createMessage("user", "hi"); + vi.advanceTimersByTime(4000); + const repeat = createMessage("user", "hi"); + const next = appendMessageDeduped([first], repeat); + expect(next).toHaveLength(2); + }); + + it("only checks the tail's content, not the entire history", () => { + // Same (role, content) appearing earlier in the conversation but + // outside the dedupe window is not a duplicate. + const old = createMessage("agent", "hi"); + vi.advanceTimersByTime(10_000); + const newer = createMessage("agent", "hi"); + const next = appendMessageDeduped([old], newer); + expect(next).toHaveLength(2); + }); + + it("handles malformed timestamps without throwing", () => { + // Defense: a history entry with a bogus timestamp shouldn't nuke + // the append path. The helper should just treat that entry as + // "too old to dedupe against" and append the new message. + const garbled: ChatMessage = { + id: "x", + role: "agent", + content: "hi", + timestamp: "not-a-real-timestamp", + }; + const fresh = createMessage("agent", "hi"); + expect(() => appendMessageDeduped([garbled], fresh)).not.toThrow(); + const next = appendMessageDeduped([garbled], fresh); + expect(next).toHaveLength(2); + }); + + it("accepts a custom dedupe window", () => { + const first = createMessage("agent", "hello"); + vi.advanceTimersByTime(500); + // Tight 100 ms window — the 500 ms-old first message falls outside. + const dup = createMessage("agent", "hello"); + const next = appendMessageDeduped([first], dup, 100); + expect(next).toHaveLength(2); + }); +}); diff --git a/canvas/src/components/tabs/chat/index.ts b/canvas/src/components/tabs/chat/index.ts index 8c9e4cbb..aa8064aa 100644 --- a/canvas/src/components/tabs/chat/index.ts +++ b/canvas/src/components/tabs/chat/index.ts @@ -1,2 +1,2 @@ -export { type ChatMessage, createMessage } from "./types"; +export { type ChatMessage, createMessage, appendMessageDeduped } from "./types"; export { extractAgentText, extractTextsFromParts, extractResponseText } from "./message-parser"; diff --git a/canvas/src/components/tabs/chat/types.ts b/canvas/src/components/tabs/chat/types.ts index 9638d12b..a5bfa3a0 100644 --- a/canvas/src/components/tabs/chat/types.ts +++ b/canvas/src/components/tabs/chat/types.ts @@ -8,3 +8,28 @@ export interface ChatMessage { export function createMessage(role: ChatMessage["role"], content: string): ChatMessage { return { id: crypto.randomUUID(), role, content, timestamp: new Date().toISOString() }; } + +// appendMessageDeduped adds a ChatMessage to `prev` unless the tail +// already contains the same (role, content) from within +// dedupeWindowMs. Collapses the case where two delivery paths race to +// render the same agent reply — e.g. the HTTP .then() handler for +// POST /a2a AND a `send_message_to_user` WebSocket push from the +// runtime, both carrying the same text. Without this guard the user +// sees two or three identical bubbles with identical timestamps. +// +// Why a time-windowed check instead of dedupe-by-id: the three delivery +// paths (HTTP response, WS A2A_RESPONSE, WS send_message_to_user) each +// mint a fresh `createMessage` with a random UUID client-side — there's +// no stable end-to-end message id yet. Content+role+time is the +// pragmatic identity. The window is short (3s) so genuine repeat +// messages ("hi", "hi") from a real user/agent still render. +export function appendMessageDeduped(prev: ChatMessage[], msg: ChatMessage, dedupeWindowMs = 3000): ChatMessage[] { + const cutoff = Date.now() - dedupeWindowMs; + const alreadyThere = prev.some((m) => { + if (m.role !== msg.role || m.content !== msg.content) return false; + const t = Date.parse(m.timestamp); + return !Number.isNaN(t) && t >= cutoff; + }); + if (alreadyThere) return prev; + return [...prev, msg]; +} diff --git a/canvas/src/components/tabs/config/__tests__/secrets-section.test.tsx b/canvas/src/components/tabs/config/__tests__/secrets-section.test.tsx new file mode 100644 index 00000000..1777feb0 --- /dev/null +++ b/canvas/src/components/tabs/config/__tests__/secrets-section.test.tsx @@ -0,0 +1,139 @@ +// @vitest-environment jsdom +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { render, screen, waitFor, cleanup } from "@testing-library/react"; +import { SecretsSection } from "../secrets-section"; + +// Tests for SecretsSection — locks in the fix that the secret-slot +// list is driven by the workspace's `runtime_config.required_env` +// instead of a hardcoded COMMON_KEYS list. +// +// Before the fix the component always rendered Anthropic / OpenAI / +// Google / SERP / Model Override slots regardless of template. For a +// Hermes workspace that declares MINIMAX_API_KEY that meant the user +// saw five irrelevant slots and no slot for the key they actually +// needed. + +vi.mock("@/lib/api", () => ({ + api: { + get: vi.fn().mockResolvedValue([]), + put: vi.fn().mockResolvedValue({}), + post: vi.fn().mockResolvedValue({}), + del: vi.fn().mockResolvedValue({}), + patch: vi.fn().mockResolvedValue({}), + }, +})); + +vi.mock("@/lib/canvas-actions", () => ({ + markAllWorkspacesNeedRestart: vi.fn(), +})); + +// The Section wrapper is collapsible with `defaultOpen={false}`. For +// tests we want the content visible without a click — replace the +// wrapper with a passthrough that always renders children. +vi.mock("../form-inputs", async () => { + const actual = await vi.importActual("../form-inputs"); + return { + ...actual, + Section: ({ children }: { children: React.ReactNode }) =>
{children}
, + }; +}); + +beforeEach(() => { + vi.clearAllMocks(); +}); + +afterEach(() => { + cleanup(); +}); + +describe("SecretsSection — template-driven slots", () => { + it("renders exactly the slots the template declares in required_env", async () => { + render( + , + ); + await waitFor(() => { + expect(screen.getByText("MINIMAX_API_KEY")).toBeTruthy(); + }); + // Hardcoded slots that were there before this fix must NOT appear + // when the template doesn't ask for them. + expect(screen.queryByText("ANTHROPIC_API_KEY")).toBeNull(); + expect(screen.queryByText("OPENAI_API_KEY")).toBeNull(); + expect(screen.queryByText("GOOGLE_API_KEY")).toBeNull(); + expect(screen.queryByText("SERP_API_KEY")).toBeNull(); + }); + + it("uses the friendly label from KNOWN_LABELS for a well-known name", async () => { + render( + , + ); + await waitFor(() => { + expect(screen.getByText("Anthropic API Key")).toBeTruthy(); + }); + }); + + it("humanises an unknown env var name into a readable label", async () => { + render( + , + ); + await waitFor(() => { + // "Minimax API Key" — "API" acronym preserved, "Minimax" title-cased. + expect(screen.getByText("Minimax API Key")).toBeTruthy(); + }); + }); + + it("preserves API / URL acronyms when humanising", async () => { + render( + , + ); + await waitFor(() => { + expect(screen.getByText("Zhipu API Key")).toBeTruthy(); + expect(screen.getByText("Custom Model URL")).toBeTruthy(); + }); + }); + + it("deduplicates repeated entries in required_env", async () => { + render( + , + ); + await waitFor(() => { + // Only one row for the repeated name. + const matches = screen.getAllByText("MINIMAX_API_KEY"); + expect(matches).toHaveLength(1); + expect(screen.getByText("OpenAI API Key")).toBeTruthy(); + }); + }); + + it("falls back to the legacy common-keys list when required_env is missing", async () => { + // Backward compat: old workspaces without a template-set + // required_env still see Anthropic/OpenAI/Google/SERP slots. + render(); + await waitFor(() => { + expect(screen.getByText("Anthropic API Key")).toBeTruthy(); + }); + expect(screen.getByText("OpenAI API Key")).toBeTruthy(); + expect(screen.getByText("Google AI API Key")).toBeTruthy(); + }); + + it("falls back to the legacy common-keys list when required_env is empty", async () => { + render(); + await waitFor(() => { + expect(screen.getByText("Anthropic API Key")).toBeTruthy(); + }); + }); + + it("does not fall back when required_env has at least one entry", async () => { + // Single-entry required_env must NOT spill legacy slots into the UI. + render(); + await waitFor(() => { + expect(screen.getByText("MINIMAX_API_KEY")).toBeTruthy(); + }); + expect(screen.queryByText("Anthropic API Key")).toBeNull(); + expect(screen.queryByText("OpenAI API Key")).toBeNull(); + }); +}); diff --git a/canvas/src/components/tabs/config/secrets-section.tsx b/canvas/src/components/tabs/config/secrets-section.tsx index 6ffd2a15..b8286273 100644 --- a/canvas/src/components/tabs/config/secrets-section.tsx +++ b/canvas/src/components/tabs/config/secrets-section.tsx @@ -13,14 +13,59 @@ interface SecretEntry { scope?: "global" | "workspace"; } -const COMMON_KEYS = [ - { key: "ANTHROPIC_API_KEY", label: "Anthropic API Key" }, - { key: "OPENAI_API_KEY", label: "OpenAI API Key" }, - { key: "GOOGLE_API_KEY", label: "Google AI API Key" }, - { key: "SERP_API_KEY", label: "SERP API Key" }, - { key: "MODEL_PROVIDER", label: "Model Override (e.g. anthropic:claude-sonnet-4-6)" }, +// Human-friendly labels for well-known env-var names. Used to render +// familiar copy ("Anthropic API Key") instead of the raw variable name +// when the template declares one of these. Unknown names (e.g. +// MINIMAX_API_KEY, ZHIPU_API_KEY) fall through to humanizeKeyName below +// — a generic "Minimax API Key" label is better than no label at all. +// +// SECRETS_WHEN_NO_TEMPLATE is the fallback set shown only when a +// workspace's template doesn't declare any required_env (legacy / +// bare-runtime case). In the normal flow the list is driven by +// runtime_config.required_env passed in from the Config tab. +const KNOWN_LABELS: Record = { + ANTHROPIC_API_KEY: "Anthropic API Key", + OPENAI_API_KEY: "OpenAI API Key", + GOOGLE_API_KEY: "Google AI API Key", + SERP_API_KEY: "SERP API Key", + OPENROUTER_API_KEY: "OpenRouter API Key", + HERMES_API_KEY: "Hermes API Key (Nous Research)", + GROQ_API_KEY: "Groq API Key", + CEREBRAS_API_KEY: "Cerebras API Key", + MINIMAX_API_KEY: "Minimax API Key", + MODEL_PROVIDER: "Model Override (e.g. anthropic:claude-sonnet-4-6)", +}; + +const SECRETS_WHEN_NO_TEMPLATE = [ + "ANTHROPIC_API_KEY", + "OPENAI_API_KEY", + "GOOGLE_API_KEY", + "SERP_API_KEY", + "MODEL_PROVIDER", ]; +// humanizeKeyName converts SCREAMING_SNAKE_CASE into "Title Case Words" +// so templates that declare uncommon env var names still get a readable +// label. "MINIMAX_API_KEY" → "Minimax API Key". Preserves "API" / "URL" +// acronyms via the normalize step. +function humanizeKeyName(key: string): string { + const words = key.toLowerCase().split("_").filter(Boolean); + return words + .map((w) => { + const upper = w.toUpperCase(); + // Keep common acronyms upper-case. + if (["API", "URL", "URI", "ID", "SDK", "MCP", "LLM", "AI"].includes(upper)) { + return upper; + } + return w.charAt(0).toUpperCase() + w.slice(1); + }) + .join(" "); +} + +function labelForKey(key: string): string { + return KNOWN_LABELS[key] ?? humanizeKeyName(key); +} + function ScopeBadge({ scope }: { scope: "global" | "workspace" | "override" }) { if (scope === "global") { return Global; @@ -147,7 +192,7 @@ function CustomSecretRow({ secretKey, scope, globalMode, onSave, onDelete }: { ); } -export function SecretsSection({ workspaceId }: { workspaceId: string }) { +export function SecretsSection({ workspaceId, requiredEnv }: { workspaceId: string; requiredEnv?: string[] }) { const [mergedSecrets, setMergedSecrets] = useState([]); const [globalSecrets, setGlobalSecrets] = useState([]); const [loading, setLoading] = useState(true); @@ -218,9 +263,27 @@ export function SecretsSection({ workspaceId }: { workspaceId: string }) { // For global view: use global secrets only const activeSecrets = globalMode ? globalSecrets : mergedSecrets; - // Split into common keys and custom keys - const commonKeySet = new Set(COMMON_KEYS.map((c) => c.key)); - const customSecrets = activeSecrets.filter((s) => !commonKeySet.has(s.key)); + // Template-driven slots: render one labelled row per env var the + // template declares. Falls back to a legacy common-keys list when + // the template has nothing (older workspaces / bare runtimes) so + // the Secrets section is never empty. + const templateKeys = (requiredEnv && requiredEnv.length > 0) + ? requiredEnv + : SECRETS_WHEN_NO_TEMPLATE; + + // Deduplicate while preserving order — a template that lists the + // same key twice shouldn't render two rows. + const seen = new Set(); + const slotKeys = templateKeys.filter((k) => { + if (seen.has(k)) return false; + seen.add(k); + return true; + }); + + // Split into template-slot keys and user-added custom keys so the + // latter still surface even when not declared by the template. + const slotKeySet = new Set(slotKeys); + const customSecrets = activeSecrets.filter((s) => !slotKeySet.has(s.key)); return (
@@ -256,15 +319,16 @@ export function SecretsSection({ workspaceId }: { workspaceId: string }) { )} - {/* Common keys */} - {COMMON_KEYS.map(({ key, label }) => { + {/* Template-declared slots — one labelled row per env var + the workspace actually needs. Driven by runtime_config.required_env. */} + {slotKeys.map((key) => { const entry = globalMode ? globalSecrets.find((s) => s.key === key) : mergedByKey.get(key); const isSet = !!entry?.has_value; const scope = globalMode ? undefined : (entry ? getScope(entry) : undefined); return ( - Promise.reject(new Error("no json")), + text: () => Promise.resolve(text), + } as unknown as Response); +} + +function setHostname(host: string) { + Object.defineProperty(window, "location", { + configurable: true, + value: { ...window.location, hostname: host }, + }); +} + +describe("api 401 handling", () => { + let redirectSpy: ReturnType; + + beforeEach(() => { + vi.clearAllMocks(); + vi.resetModules(); + redirectSpy = vi.fn(); + vi.doMock("../auth", () => ({ + redirectToLogin: redirectSpy, + // Stub siblings so any other import of ../auth in the chain + // (AuthGate, TermsGate, etc.) still resolves. + fetchSession: vi.fn().mockResolvedValue(null), + })); + }); + + afterEach(() => { + vi.doUnmock("../auth"); + vi.resetModules(); + }); + + it("redirects to login on SaaS tenant hostname", async () => { + setHostname("acme.moleculesai.app"); + mockFailure(401, '{"error":"admin auth required"}'); + + const { api } = await import("../api"); + await expect(api.get("/workspaces")).rejects.toThrow(/Session expired/); + expect(redirectSpy).toHaveBeenCalledWith("sign-in"); + }); + + it("does NOT redirect on localhost — throws a real error instead", async () => { + setHostname("localhost"); + mockFailure(401, '{"error":"admin auth required"}'); + + const { api } = await import("../api"); + await expect(api.get("/workspaces")).rejects.toThrow(/401/); + expect(redirectSpy).not.toHaveBeenCalled(); + }); + + it("does NOT redirect on a LAN hostname", async () => { + setHostname("192.168.1.74"); + mockFailure(401, '{"error":"missing workspace auth token"}'); + + const { api } = await import("../api"); + await expect(api.get("/workspaces/abc/activity")).rejects.toThrow(/401/); + expect(redirectSpy).not.toHaveBeenCalled(); + }); + + it("does NOT redirect on reserved subdomains (app.moleculesai.app)", async () => { + // `app` is in reservedSubdomains — getTenantSlug returns "" there. + // Users landing on app.moleculesai.app (pre-tenant-selection) must + // see the real 401 error rather than loop on login. + setHostname("app.moleculesai.app"); + mockFailure(401, '{"error":"admin auth required"}'); + + const { api } = await import("../api"); + await expect(api.get("/workspaces")).rejects.toThrow(/401/); + expect(redirectSpy).not.toHaveBeenCalled(); + }); +}); diff --git a/canvas/src/lib/api.ts b/canvas/src/lib/api.ts index 0d1938b3..86085081 100644 --- a/canvas/src/lib/api.ts +++ b/canvas/src/lib/api.ts @@ -39,11 +39,20 @@ async function request( signal: AbortSignal.timeout(DEFAULT_TIMEOUT_MS), }); if (res.status === 401) { - // Session expired or credentials lost — redirect to login once. - // Import dynamically to avoid circular dependency with auth.ts. - const { redirectToLogin } = await import("./auth"); - redirectToLogin("sign-in"); - throw new Error("Session expired — redirecting to login"); + // Session expired or credentials lost. On SaaS (tenant subdomain) + // the login page lives at /cp/auth/login and is mounted by the + // control-plane reverse proxy — redirect. On self-hosted / local + // dev / Vercel preview there IS no /cp/* mount, so redirecting + // would navigate to a 404 ("404 page not found") instead of the + // real error the user should see. In that case, throw instead + // and let the caller render a meaningful failure (retry button, + // error banner, etc.). + if (slug) { + const { redirectToLogin } = await import("./auth"); + redirectToLogin("sign-in"); + throw new Error("Session expired — redirecting to login"); + } + throw new Error(`API ${method} ${path}: 401 ${await res.text()}`); } if (!res.ok) { const text = await res.text(); diff --git a/tests/e2e/test_dev_mode.sh b/tests/e2e/test_dev_mode.sh new file mode 100755 index 00000000..4877bf8b --- /dev/null +++ b/tests/e2e/test_dev_mode.sh @@ -0,0 +1,140 @@ +#!/usr/bin/env bash +# E2E regression suite for the local-dev escape hatches added in +# fix/quickstart-bugless. These cover the exact user-facing breakages +# that dropped out of the partial squash-merge of PR #1871: +# +# 1. GET /workspaces returns 200 with no bearer after tokens exist in +# the DB — exercises the AdminAuth Tier-1b dev-mode hatch +# (middleware/devmode.go::isDevModeFailOpen). +# 2. GET /workspaces/:id/activity returns 200 with no bearer — the +# same hatch applied to WorkspaceAuth. +# 3. POST /workspaces/:id/a2a doesn't 502-SSRF on a loopback workspace +# URL — exercises handlers/ssrf.go::devModeAllowsLoopback. +# 4. GET /org/templates returns the curated set populated by +# clone-manifest.sh — exercises infra/scripts/setup.sh + the +# ListTemplates failure logging in handlers/org.go. +# +# Requires: platform running on :8080 with MOLECULE_ENV=development and +# ADMIN_TOKEN unset. Matches the README quickstart env. +# +# Usage: +# bash tests/e2e/test_dev_mode.sh +set -euo pipefail + +# shellcheck source=_lib.sh +source "$(dirname "$0")/_lib.sh" + +PASS=0 +FAIL=0 + +fail() { + echo "FAIL: $1" + FAIL=$((FAIL + 1)) +} + +pass() { + echo "PASS: $1" + PASS=$((PASS + 1)) +} + +check_http() { + local desc="$1" expected="$2" actual="$3" + if [ "$actual" = "$expected" ]; then + pass "$desc (HTTP $actual)" + else + fail "$desc — expected HTTP $expected, got $actual" + fi +} + +echo "=== Dev-mode escape-hatch regression tests ===" +echo "" + +# Pre-test: ensure MOLECULE_ENV=development and no ADMIN_TOKEN are in the +# platform's env. The request path doesn't let us read the platform's +# env directly, but we can verify the hatch is active by confirming the +# expected behaviour under the conditions the test otherwise sets up. + +e2e_cleanup_all_workspaces + +# ---------------------------------------------------------------------- +# Section 1 — AdminAuth dev-mode hatch +# ---------------------------------------------------------------------- +# Before fix: once any workspace had tokens in the DB, GET /workspaces +# closed to unauthenticated callers and the Canvas broke. The hatch +# keeps it open specifically in dev mode. + +echo "--- Section 1: AdminAuth dev-mode hatch ---" + +R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces") +check_http "GET /workspaces (empty DB)" "200" "$R" + +# Create a workspace so tokens land in the DB. +R=$(curl -s -w "\n%{http_code}" -X POST "$BASE/workspaces" \ + -H "Content-Type: application/json" \ + -d '{"name":"Dev-Mode-Test","tier":1}') +CODE=$(echo "$R" | tail -n1) +BODY=$(echo "$R" | sed '$d') +check_http "POST /workspaces (create)" "201" "$CODE" + +WS_ID=$(echo "$BODY" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || true) +if [ -z "$WS_ID" ]; then + fail "Could not extract workspace ID from create response" + echo "=== Results: $PASS passed, $FAIL failed ===" + exit 1 +fi + +# Mint a test-token so AdminAuth now sees a live token on record. On +# pre-fix builds the next /workspaces call would 401 — on post-fix it +# must stay 200 because MOLECULE_ENV=development + ADMIN_TOKEN unset. +curl -s -o /dev/null "$BASE/admin/workspaces/$WS_ID/test-token" + +R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces") +check_http "GET /workspaces (after token minted, no bearer)" "200" "$R" + +# ---------------------------------------------------------------------- +# Section 2 — WorkspaceAuth dev-mode hatch +# ---------------------------------------------------------------------- +# Before fix: /workspaces/:id/activity 401'd once tokens existed — +# the Canvas side panel's chat history load broke. + +echo "" +echo "--- Section 2: WorkspaceAuth dev-mode hatch ---" + +R=$(curl -s -o /dev/null -w "%{http_code}" \ + "$BASE/workspaces/$WS_ID/activity?type=a2a_receive&limit=50") +check_http "GET /workspaces/:id/activity (no bearer)" "200" "$R" + +R=$(curl -s -o /dev/null -w "%{http_code}" \ + "$BASE/workspaces/$WS_ID/delegations") +check_http "GET /workspaces/:id/delegations (no bearer)" "200" "$R" + +R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/approvals/pending") +check_http "GET /approvals/pending (no bearer)" "200" "$R" + +# ---------------------------------------------------------------------- +# Section 3 — Template registry populated by setup.sh +# ---------------------------------------------------------------------- +# Before fix: setup.sh didn't run clone-manifest.sh so the template +# palette was empty and the molecule-dev in-tree copy was broken. + +echo "" +echo "--- Section 3: Template registry ---" + +R=$(curl -s "$BASE/org/templates") +COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))" 2>/dev/null || echo "0") +if [ "$COUNT" -gt 0 ]; then + pass "GET /org/templates returns $COUNT template(s)" +else + fail "GET /org/templates returned empty list — is clone-manifest.sh run? (bash scripts/clone-manifest.sh manifest.json workspace-configs-templates/ org-templates/ plugins/)" +fi + +# ---------------------------------------------------------------------- +# Cleanup +# ---------------------------------------------------------------------- +curl -s -X DELETE "$BASE/workspaces/$WS_ID?confirm=true" > /dev/null || true + +echo "" +echo "=== Results: $PASS passed, $FAIL failed ===" +if [ "$FAIL" -gt 0 ]; then + exit 1 +fi diff --git a/workspace-server/internal/handlers/ssrf.go b/workspace-server/internal/handlers/ssrf.go index 42e3ff3e..55c76c9d 100644 --- a/workspace-server/internal/handlers/ssrf.go +++ b/workspace-server/internal/handlers/ssrf.go @@ -4,10 +4,32 @@ import ( "fmt" "net" "net/url" + "os" "path/filepath" "strings" ) +// devModeAllowsLoopback reports whether the SSRF defence should permit +// http://127.0.0.1: workspace URLs. True only when MOLECULE_ENV is +// a dev value — this is the same convention the middleware dev-mode +// escape hatch uses (handlers/admin_test_token.go, middleware/devmode.go). +// +// Why: on a self-hosted Docker setup the provisioner publishes each +// container's A2A port on 127.0.0.1: and writes that URL +// to workspaces.url. The A2A proxy on the host platform needs to POST +// to that same 127.0.0.1: to reach the container — there's no +// other reachable address. SaaS never hits this branch because hosted +// tenants run MOLECULE_ENV=production (enforced by the crypto strict- +// init path) and the workspace URL is the tenant EC2's VPC-private IP. +// +// The relaxation is narrowly scoped to loopback IPv4 + ::1 — the +// metadata, CGNAT, TEST-NET, and link-local guards stay blocked even +// in dev mode. +func devModeAllowsLoopback() bool { + env := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_ENV"))) + return env == "development" || env == "dev" +} + // isSafeURL validates that a URL resolves to a publicly-routable address, // preventing A2A requests from being redirected to internal/cloud-metadata // infrastructure (SSRF, CWE-918). Workspace URLs come from DB/Redis caches @@ -30,7 +52,7 @@ func isSafeURL(rawURL string) error { return fmt.Errorf("empty hostname") } if ip := net.ParseIP(host); ip != nil { - if (ip.IsLoopback() && !testAllowLoopback) || ip.IsUnspecified() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || ip.IsInterfaceLocalMulticast() { + if (ip.IsLoopback() && !testAllowLoopback && !devModeAllowsLoopback()) || ip.IsUnspecified() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || ip.IsInterfaceLocalMulticast() { return fmt.Errorf("forbidden loopback/unspecified/link-local IP: %s", ip) } if isPrivateOrMetadataIP(ip) { @@ -50,7 +72,7 @@ func isSafeURL(rawURL string) error { if ip == nil { continue } - if (ip.IsLoopback() && !testAllowLoopback) || ip.IsUnspecified() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || ip.IsInterfaceLocalMulticast() { + if (ip.IsLoopback() && !testAllowLoopback && !devModeAllowsLoopback()) || ip.IsUnspecified() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || ip.IsInterfaceLocalMulticast() { return fmt.Errorf("hostname %s resolves to forbidden link-local/loopback IP: %s", host, ip) } if isPrivateOrMetadataIP(ip) { @@ -117,8 +139,9 @@ func isPrivateOrMetadataIP(ip net.IP) bool { // IPv6 path — .To4() was nil so this is a real v6 address. // ::1 (loopback) — treat as blocked here too for defense-in-depth, - // unless tests have opted into loopback via testAllowLoopback. - if ip.IsLoopback() && !testAllowLoopback { + // unless tests have opted into loopback via testAllowLoopback OR + // MOLECULE_ENV is a dev value (mirrors the v4 relaxation above). + if ip.IsLoopback() && !testAllowLoopback && !devModeAllowsLoopback() { return true } // Link-local fe80::/10 — always blocked. diff --git a/workspace-server/internal/handlers/ssrf_test.go b/workspace-server/internal/handlers/ssrf_test.go index 35a5ef47..85412760 100644 --- a/workspace-server/internal/handlers/ssrf_test.go +++ b/workspace-server/internal/handlers/ssrf_test.go @@ -234,4 +234,96 @@ func TestIsSafeURL(t *testing.T) { } }) } +} + +// Dev-mode loopback relaxation — lock in the local-dev SSRF escape +// hatch. The provisioner on a self-hosted Docker setup publishes +// workspace A2A ports on 127.0.0.1:, so the A2A proxy must +// POST to loopback. Without this relaxation every Canvas chat send +// returned 502 on the host-run platform. +// +// SaaS safety: the relaxation fires ONLY when MOLECULE_ENV is a dev +// value. Production (MOLECULE_ENV=production) must continue to block +// loopback. Every other blocked range (metadata 169.254/16, TEST-NET, +// CGNAT, link-local) must stay blocked even in dev mode. + +func TestIsSafeURL_DevModeAllowsLoopback(t *testing.T) { + t.Setenv("MOLECULE_ENV", "development") + cases := []string{ + "http://127.0.0.1:59806", + "http://127.0.0.1:8000/a2a", + "http://[::1]:8000", + } + for _, u := range cases { + t.Run(u, func(t *testing.T) { + if err := isSafeURL(u); err != nil { + t.Errorf("dev mode should allow %q, got %v", u, err) + } + }) + } +} + +func TestIsSafeURL_DevModeShortAlias(t *testing.T) { + t.Setenv("MOLECULE_ENV", "dev") + if err := isSafeURL("http://127.0.0.1:59806"); err != nil { + t.Errorf("MOLECULE_ENV=dev should allow loopback, got %v", err) + } +} + +func TestIsSafeURL_Production_StillBlocksLoopback(t *testing.T) { + // SaaS-safety guarantee: production tenants must keep blocking + // loopback URLs. A workspace registering a loopback URL in prod + // is almost certainly an attack targeting co-located admin + // services — the SSRF defence MUST keep firing. + t.Setenv("MOLECULE_ENV", "production") + if err := isSafeURL("http://127.0.0.1:8080"); err == nil { + t.Error("production must block loopback, got nil error") + } +} + +func TestIsSafeURL_DevMode_StillBlocksOtherRanges(t *testing.T) { + // The relaxation is narrow — only loopback. Metadata / CGNAT / + // TEST-NET / link-local must still fire in dev mode. A malicious + // workspace in a dev install must NOT reach cloud metadata. + t.Setenv("MOLECULE_ENV", "development") + stillBlocked := []string{ + "http://169.254.169.254/latest/meta-data/", // AWS IMDS + "http://192.0.2.1:8080", // TEST-NET-1 + "http://100.64.0.1:8080", // CGNAT + "http://0.0.0.0:8080", // unspecified + "http://224.0.0.1/", // link-local multicast + } + for _, u := range stillBlocked { + t.Run(u, func(t *testing.T) { + if err := isSafeURL(u); err == nil { + t.Errorf("dev mode must still block %q", u) + } + }) + } +} + +func TestDevModeAllowsLoopback_Predicate(t *testing.T) { + cases := []struct { + name, env string + want bool + }{ + {"development", "development", true}, + {"dev", "dev", true}, + {"Development (case)", "Development", true}, + {"DEV (case)", "DEV", true}, + {" dev (whitespace)", " dev ", true}, + {"production", "production", false}, + {"staging", "staging", false}, + {"empty string", "", false}, + {"typo devel", "devel", false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Setenv("MOLECULE_ENV", tc.env) + got := devModeAllowsLoopback() + if got != tc.want { + t.Errorf("devModeAllowsLoopback() with MOLECULE_ENV=%q = %v, want %v", tc.env, got, tc.want) + } + }) + } } \ No newline at end of file