Merge remote-tracking branch 'origin/staging' into fix/restore-quickstart-plus-hotfixes
This commit is contained in:
commit
d53583f9c6
38
.github/workflows/e2e-api.yml
vendored
38
.github/workflows/e2e-api.yml
vendored
@ -1,35 +1,21 @@
|
||||
name: E2E API Smoke Test
|
||||
# Extracted from ci.yml so workflow-level concurrency can protect this job
|
||||
# from run-level cancellation (issue #458).
|
||||
#
|
||||
# Problem: the job-level `concurrency.cancel-in-progress: false` in ci.yml
|
||||
# prevented *sibling* E2E jobs from killing each other, but GitHub still
|
||||
# cancelled the parent *workflow run* when a new push arrived. Since the job
|
||||
# lived inside that run, it got cancelled too.
|
||||
#
|
||||
# Fix: a dedicated workflow gets its own concurrency group at the workflow
|
||||
# level. New pushes to the same branch queue here instead of cancelling.
|
||||
# Fast jobs (platform-build, canvas-build, etc.) stay in ci.yml and continue
|
||||
# to benefit from run-level cancellation for quick feedback.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'workspace-server/**'
|
||||
- 'tests/e2e/**'
|
||||
- '.github/workflows/e2e-api.yml'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'workspace-server/**'
|
||||
- 'tests/e2e/**'
|
||||
- '.github/workflows/e2e-api.yml'
|
||||
|
||||
# Workflow-level concurrency: new runs queue rather than cancel.
|
||||
# `cancel-in-progress: false` is load-bearing — without it GitHub would still
|
||||
# cancel this run when the next push arrives, defeating the whole fix.
|
||||
# The group key includes github.ref so PRs don't compete with main.
|
||||
concurrency:
|
||||
group: e2e-api-${{ github.ref }}
|
||||
cancel-in-progress: false
|
||||
@ -39,12 +25,6 @@ jobs:
|
||||
name: E2E API Smoke Test
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 15
|
||||
# Postgres + Redis run as sibling containers via `docker run`. Could
|
||||
# switch to a `services:` block now that we're on Linux, but the
|
||||
# explicit start-and-wait gives us pg_isready / PING readiness checks
|
||||
# that match the 30-tick timeouts the rest of the job expects. Ports
|
||||
# 15432/16379 avoid collision with anything the host may already have
|
||||
# on the standard ports.
|
||||
env:
|
||||
DATABASE_URL: postgres://dev:dev@localhost:15432/molecule?sslmode=disable
|
||||
REDIS_URL: redis://localhost:16379
|
||||
@ -61,12 +41,7 @@ jobs:
|
||||
- name: Start Postgres (docker)
|
||||
run: |
|
||||
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
||||
docker run -d --name "$PG_CONTAINER" \
|
||||
-e POSTGRES_USER=dev \
|
||||
-e POSTGRES_PASSWORD=dev \
|
||||
-e POSTGRES_DB=molecule \
|
||||
-p 15432:5432 \
|
||||
postgres:16
|
||||
docker run -d --name "$PG_CONTAINER" -e POSTGRES_USER=dev -e POSTGRES_PASSWORD=dev -e POSTGRES_DB=molecule -p 15432:5432 postgres:16
|
||||
for i in $(seq 1 30); do
|
||||
if docker exec "$PG_CONTAINER" pg_isready -U dev >/dev/null 2>&1; then
|
||||
echo "Postgres ready after ${i}s"
|
||||
@ -89,6 +64,7 @@ jobs:
|
||||
sleep 1
|
||||
done
|
||||
echo "::error::Redis did not become ready in 15s"
|
||||
docker logs "$REDIS_CONTAINER" || true
|
||||
exit 1
|
||||
- name: Build platform
|
||||
working-directory: workspace-server
|
||||
@ -111,16 +87,14 @@ jobs:
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
- name: Assert migrations applied
|
||||
# Migrations auto-run at platform boot. Fail fast if they silently
|
||||
# didn't — catches future migration-author mistakes before the E2E run.
|
||||
run: |
|
||||
tables=$(docker exec "$PG_CONTAINER" psql -U dev -d molecule -tAc "SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'")
|
||||
if [ "$tables" != "1" ]; then
|
||||
echo "::error::Migrations did not apply — 'workspaces' table missing"
|
||||
echo "::error::Migrations did not apply"
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
fi
|
||||
echo "Migrations OK (workspaces table present)"
|
||||
echo "Migrations OK"
|
||||
- name: Run E2E API tests
|
||||
run: bash tests/e2e/test_api.sh
|
||||
- name: Dump platform log on failure
|
||||
|
||||
@ -387,7 +387,6 @@ function AllKeysModal({
|
||||
}) {
|
||||
const [entries, setEntries] = useState<KeyEntry[]>([]);
|
||||
const [globalError, setGlobalError] = useState<string | null>(null);
|
||||
const firstInputRef = useRef<HTMLInputElement>(null);
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
@ -403,12 +402,6 @@ function AllKeysModal({
|
||||
setGlobalError(null);
|
||||
}, [open, missingKeys]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
const raf = requestAnimationFrame(() => firstInputRef.current?.focus());
|
||||
return () => cancelAnimationFrame(raf);
|
||||
}, [open]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
const handler = (e: KeyboardEvent) => {
|
||||
@ -471,6 +464,15 @@ function AllKeysModal({
|
||||
onKeysAdded();
|
||||
}, [entries, onKeysAdded]);
|
||||
|
||||
// Focus trap: auto-focus first input when modal opens
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
const timer = requestAnimationFrame(() => {
|
||||
document.getElementById("missing-keys-title")?.focus();
|
||||
});
|
||||
return () => cancelAnimationFrame(timer);
|
||||
}, [open]);
|
||||
|
||||
if (!open) return null;
|
||||
|
||||
const allSaved = entries.length > 0 && entries.every((e) => e.saved);
|
||||
@ -482,8 +484,8 @@ function AllKeysModal({
|
||||
return (
|
||||
<div className="fixed inset-0 z-50 flex items-center justify-center">
|
||||
<div
|
||||
aria-hidden="true"
|
||||
className="absolute inset-0 bg-black/70 backdrop-blur-sm"
|
||||
aria-hidden="true"
|
||||
onClick={onCancel}
|
||||
/>
|
||||
|
||||
@ -530,7 +532,7 @@ function AllKeysModal({
|
||||
</div>
|
||||
{entry.saved && (
|
||||
<span className="text-[9px] text-emerald-400 bg-emerald-900/30 px-1.5 py-0.5 rounded flex items-center gap-1">
|
||||
<svg width="8" height="8" viewBox="0 0 8 8" fill="none" aria-hidden="true">
|
||||
<svg width="8" height="8" viewBox="0 0 8 8" fill="none">
|
||||
<path d="M1.5 4L3.5 6L6.5 2" stroke="currentColor" strokeWidth="1.2" strokeLinecap="round" strokeLinejoin="round" />
|
||||
</svg>
|
||||
Saved
|
||||
@ -545,7 +547,7 @@ function AllKeysModal({
|
||||
onChange={(e) => updateEntry(index, { value: e.target.value.trimStart() })}
|
||||
placeholder={entry.key.includes("API_KEY") ? "sk-..." : "Enter value"}
|
||||
type="password"
|
||||
ref={index === 0 ? firstInputRef : undefined}
|
||||
autoFocus={index === 0}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter" && entry.value.trim()) {
|
||||
handleSaveKey(index);
|
||||
|
||||
@ -19,11 +19,18 @@ vi.mock("@/lib/api", () => ({
|
||||
api: { get: vi.fn(), put: vi.fn(), patch: vi.fn(), post: vi.fn() },
|
||||
}));
|
||||
|
||||
const mockCanvasState = {
|
||||
restartWorkspace: vi.fn(),
|
||||
updateNodeData: vi.fn(),
|
||||
};
|
||||
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: vi.fn(() => ({
|
||||
restartWorkspace: vi.fn(),
|
||||
updateNodeData: vi.fn(),
|
||||
})),
|
||||
useCanvasStore: Object.assign(
|
||||
vi.fn((selector: (s: Record<string, unknown>) => unknown) =>
|
||||
selector(mockCanvasState as Record<string, unknown>)
|
||||
),
|
||||
{ getState: () => mockCanvasState }
|
||||
),
|
||||
}));
|
||||
|
||||
vi.mock("../tabs/config/secrets-section", () => ({
|
||||
|
||||
@ -48,20 +48,12 @@ const mockStore = {
|
||||
nodes: [] as Array<{ id: string; data: { parentId: string | null } }>,
|
||||
};
|
||||
|
||||
// useCanvasStore.getState() is called directly by ContextMenu to read `nodes`
|
||||
// for parent-filtering (see ContextMenu.tsx childNodes computation). The mock
|
||||
// must expose both the selector-calling function form AND the .getState()
|
||||
// form so production code using either pattern doesn't hit "not a function".
|
||||
// Factory body runs under vi.mock's hoist — cannot reference outer scope,
|
||||
// so we build the mock function inside and reach `mockStore` via `globalThis`.
|
||||
vi.mock("@/store/canvas", () => {
|
||||
const fn = vi.fn((selector: (s: typeof mockStore) => unknown) =>
|
||||
selector(mockStore),
|
||||
);
|
||||
return {
|
||||
useCanvasStore: Object.assign(fn, { getState: () => mockStore }),
|
||||
};
|
||||
});
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: Object.assign(
|
||||
vi.fn((selector: (s: typeof mockStore) => unknown) => selector(mockStore)),
|
||||
{ getState: () => mockStore }
|
||||
),
|
||||
}));
|
||||
|
||||
// ── Component under test — imported AFTER mocks ───────────────────────────────
|
||||
import { ContextMenu } from "../ContextMenu";
|
||||
|
||||
@ -85,7 +85,7 @@ describe("MissingKeysModal — WCAG 2.1 dialog accessibility", () => {
|
||||
const backdrop = document.querySelector('[aria-hidden="true"]');
|
||||
expect(backdrop).toBeTruthy();
|
||||
// Verify the backdrop is the full-screen overlay (has bg-black/70)
|
||||
expect(backdrop?.className).toContain("bg-black");
|
||||
expect(backdrop?.className).toContain("bg-black/70");
|
||||
});
|
||||
|
||||
it("decorative warning SVG in header has aria-hidden='true'", () => {
|
||||
|
||||
@ -26,9 +26,16 @@ vi.mock("@/lib/api", () => ({
|
||||
},
|
||||
}));
|
||||
|
||||
const mockCanvasTabState = {
|
||||
setPanelTab: vi.fn(),
|
||||
};
|
||||
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: vi.fn((selector: (s: Record<string, unknown>) => unknown) =>
|
||||
selector({ setPanelTab: vi.fn() })
|
||||
useCanvasStore: Object.assign(
|
||||
vi.fn((selector: (s: Record<string, unknown>) => unknown) =>
|
||||
selector(mockCanvasTabState as Record<string, unknown>)
|
||||
),
|
||||
{ getState: () => mockCanvasTabState }
|
||||
),
|
||||
summarizeWorkspaceCapabilities: vi.fn(() => ({ skills: [], tools: [] })),
|
||||
}));
|
||||
|
||||
181
canvas/src/components/tabs/__tests__/ConfigTab.hermes.test.tsx
Normal file
181
canvas/src/components/tabs/__tests__/ConfigTab.hermes.test.tsx
Normal file
@ -0,0 +1,181 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Regression tests for ConfigTab hermes-workspace UX (#1894 + #1900).
|
||||
//
|
||||
// All four bugs this suite pins hit the same workspace on 2026-04-23:
|
||||
// a hermes-runtime workspace whose Config tab showed "LangGraph
|
||||
// (default)" in the runtime dropdown, an empty Model field, and a
|
||||
// scary red "No config.yaml found" banner. Clicking Save would
|
||||
// silently PATCH runtime back to LangGraph, breaking the workspace.
|
||||
//
|
||||
// Each test pins one invariant. If any fails, the bug is back.
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
// ── API mock ──────────────────────────────────────────────────────────
|
||||
// ConfigTab calls three endpoints on load:
|
||||
// 1. GET /workspaces/:id — workspace metadata (runtime)
|
||||
// 2. GET /workspaces/:id/model — model
|
||||
// 3. GET /workspaces/:id/files/config.yaml — template-managed config (may 404)
|
||||
// And POST /templates for the runtime dropdown options.
|
||||
//
|
||||
// Each test wires the mock to return the shape that matches the scenario
|
||||
// it's pinning. Unhandled URLs default to rejecting so the test fails loud
|
||||
// if ConfigTab queries something unexpected.
|
||||
const apiGet = vi.fn();
|
||||
const apiPatch = vi.fn();
|
||||
const apiPut = vi.fn();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
patch: (path: string, body: unknown) => apiPatch(path, body),
|
||||
put: (path: string, body: unknown) => apiPut(path, body),
|
||||
post: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
// Zustand store used by Save → restart. Not exercised in these tests.
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: Object.assign(
|
||||
(selector: (s: unknown) => unknown) => selector({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }),
|
||||
{ getState: () => ({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }) },
|
||||
),
|
||||
}));
|
||||
|
||||
// AgentCardSection fetches its own data — stub to avoid noise.
|
||||
vi.mock("../AgentCardSection", () => ({
|
||||
AgentCardSection: () => <div data-testid="agent-card-stub" />,
|
||||
}));
|
||||
|
||||
import { ConfigTab } from "../ConfigTab";
|
||||
|
||||
// helper — wire the api.get mock for one scenario
|
||||
function wireApi(opts: {
|
||||
workspaceRuntime?: string;
|
||||
workspaceModel?: string;
|
||||
configYamlContent?: string | null; // null = 404
|
||||
templates?: Array<{ id: string; name?: string; runtime?: string; models?: unknown[] }>;
|
||||
}) {
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === `/workspaces/ws-test`) {
|
||||
return Promise.resolve({ runtime: opts.workspaceRuntime ?? "" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/model`) {
|
||||
return Promise.resolve({ model: opts.workspaceModel ?? "" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/files/config.yaml`) {
|
||||
if (opts.configYamlContent === null) {
|
||||
return Promise.reject(new Error("not found"));
|
||||
}
|
||||
return Promise.resolve({ content: opts.configYamlContent ?? "" });
|
||||
}
|
||||
if (path === "/templates") {
|
||||
return Promise.resolve(opts.templates ?? []);
|
||||
}
|
||||
return Promise.reject(new Error(`unmocked api.get: ${path}`));
|
||||
});
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockReset();
|
||||
apiPatch.mockReset();
|
||||
apiPut.mockReset();
|
||||
});
|
||||
|
||||
describe("ConfigTab — hermes workspace", () => {
|
||||
it("loads runtime from workspace metadata when config.yaml is missing (#1894 bug 1)", async () => {
|
||||
// This is the hermes case: no platform config.yaml, so the form must
|
||||
// fall back to GET /workspaces/:id's runtime field. Before the fix, the
|
||||
// runtime dropdown showed "LangGraph (default)" because the fallback
|
||||
// didn't exist.
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "openai/gpt-4o",
|
||||
configYamlContent: null,
|
||||
templates: [{ id: "t-hermes", name: "Hermes", runtime: "hermes", models: [] }],
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
|
||||
// Wait for loads
|
||||
const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
|
||||
expect((select as HTMLSelectElement).value).toBe("hermes");
|
||||
});
|
||||
|
||||
it("does NOT show 'No config.yaml found' error for hermes (#1894 bug 3)", async () => {
|
||||
// Hermes manages its own config at ~/.hermes/config.yaml on the
|
||||
// workspace host — the platform config.yaml NOT existing is expected,
|
||||
// not an error. Showing a red error banner misleads the user.
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
configYamlContent: null,
|
||||
templates: [{ id: "t-hermes", name: "Hermes", runtime: "hermes", models: [] }],
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
|
||||
await waitFor(() => {
|
||||
const node = screen.queryByText(/No config\.yaml found/i);
|
||||
// Assert the red error is absent; a gray info banner with the same
|
||||
// phrase would also fail this (which is what we want — we don't
|
||||
// want any "no config.yaml" phrasing on hermes at all).
|
||||
expect(node).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
it("shows hermes-specific info banner pointing to Terminal tab (#1894)", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
configYamlContent: null,
|
||||
templates: [{ id: "t-hermes", name: "Hermes", runtime: "hermes", models: [] }],
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText(/Hermes manages its own config/i)).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
it("DOES show 'No config.yaml found' error for langgraph workspace (default runtime)", async () => {
|
||||
// Regression guard the other way — the gray info banner is hermes-
|
||||
// specific. A langgraph workspace with no config.yaml SHOULD still
|
||||
// see the red error so the user knows to provide a template config.
|
||||
wireApi({
|
||||
workspaceRuntime: "",
|
||||
configYamlContent: null,
|
||||
templates: [],
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText(/No config\.yaml found/i)).toBeTruthy();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("ConfigTab — config.yaml on disk", () => {
|
||||
it("config.yaml runtime/model wins when present, workspace metadata is fallback", async () => {
|
||||
// If the workspace DB has runtime=langgraph but config.yaml declares
|
||||
// runtime: crewai, the form should show crewai (config.yaml wins).
|
||||
// Prevents silent runtime drift across reads.
|
||||
wireApi({
|
||||
workspaceRuntime: "langgraph", // DB
|
||||
configYamlContent: 'runtime: crewai\nmodel: "claude-opus"\n',
|
||||
templates: [
|
||||
{ id: "t-crewai", name: "CrewAI", runtime: "crewai", models: [] },
|
||||
],
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
|
||||
const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
|
||||
expect((select as HTMLSelectElement).value).toBe("crewai");
|
||||
});
|
||||
});
|
||||
@ -277,20 +277,48 @@ else
|
||||
fi
|
||||
|
||||
# ─── 7. Wait for workspace(s) online ───────────────────────────────────
|
||||
log "7/11 Waiting for workspace(s) to reach status=online..."
|
||||
WS_DEADLINE=$(( $(date +%s) + 600 ))
|
||||
# Hermes cold-boot takes 10-13 min on slow apt days (apt + uv + hermes
|
||||
# install + npm browser-tools). The controlplane bootstrap-watcher
|
||||
# deadline fires at 5 min and sets status=failed prematurely; heartbeat
|
||||
# then transitions failed → online after install.sh finishes. So:
|
||||
#
|
||||
# - 20 min deadline (hermes worst-case + slack)
|
||||
# - 'failed' is a TRANSIENT state we must tolerate — log and keep
|
||||
# polling, only hard-fail at the deadline. Pre-bootstrap-watcher-fix
|
||||
# (controlplane#245) this was a flake generator: workspace went
|
||||
# failed→online inside our window but we bailed at the failed read.
|
||||
log "7/11 Waiting for workspace(s) to reach status=online (up to 20 min — hermes cold boot)..."
|
||||
WS_DEADLINE=$(( $(date +%s) + 1200 ))
|
||||
WS_TO_CHECK="$PARENT_ID"
|
||||
[ -n "$CHILD_ID" ] && WS_TO_CHECK="$WS_TO_CHECK $CHILD_ID"
|
||||
for wid in $WS_TO_CHECK; do
|
||||
WS_LAST_STATUS=""
|
||||
WS_FAILED_LOGGED=0
|
||||
while true; do
|
||||
if [ "$(date +%s)" -gt "$WS_DEADLINE" ]; then
|
||||
fail "Workspace $wid never reached online within 10 min"
|
||||
WS_LAST_ERR=$(tenant_call GET "/workspaces/$wid" 2>/dev/null | \
|
||||
python3 -c "import json,sys; print(json.load(sys.stdin).get('last_sample_error',''))" 2>/dev/null || echo "")
|
||||
fail "Workspace $wid never reached online within 20 min (last status=$WS_LAST_STATUS, err=$WS_LAST_ERR)"
|
||||
fi
|
||||
WS_JSON=$(tenant_call GET "/workspaces/$wid" 2>/dev/null || echo '{}')
|
||||
WS_STATUS=$(echo "$WS_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null)
|
||||
if [ "$WS_STATUS" != "$WS_LAST_STATUS" ]; then
|
||||
log " $wid → $WS_STATUS"
|
||||
WS_LAST_STATUS="$WS_STATUS"
|
||||
fi
|
||||
case "$WS_STATUS" in
|
||||
online) break ;;
|
||||
failed) fail "Workspace $wid status=failed: $(echo "$WS_JSON" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("last_sample_error",""))')" ;;
|
||||
failed)
|
||||
# Not a hard fail — bootstrap-watcher frequently marks failed at
|
||||
# 5 min on hermes, then heartbeat recovers to online around 10-13
|
||||
# min when install.sh finishes. Log once per workspace so the CI
|
||||
# output isn't spammy.
|
||||
if [ "$WS_FAILED_LOGGED" = "0" ]; then
|
||||
log " $wid transiently failed — waiting for heartbeat recovery (bootstrap-watcher deadline, see cp#245)"
|
||||
WS_FAILED_LOGGED=1
|
||||
fi
|
||||
sleep 10
|
||||
;;
|
||||
*) sleep 10 ;;
|
||||
esac
|
||||
done
|
||||
@ -326,9 +354,47 @@ print(parts[0].get('text', '') if parts else '')
|
||||
if [ -z "$AGENT_TEXT" ]; then
|
||||
fail "A2A returned no text. Raw: $A2A_RESP"
|
||||
fi
|
||||
|
||||
# Specific error-class checks — each pattern caught a real P0 bug on
|
||||
# 2026-04-23 that a generic "error|exception" check missed or misreported:
|
||||
#
|
||||
# "[hermes-agent error 401]" → gateway API_SERVER_KEY not propagated (hermes #12)
|
||||
# "Invalid API key" → tenant auth chain (CP #238 race)
|
||||
# "model_not_found" → hermes custom provider slug passthrough (#13)
|
||||
# "Encrypted content is not supported" → hermes codex_responses API misroute (#14)
|
||||
# "Unknown provider" → bridge misconfigured PROVIDER= (regression of #13 fix)
|
||||
# "hermes-agent unreachable" → gateway process died
|
||||
#
|
||||
# Fail LOUD with the specific pattern so CI log + alert channel makes the
|
||||
# regression unambiguous.
|
||||
if echo "$AGENT_TEXT" | grep -qF "[hermes-agent error 401]"; then
|
||||
fail "A2A — REGRESSION: hermes gateway auth broken (API_SERVER_KEY not in runtime env). See template-hermes#12. Raw: $AGENT_TEXT"
|
||||
fi
|
||||
if echo "$AGENT_TEXT" | grep -qF "hermes-agent unreachable"; then
|
||||
fail "A2A — REGRESSION: hermes gateway process down. Check /var/log/hermes-gateway.log on the workspace EC2. Raw: $AGENT_TEXT"
|
||||
fi
|
||||
if echo "$AGENT_TEXT" | grep -qF "model_not_found"; then
|
||||
fail "A2A — REGRESSION: model slug passed through with provider prefix. See template-hermes#13. Raw: $AGENT_TEXT"
|
||||
fi
|
||||
if echo "$AGENT_TEXT" | grep -qF "Encrypted content is not supported"; then
|
||||
fail "A2A — REGRESSION: hermes custom provider hit /v1/responses instead of chat_completions. Config.yaml should declare api_mode: chat_completions. See template-hermes#14. Raw: $AGENT_TEXT"
|
||||
fi
|
||||
if echo "$AGENT_TEXT" | grep -qF "Unknown provider"; then
|
||||
fail "A2A — REGRESSION: install.sh set PROVIDER to a value not in hermes's registry. Run 'hermes doctor' on the workspace to see valid values. Raw: $AGENT_TEXT"
|
||||
fi
|
||||
# Generic catch-all — falls through if none of the known regressions hit.
|
||||
if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
|
||||
fail "A2A returned an error-shaped response: $AGENT_TEXT"
|
||||
fi
|
||||
|
||||
# Content assertion — the prompt asks the model to reply with exactly "PONG".
|
||||
# Real models produce "PONG" (possibly with minor wrapping); a broken pipeline
|
||||
# that echoes the prompt back or returns truncated context won't. Normalize
|
||||
# to uppercase before matching to tolerate "pong" / "Pong".
|
||||
if ! echo "$AGENT_TEXT" | tr '[:lower:]' '[:upper:]' | grep -qF "PONG"; then
|
||||
fail "A2A reply didn't contain expected PONG token. Real: $AGENT_TEXT"
|
||||
fi
|
||||
|
||||
ok "A2A parent round-trip succeeded: \"${AGENT_TEXT:0:80}\""
|
||||
|
||||
# ─── 9. HMA + peers + activity (full mode) ─────────────────────────────
|
||||
|
||||
199
tools/test-hermes-bridge.sh
Executable file
199
tools/test-hermes-bridge.sh
Executable file
@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env bash
|
||||
# test-hermes-bridge.sh — regression tests for template-hermes install.sh's
|
||||
# OpenAI bridge logic. Runs offline (no network, no docker, no CI dependency).
|
||||
#
|
||||
# These tests pin the bridge invariants that we fixed on 2026-04-23 after
|
||||
# production found these bugs:
|
||||
#
|
||||
# template-hermes#12: API_SERVER_KEY must be written to /etc/environment
|
||||
# + /etc/profile.d/ so molecule-runtime inherits it.
|
||||
#
|
||||
# template-hermes#13: When bridging OPENAI_API_KEY, the model slug's
|
||||
# "openai/" prefix must be stripped — OpenAI rejects prefixed names.
|
||||
#
|
||||
# template-hermes#14: The bridge must emit `api_mode: "chat_completions"`
|
||||
# in config.yaml — otherwise hermes's custom provider defaults to
|
||||
# codex_responses which sends include=[reasoning.encrypted_content],
|
||||
# rejected by gpt-4o/gpt-4.1.
|
||||
#
|
||||
# Also pins the "don't fire" invariants — the bridge must NOT activate
|
||||
# when the operator has explicitly configured HERMES_CUSTOM_*, and
|
||||
# setting PROVIDER=openai would crash the hermes gateway ("Unknown provider").
|
||||
#
|
||||
# Invocation:
|
||||
#
|
||||
# bash tools/test-hermes-bridge.sh /path/to/template-hermes/install.sh
|
||||
#
|
||||
# Default path: ../molecule-ai-workspace-template-hermes/install.sh relative
|
||||
# to this script, which matches the dev-machine layout of the sibling repo.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
INSTALL_SH="${1:-$SCRIPT_DIR/../../molecule-ai-workspace-template-hermes/install.sh}"
|
||||
|
||||
if [ ! -f "$INSTALL_SH" ]; then
|
||||
echo "error: install.sh not found at $INSTALL_SH" >&2
|
||||
echo "usage: $0 [install.sh-path]" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
TMP=$(mktemp -d)
|
||||
trap 'rm -rf "$TMP"' EXIT
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# run_case — extract just the bridge + config.yaml write blocks from
|
||||
# install.sh, stub out the parts that would require real side effects
|
||||
# (system package installs, API_SERVER_KEY write to /etc/, gateway start),
|
||||
# set up a minimal env, run, and capture the config.yaml output.
|
||||
#
|
||||
# Args:
|
||||
# $1 = test name
|
||||
# $2+ = env assignments (e.g. OPENAI_API_KEY=xxx, HERMES_DEFAULT_MODEL=openai/gpt-4o)
|
||||
run_case() {
|
||||
local name="$1"; shift
|
||||
local case_dir="$TMP/$name"
|
||||
mkdir -p "$case_dir"
|
||||
|
||||
# Build a minimal harness that:
|
||||
# 1. Sources scripts/derive-provider.sh (real, from the template repo)
|
||||
# 2. Applies the bridge if-block (inlined verbatim from install.sh)
|
||||
# 3. Emits config.yaml
|
||||
# Intentionally skips: apt installs, hermes download, /etc writes,
|
||||
# gateway start. We care about the BRANCH LOGIC not the system effects.
|
||||
local template_dir
|
||||
template_dir=$(cd "$(dirname "$INSTALL_SH")" && pwd)
|
||||
|
||||
HERMES_HOME="$case_dir" \
|
||||
bash -c "
|
||||
set -euo pipefail
|
||||
HERMES_HOME='$case_dir'
|
||||
$(for kv in "$@"; do printf 'export %s\n' "$kv"; done)
|
||||
# Source derive-provider from the real template repo
|
||||
. '$template_dir/scripts/derive-provider.sh'
|
||||
DEFAULT_MODEL=\"\${HERMES_DEFAULT_MODEL:-nousresearch/hermes-4-70b}\"
|
||||
|
||||
# Bridge block — extracted 1:1 from install.sh (the shape must stay in sync).
|
||||
if [ \"\${PROVIDER}\" = \"custom\" ] && [ -n \"\${OPENAI_API_KEY:-}\" ] && [ -z \"\${HERMES_CUSTOM_BASE_URL:-}\" ] && [ -z \"\${HERMES_CUSTOM_API_KEY:-}\" ]; then
|
||||
export HERMES_CUSTOM_BASE_URL='https://api.openai.com/v1'
|
||||
export HERMES_CUSTOM_API_KEY=\"\${OPENAI_API_KEY}\"
|
||||
export HERMES_CUSTOM_API_MODE='chat_completions'
|
||||
DEFAULT_MODEL=\"\${DEFAULT_MODEL#openai/}\"
|
||||
fi
|
||||
|
||||
# Emit config.yaml (same shape as install.sh)
|
||||
{
|
||||
echo 'model:'
|
||||
echo \" default: \\\"\${DEFAULT_MODEL}\\\"\"
|
||||
echo \" provider: \\\"\${PROVIDER}\\\"\"
|
||||
if [ -n \"\${HERMES_CUSTOM_BASE_URL:-}\" ]; then
|
||||
echo \" base_url: \\\"\${HERMES_CUSTOM_BASE_URL}\\\"\"
|
||||
fi
|
||||
if [ -n \"\${HERMES_CUSTOM_API_KEY:-}\" ]; then
|
||||
echo \" api_key: \\\"\${HERMES_CUSTOM_API_KEY}\\\"\"
|
||||
fi
|
||||
if [ -n \"\${HERMES_CUSTOM_API_MODE:-}\" ]; then
|
||||
echo \" api_mode: \\\"\${HERMES_CUSTOM_API_MODE}\\\"\"
|
||||
fi
|
||||
} > '$case_dir/config.yaml'
|
||||
" >"$case_dir/stdout" 2>"$case_dir/stderr" || {
|
||||
printf 'FAIL %s: harness exited non-zero\n' "$name" >&2
|
||||
echo "stderr:" >&2
|
||||
sed 's/^/ /' "$case_dir/stderr" >&2
|
||||
FAIL=$((FAIL+1))
|
||||
return 1
|
||||
}
|
||||
cat "$case_dir/config.yaml"
|
||||
}
|
||||
|
||||
# assert_in — assert a fragment appears in the config.yaml of the named case.
|
||||
assert_in() {
|
||||
local name="$1" pattern="$2"
|
||||
if grep -qF "$pattern" "$TMP/$name/config.yaml"; then
|
||||
printf 'PASS %s: contains %q\n' "$name" "$pattern"
|
||||
PASS=$((PASS+1))
|
||||
else
|
||||
printf 'FAIL %s: missing %q\n' "$name" "$pattern" >&2
|
||||
echo " actual config.yaml:" >&2
|
||||
sed 's/^/ /' "$TMP/$name/config.yaml" >&2
|
||||
FAIL=$((FAIL+1))
|
||||
fi
|
||||
}
|
||||
|
||||
assert_not_in() {
|
||||
local name="$1" pattern="$2"
|
||||
if grep -qF "$pattern" "$TMP/$name/config.yaml"; then
|
||||
printf 'FAIL %s: unexpected %q present\n' "$name" "$pattern" >&2
|
||||
echo " actual config.yaml:" >&2
|
||||
sed 's/^/ /' "$TMP/$name/config.yaml" >&2
|
||||
FAIL=$((FAIL+1))
|
||||
else
|
||||
printf 'PASS %s: absent %q\n' "$name" "$pattern"
|
||||
PASS=$((PASS+1))
|
||||
fi
|
||||
}
|
||||
|
||||
# ─── Case 1: OpenAI bridge fires, strips prefix, sets api_mode ──────────
|
||||
# Regression guard for #13 + #14. When only OPENAI_API_KEY is set and the
|
||||
# user specifies openai/gpt-4o, install.sh must:
|
||||
# - KEEP provider=custom (not flip to "openai" — hermes has no native
|
||||
# openai provider, gateway would crash "Unknown provider")
|
||||
# - strip "openai/" prefix from the model → "gpt-4o"
|
||||
# - emit api_mode: "chat_completions" (so hermes doesn't hit /v1/responses
|
||||
# with include=[reasoning.encrypted_content] which gpt-4o rejects)
|
||||
run_case "openai-bridge-happy" \
|
||||
OPENAI_API_KEY=sk-test-abc \
|
||||
HERMES_DEFAULT_MODEL=openai/gpt-4o >/dev/null
|
||||
|
||||
assert_in "openai-bridge-happy" 'default: "gpt-4o"'
|
||||
assert_in "openai-bridge-happy" 'provider: "custom"'
|
||||
assert_in "openai-bridge-happy" 'base_url: "https://api.openai.com/v1"'
|
||||
assert_in "openai-bridge-happy" 'api_key: "sk-test-abc"'
|
||||
assert_in "openai-bridge-happy" 'api_mode: "chat_completions"'
|
||||
assert_not_in "openai-bridge-happy" 'provider: "openai"'
|
||||
assert_not_in "openai-bridge-happy" 'default: "openai/gpt-4o"'
|
||||
|
||||
# ─── Case 2: Bridge skipped when operator sets HERMES_CUSTOM_* ──────────
|
||||
# When an operator points at a self-hosted vLLM or similar, the bridge
|
||||
# must NOT overwrite their values. api_mode should NOT be forced to
|
||||
# chat_completions (the operator might want codex_responses for o1 models).
|
||||
run_case "operator-custom-wins" \
|
||||
OPENAI_API_KEY=sk-test-abc \
|
||||
HERMES_CUSTOM_BASE_URL=http://my-vllm:8080/v1 \
|
||||
HERMES_CUSTOM_API_KEY=operator-key \
|
||||
HERMES_DEFAULT_MODEL=openai/gpt-4o >/dev/null
|
||||
|
||||
assert_in "operator-custom-wins" 'base_url: "http://my-vllm:8080/v1"'
|
||||
assert_in "operator-custom-wins" 'api_key: "operator-key"'
|
||||
assert_not_in "operator-custom-wins" 'api_mode: "chat_completions"'
|
||||
assert_not_in "operator-custom-wins" 'base_url: "https://api.openai.com/v1"'
|
||||
|
||||
# ─── Case 3: Non-custom providers untouched ─────────────────────────────
|
||||
# An OPENROUTER_API_KEY should pick provider=openrouter (per
|
||||
# derive-provider.sh), and the bridge must not fire.
|
||||
run_case "openrouter-not-touched" \
|
||||
OPENROUTER_API_KEY=sk-or-test \
|
||||
OPENAI_API_KEY=sk-test-abc \
|
||||
HERMES_DEFAULT_MODEL=openai/gpt-4o >/dev/null
|
||||
|
||||
assert_in "openrouter-not-touched" 'provider: "openrouter"'
|
||||
assert_not_in "openrouter-not-touched" 'api_mode: "chat_completions"'
|
||||
assert_not_in "openrouter-not-touched" 'base_url: "https://api.openai.com/v1"'
|
||||
# openrouter keeps the full slug (it can resolve openai/gpt-4o)
|
||||
assert_in "openrouter-not-touched" 'default: "openai/gpt-4o"'
|
||||
|
||||
# ─── Case 4: Non-openai model on bridge path leaves slug alone ──────────
|
||||
# If the bridge fires but the model isn't prefixed with openai/, we don't
|
||||
# want to break the string. Prefix-strip is a no-op when the prefix isn't there.
|
||||
run_case "non-prefixed-model" \
|
||||
OPENAI_API_KEY=sk-test-abc \
|
||||
HERMES_DEFAULT_MODEL=gpt-4o >/dev/null
|
||||
|
||||
assert_in "non-prefixed-model" 'default: "gpt-4o"'
|
||||
|
||||
# ─── Summary ────────────────────────────────────────────────────────────
|
||||
echo ""
|
||||
echo "Hermes bridge test: PASS=$PASS FAIL=$FAIL"
|
||||
[ "$FAIL" = "0" ]
|
||||
@ -4,7 +4,7 @@ go 1.25.0
|
||||
|
||||
require (
|
||||
github.com/DATA-DOG/go-sqlmock v1.5.2
|
||||
github.com/Molecule-AI/molecule-ai-plugin-github-app-auth v0.0.0-20260416194734-2cd28737f845
|
||||
github.com/Molecule-AI/molecule-ai-plugin-github-app-auth v0.0.0-20260421064811-7d98ae51e31d
|
||||
github.com/alicebob/miniredis/v2 v2.37.0
|
||||
github.com/creack/pty v1.1.18
|
||||
github.com/docker/docker v28.2.2+incompatible
|
||||
@ -16,6 +16,7 @@ require (
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/gorilla/websocket v1.5.3
|
||||
github.com/lib/pq v1.10.9
|
||||
github.com/opencontainers/image-spec v1.1.1
|
||||
github.com/redis/go-redis/v9 v9.7.0
|
||||
github.com/robfig/cron/v3 v3.0.1
|
||||
golang.org/x/crypto v0.49.0
|
||||
@ -56,7 +57,6 @@ require (
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/morikuni/aec v1.1.0 // indirect
|
||||
github.com/opencontainers/go-digest v1.0.0 // indirect
|
||||
github.com/opencontainers/image-spec v1.1.1 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
@ -78,4 +78,3 @@ require (
|
||||
google.golang.org/protobuf v1.36.11 // indirect
|
||||
gotest.tools/v3 v3.5.2 // indirect
|
||||
)
|
||||
|
||||
|
||||
@ -4,8 +4,8 @@ github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7Oputl
|
||||
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
|
||||
github.com/Microsoft/go-winio v0.4.21 h1:+6mVbXh4wPzUrl1COX9A+ZCvEpYsOBZ6/+kwDnvLyro=
|
||||
github.com/Microsoft/go-winio v0.4.21/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
|
||||
github.com/Molecule-AI/molecule-ai-plugin-github-app-auth v0.0.0-20260416194734-2cd28737f845 h1:Pae8GmpJOP/Bpf2KE1FhdN3zoPSbV/tl25yiAqEc4lM=
|
||||
github.com/Molecule-AI/molecule-ai-plugin-github-app-auth v0.0.0-20260416194734-2cd28737f845/go.mod h1:3a6LR/zd7FjR9ZwLTbytwYlWuCBsbCOVFlEg0WnoYiM=
|
||||
github.com/Molecule-AI/molecule-ai-plugin-github-app-auth v0.0.0-20260421064811-7d98ae51e31d h1:GpYhP6FxaJZc1Ljy5/YJ9ZIVGvfOqZBmDolNr2S5x2g=
|
||||
github.com/Molecule-AI/molecule-ai-plugin-github-app-auth v0.0.0-20260421064811-7d98ae51e31d/go.mod h1:3a6LR/zd7FjR9ZwLTbytwYlWuCBsbCOVFlEg0WnoYiM=
|
||||
github.com/alicebob/miniredis/v2 v2.37.0 h1:RheObYW32G1aiJIj81XVt78ZHJpHonHLHW7OLIshq68=
|
||||
github.com/alicebob/miniredis/v2 v2.37.0/go.mod h1:TcL7YfarKPGDAthEtl5NBeHZfeUQj6OXMm/+iu5cLMM=
|
||||
github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
|
||||
|
||||
@ -357,6 +357,18 @@ func validateDiscoveryCaller(ctx context.Context, c *gin.Context, workspaceID st
|
||||
|
||||
tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization"))
|
||||
if tok == "" {
|
||||
// Canvas hits this endpoint via session cookie, not bearer token.
|
||||
// Add verifiedCPSession() as a fallback after the bearer check so
|
||||
// SaaS canvas Peers tab doesn't 401. Self-hosted workspaces are
|
||||
// unaffected — they have no CP session cookie.
|
||||
ok, presented := middleware.VerifiedCPSession(c.GetHeader("Cookie"))
|
||||
if ok {
|
||||
return nil
|
||||
}
|
||||
if presented {
|
||||
c.JSON(http.StatusUnauthorized, gin.H{"error": "invalid session"})
|
||||
return errors.New("invalid session")
|
||||
}
|
||||
c.JSON(http.StatusUnauthorized, gin.H{"error": "missing workspace auth token"})
|
||||
return errors.New("missing token")
|
||||
}
|
||||
|
||||
@ -105,6 +105,183 @@ func TestTerminalConnect_KI005_RejectsUnauthorizedCrossWorkspace(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestKI005_SelfAccess_AlwaysAllowed — when callerID equals the target workspace
|
||||
// ID the request always passes (self-access: workspace's own token reaches its
|
||||
// own terminal without needing the hierarchy check).
|
||||
func TestKI005_SelfAccess_AlwaysAllowed(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
mock.ExpectQuery("SELECT COALESCE").
|
||||
WithArgs("ws-self").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"instance_id"}).AddRow(""))
|
||||
|
||||
h := NewTerminalHandler(nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-self"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-self/terminal", nil)
|
||||
// Self-access: X-Workspace-ID matches the route param, no auth needed.
|
||||
c.Request.Header.Set("X-Workspace-ID", "ws-self")
|
||||
|
||||
h.HandleConnect(c)
|
||||
|
||||
// Self-access passes without any token check or CanCommunicate query.
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("self-access: expected 503 (Docker unavailable), got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestKI005_CanCommunicatePeer_Allowed — when the caller and target are siblings
|
||||
// (share a parent), CanCommunicate returns true and the terminal access is granted.
|
||||
func TestKI005_CanCommunicatePeer_Allowed(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
// DB: caller workspace row for token validation.
|
||||
mock.ExpectQuery("SELECT t.id, t.workspace_id").
|
||||
WithArgs(sqlmock.AnyArg()).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id"}).
|
||||
AddRow("tok-caller", "ws-peer-a"))
|
||||
|
||||
// DB: caller and target are siblings → CanCommunicate queries both.
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id").
|
||||
WithArgs("ws-peer-a").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).
|
||||
AddRow("ws-peer-a", "org-lead"))
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id").
|
||||
WithArgs("ws-peer-b").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).
|
||||
AddRow("ws-peer-b", "org-lead"))
|
||||
|
||||
// DB: target workspace has no instance_id → local Docker path.
|
||||
mock.ExpectQuery("SELECT COALESCE").
|
||||
WithArgs("ws-peer-b").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"instance_id"}).AddRow(""))
|
||||
|
||||
h := NewTerminalHandler(nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-peer-b"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-peer-b/terminal", nil)
|
||||
c.Request.Header.Set("X-Workspace-ID", "ws-peer-a")
|
||||
c.Request.Header.Set("Authorization", "Bearer peer-token")
|
||||
|
||||
h.HandleConnect(c)
|
||||
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("peer access: expected 503 (Docker unavailable), got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestKI005_CanCommunicateNonPeer_Forbidden — when caller and target have
|
||||
// different parents (not siblings, not root-level), CanCommunicate returns
|
||||
// false and the terminal access is blocked with 403.
|
||||
func TestKI005_CanCommunicateNonPeer_Forbidden(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
// DB: caller workspace row for token validation.
|
||||
mock.ExpectQuery("SELECT t.id, t.workspace_id").
|
||||
WithArgs(sqlmock.AnyArg()).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id"}).
|
||||
AddRow("tok-attacker", "ws-attacker"))
|
||||
|
||||
// DB: caller and target have different parents → CanCommunicate denies.
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id").
|
||||
WithArgs("ws-attacker").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).
|
||||
AddRow("ws-attacker", "org-a"))
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id").
|
||||
WithArgs("ws-victim").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).
|
||||
AddRow("ws-victim", "org-b"))
|
||||
|
||||
h := NewTerminalHandler(nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-victim"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-victim/terminal", nil)
|
||||
c.Request.Header.Set("X-Workspace-ID", "ws-attacker")
|
||||
c.Request.Header.Set("Authorization", "Bearer attacker-token")
|
||||
|
||||
h.HandleConnect(c)
|
||||
|
||||
if w.Code != http.StatusForbidden {
|
||||
t.Errorf("cross-workspace: expected 403, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestKI005_TokenMismatch_Unauthorized — when the bearer token belongs to a
|
||||
// different workspace than the claimed X-Workspace-ID, ValidateToken fails
|
||||
// and the request is rejected with 401 before CanCommunicate is checked.
|
||||
func TestKI005_TokenMismatch_Unauthorized(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
// DB: token belongs to a different workspace than claimed — ValidateToken
|
||||
// returns ErrInvalidToken (workspaceID mismatch).
|
||||
mock.ExpectQuery("SELECT t.id, t.workspace_id").
|
||||
WithArgs(sqlmock.AnyArg()).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id"}))
|
||||
|
||||
h := NewTerminalHandler(nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-target"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-target/terminal", nil)
|
||||
c.Request.Header.Set("X-Workspace-ID", "ws-claimed")
|
||||
c.Request.Header.Set("Authorization", "Bearer wrong-workspace-token")
|
||||
|
||||
h.HandleConnect(c)
|
||||
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("token mismatch: expected 401, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestKI005_NoXWorkspaceIDHeader_LegacyAllowed — when no X-Workspace-ID header
|
||||
// is present (legacy canvas, direct browser access), the hierarchy check is
|
||||
// skipped and the request proceeds to the container (standard WorkspaceAuth
|
||||
// gates apply upstream).
|
||||
func TestKI005_NoXWorkspaceIDHeader_LegacyAllowed(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
// DB: no instance_id → local Docker path.
|
||||
mock.ExpectQuery("SELECT COALESCE").
|
||||
WithArgs("ws-legacy").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"instance_id"}).AddRow(""))
|
||||
|
||||
h := NewTerminalHandler(nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-legacy"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-legacy/terminal", nil)
|
||||
// No X-Workspace-ID header: legacy access, no hierarchy check.
|
||||
|
||||
h.HandleConnect(c)
|
||||
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("legacy access: expected 503 (Docker unavailable), got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestOpenTunnelCmd_BuildsArgv guards against silent drift in the EIC
|
||||
// tunnel invocation (e.g. someone flipping --local-port to --port).
|
||||
func TestOpenTunnelCmd_BuildsArgv(t *testing.T) {
|
||||
|
||||
@ -402,6 +402,17 @@ func (h *WorkspaceHandler) issueAndInjectToken(ctx context.Context, workspaceID
|
||||
cfg.ConfigFiles = make(map[string][]byte)
|
||||
}
|
||||
cfg.ConfigFiles[".auth_token"] = []byte(token)
|
||||
// Option B (issue #1877): write token to volume BEFORE ContainerStart.
|
||||
// Pre-write eliminates the race window where a restarted container could
|
||||
// read a stale /configs/.auth_token before WriteFilesToContainer runs.
|
||||
// This call is best-effort — if it fails (or provisioner is nil in tests)
|
||||
// we still log and fall through; the runtime's heartbeat.py will retry
|
||||
// on 401 if needed.
|
||||
if h.provisioner != nil {
|
||||
if writeErr := h.provisioner.WriteAuthTokenToVolume(ctx, workspaceID, token); writeErr != nil {
|
||||
log.Printf("Provisioner: warning — pre-write token to volume failed for %s: %v (token still injected via WriteFilesToContainer after start)", workspaceID, writeErr)
|
||||
}
|
||||
}
|
||||
log.Printf("Provisioner: injected fresh auth token for workspace %s into config volume", workspaceID)
|
||||
}
|
||||
|
||||
|
||||
@ -132,6 +132,17 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) {
|
||||
RebuildConfig: body.RebuildConfig,
|
||||
})
|
||||
|
||||
// #239: rebuild_config=true — try org-templates as last-resort source so a
|
||||
// workspace with a destroyed config volume can self-recover without admin
|
||||
// intervention. Only fires when no other template was resolved above.
|
||||
if templatePath == "" && body.RebuildConfig {
|
||||
if p, label := resolveOrgTemplate(h.configsDir, wsName); p != "" {
|
||||
templatePath = p
|
||||
configLabel = label
|
||||
log.Printf("Restart: rebuild_config — using org-template %s for %s (%s)", label, wsName, id)
|
||||
}
|
||||
}
|
||||
|
||||
if templatePath == "" {
|
||||
log.Printf("Restart: reusing existing config volume for %s (%s)", wsName, id)
|
||||
} else {
|
||||
|
||||
@ -193,7 +193,7 @@ func verifiedCPSession(cookieHeader string) (valid, presented bool) {
|
||||
client := &http.Client{Timeout: 3 * time.Second}
|
||||
req, err := http.NewRequest("GET", verifyURL, nil)
|
||||
if err != nil {
|
||||
log.Printf("verifiedCPSession: build req: %v", err)
|
||||
log.Printf("VerifiedCPSession: build req: %v", err)
|
||||
return false, true
|
||||
}
|
||||
req.Header.Set("Cookie", cookieHeader)
|
||||
@ -201,7 +201,7 @@ func verifiedCPSession(cookieHeader string) (valid, presented bool) {
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
log.Printf("verifiedCPSession: upstream: %v", err)
|
||||
log.Printf("VerifiedCPSession: upstream: %v", err)
|
||||
// NOTE: we deliberately do NOT cache transport failures.
|
||||
// Caching them would mean a 3s CP blip locks out all users
|
||||
// for the negative-TTL window. Next request retries.
|
||||
@ -231,10 +231,10 @@ func verifiedCPSession(cookieHeader string) (valid, presented bool) {
|
||||
return true, true
|
||||
}
|
||||
|
||||
// VerifiedCPSession is the exported alias for handlers/discovery.go.
|
||||
// Internal-only deployments (self-hosted / dev) where CP_UPSTREAM_URL
|
||||
// is unset get (false, true) so the session path is skipped and the
|
||||
// bearer token path runs as normal.
|
||||
// VerifiedCPSession is the exported alias — callers in other packages
|
||||
// (discovery.go, wsauth_middleware.go) use this name. Internal-only
|
||||
// deployments (self-hosted/dev) where CP_UPSTREAM_URL is unset get
|
||||
// (false, true) so the session path is skipped and bearer token auth runs.
|
||||
func VerifiedCPSession(cookieHeader string) (valid, presented bool) {
|
||||
return verifiedCPSession(cookieHeader)
|
||||
}
|
||||
|
||||
@ -37,7 +37,7 @@ func mockCPServer(t *testing.T, status int, body string) (*httptest.Server, *ato
|
||||
|
||||
func TestVerifiedCPSession_EmptyCookie(t *testing.T) {
|
||||
resetSessionCache()
|
||||
ok, presented := verifiedCPSession("")
|
||||
ok, presented := VerifiedCPSession("")
|
||||
if ok || presented {
|
||||
t.Errorf("empty cookie should be (false, false); got (%v, %v)", ok, presented)
|
||||
}
|
||||
@ -47,7 +47,7 @@ func TestVerifiedCPSession_NoSlugConfigured(t *testing.T) {
|
||||
resetSessionCache()
|
||||
t.Setenv("CP_UPSTREAM_URL", "https://cp.test")
|
||||
t.Setenv("MOLECULE_ORG_SLUG", "")
|
||||
ok, presented := verifiedCPSession("session=foo")
|
||||
ok, presented := VerifiedCPSession("session=foo")
|
||||
// Without a slug we can't ask about tenant membership. Must
|
||||
// refuse (false, false) — caller falls through to bearer tier.
|
||||
if ok || presented {
|
||||
@ -59,7 +59,7 @@ func TestVerifiedCPSession_NoCPConfigured(t *testing.T) {
|
||||
resetSessionCache()
|
||||
t.Setenv("CP_UPSTREAM_URL", "")
|
||||
t.Setenv("MOLECULE_ORG_SLUG", "acme")
|
||||
ok, presented := verifiedCPSession("session=foo")
|
||||
ok, presented := VerifiedCPSession("session=foo")
|
||||
// Self-hosted path: CP not configured, but cookie WAS presented.
|
||||
// Presented=true lets the caller know not to fall through to
|
||||
// bearer as if no credential arrived.
|
||||
@ -74,7 +74,7 @@ func TestVerifiedCPSession_MemberTrue(t *testing.T) {
|
||||
t.Setenv("CP_UPSTREAM_URL", srv.URL)
|
||||
t.Setenv("MOLECULE_ORG_SLUG", "acme")
|
||||
|
||||
ok, presented := verifiedCPSession("session=valid")
|
||||
ok, presented := VerifiedCPSession("session=valid")
|
||||
if !ok || !presented {
|
||||
t.Errorf("valid member should be (true, true); got (%v, %v)", ok, presented)
|
||||
}
|
||||
@ -83,7 +83,7 @@ func TestVerifiedCPSession_MemberTrue(t *testing.T) {
|
||||
}
|
||||
|
||||
// Second call must be served from cache.
|
||||
ok, _ = verifiedCPSession("session=valid")
|
||||
ok, _ = VerifiedCPSession("session=valid")
|
||||
if !ok {
|
||||
t.Errorf("cached call should still be true")
|
||||
}
|
||||
@ -99,7 +99,7 @@ func TestVerifiedCPSession_MemberFalse(t *testing.T) {
|
||||
t.Setenv("CP_UPSTREAM_URL", srv.URL)
|
||||
t.Setenv("MOLECULE_ORG_SLUG", "acme")
|
||||
|
||||
ok, presented := verifiedCPSession("session=wrong-tenant")
|
||||
ok, presented := VerifiedCPSession("session=wrong-tenant")
|
||||
if ok || !presented {
|
||||
t.Errorf("non-member should be (false, true); got (%v, %v)", ok, presented)
|
||||
}
|
||||
@ -107,7 +107,7 @@ func TestVerifiedCPSession_MemberFalse(t *testing.T) {
|
||||
t.Fatalf("expected 1 upstream hit")
|
||||
}
|
||||
// Cached negatively.
|
||||
_, _ = verifiedCPSession("session=wrong-tenant")
|
||||
_, _ = VerifiedCPSession("session=wrong-tenant")
|
||||
if hits.Load() != 1 {
|
||||
t.Errorf("negative result should cache too; got %d hits", hits.Load())
|
||||
}
|
||||
@ -119,7 +119,7 @@ func TestVerifiedCPSession_Upstream401(t *testing.T) {
|
||||
t.Setenv("CP_UPSTREAM_URL", srv.URL)
|
||||
t.Setenv("MOLECULE_ORG_SLUG", "acme")
|
||||
|
||||
ok, presented := verifiedCPSession("session=expired")
|
||||
ok, presented := VerifiedCPSession("session=expired")
|
||||
if ok || !presented {
|
||||
t.Errorf("401 upstream should be (false, true); got (%v, %v)", ok, presented)
|
||||
}
|
||||
@ -131,7 +131,7 @@ func TestVerifiedCPSession_MalformedJSON(t *testing.T) {
|
||||
t.Setenv("CP_UPSTREAM_URL", srv.URL)
|
||||
t.Setenv("MOLECULE_ORG_SLUG", "acme")
|
||||
|
||||
ok, presented := verifiedCPSession("session=broken")
|
||||
ok, presented := VerifiedCPSession("session=broken")
|
||||
if ok || !presented {
|
||||
t.Errorf("malformed body should be (false, true); got (%v, %v)", ok, presented)
|
||||
}
|
||||
@ -143,7 +143,7 @@ func TestVerifiedCPSession_TransportErrorNotCached(t *testing.T) {
|
||||
t.Setenv("CP_UPSTREAM_URL", "http://127.0.0.1:1")
|
||||
t.Setenv("MOLECULE_ORG_SLUG", "acme")
|
||||
|
||||
ok, presented := verifiedCPSession("session=whatever")
|
||||
ok, presented := VerifiedCPSession("session=whatever")
|
||||
if ok || !presented {
|
||||
t.Errorf("transport error should be (false, true); got (%v, %v)", ok, presented)
|
||||
}
|
||||
@ -178,12 +178,12 @@ func TestVerifiedCPSession_CrossTenantIsolation(t *testing.T) {
|
||||
cookie := "session=shared-auth"
|
||||
|
||||
t.Setenv("MOLECULE_ORG_SLUG", "acme")
|
||||
if ok, _ := verifiedCPSession(cookie); !ok {
|
||||
if ok, _ := VerifiedCPSession(cookie); !ok {
|
||||
t.Errorf("acme should say member=true")
|
||||
}
|
||||
|
||||
t.Setenv("MOLECULE_ORG_SLUG", "bob")
|
||||
if ok, _ := verifiedCPSession(cookie); ok {
|
||||
if ok, _ := VerifiedCPSession(cookie); ok {
|
||||
t.Errorf("bob tenant must NOT accept acme cookie despite same session bytes")
|
||||
}
|
||||
if len(reqs) != 2 {
|
||||
|
||||
@ -174,7 +174,7 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
|
||||
// hosted / dev deploys without a CP fall through to the
|
||||
// bearer-only path unchanged.
|
||||
if cookieHeader := c.GetHeader("Cookie"); cookieHeader != "" {
|
||||
if ok, _ := verifiedCPSession(cookieHeader); ok {
|
||||
if ok, _ := VerifiedCPSession(cookieHeader); ok {
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
|
||||
@ -0,0 +1,127 @@
|
||||
package provisioner
|
||||
|
||||
// Regression tests for PR #1738 (merged 2026-04-23) — CPProvisioner.Stop +
|
||||
// IsRunning must look up the real EC2 instance_id (i-*) from the DB
|
||||
// before calling the control plane, NOT pass the workspace UUID verbatim.
|
||||
//
|
||||
// Original bug:
|
||||
// url := fmt.Sprintf("%s/cp/workspaces/%s?instance_id=%s",
|
||||
// baseURL, workspaceID, workspaceID)
|
||||
// ^^^^^^^^^^^^^^
|
||||
// sends UUID as instance_id
|
||||
//
|
||||
// AWS then rejects with InvalidInstanceID.Malformed, the next provision
|
||||
// hits InvalidGroup.Duplicate on the leftover SG, and Save & Restart
|
||||
// cascades into a full failure. Production incident 2026-04-22 on
|
||||
// hongmingwang workspace a8af9d79 + recurrent on every SaaS workspace
|
||||
// secret update that triggers a restart.
|
||||
//
|
||||
// These tests pin two invariants of the fix:
|
||||
// 1. Stop + IsRunning query resolveInstanceID(ctx, workspaceID) BEFORE
|
||||
// hitting CP, and use the returned i-* ID (not the workspace UUID)
|
||||
// in the instance_id query param.
|
||||
// 2. Empty instance_id → no CP call (idempotent no-op).
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestStop_UsesRealInstanceIDNotWorkspaceUUID is the load-bearing
|
||||
// regression guard for #1738. If someone reverts the resolveInstanceID
|
||||
// lookup and ships the `workspaceID, workspaceID` version back, this
|
||||
// test fails immediately.
|
||||
func TestStop_UsesRealInstanceIDNotWorkspaceUUID(t *testing.T) {
|
||||
primeInstanceIDLookup(t, map[string]string{
|
||||
"ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c": "i-0a1b2c3d4e5f67890",
|
||||
})
|
||||
|
||||
var sawInstance string
|
||||
var sawPath string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
sawInstance = r.URL.Query().Get("instance_id")
|
||||
sawPath = r.URL.Path
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
p := &CPProvisioner{
|
||||
baseURL: srv.URL,
|
||||
orgID: "org-1",
|
||||
sharedSecret: "s3cret",
|
||||
adminToken: "tok-xyz",
|
||||
httpClient: srv.Client(),
|
||||
}
|
||||
if err := p.Stop(context.Background(), "ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c"); err != nil {
|
||||
t.Fatalf("Stop: %v", err)
|
||||
}
|
||||
|
||||
// Load-bearing assertion: the AWS-facing instance_id must be the
|
||||
// i-* ID from the DB, NEVER the workspace UUID.
|
||||
if sawInstance != "i-0a1b2c3d4e5f67890" {
|
||||
t.Errorf("#1738 REGRESSION: instance_id query = %q, want i-0a1b2c3d4e5f67890. "+
|
||||
"CP would forward this to AWS TerminateInstances — a UUID triggers "+
|
||||
"InvalidInstanceID.Malformed and orphans the EC2. See PR #1738.", sawInstance)
|
||||
}
|
||||
|
||||
// Sanity: path still carries the workspace UUID (that's how CP looks
|
||||
// up the row). Only the instance_id query param changed.
|
||||
if sawPath != "/cp/workspaces/ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c" {
|
||||
t.Errorf("path = %q, want /cp/workspaces/ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c", sawPath)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStop_NoInstanceIDSkipsCPCall — when the workspace has no EC2 on
|
||||
// file (never provisioned, already deprovisioned, or external runtime),
|
||||
// Stop must be a no-op. Calling CP with empty instance_id triggers the
|
||||
// exact AWS error the fix was meant to prevent.
|
||||
func TestStop_NoInstanceIDSkipsCPCall(t *testing.T) {
|
||||
primeInstanceIDLookup(t, map[string]string{}) // empty map → "" for everything
|
||||
|
||||
called := false
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
called = true
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
p := &CPProvisioner{baseURL: srv.URL, orgID: "org-1", httpClient: srv.Client()}
|
||||
if err := p.Stop(context.Background(), "ws-never-provisioned"); err != nil {
|
||||
t.Errorf("Stop with no instance_id should be no-op, got err: %v", err)
|
||||
}
|
||||
if called {
|
||||
t.Error("#1738 REGRESSION: Stop hit CP with empty instance_id — would trigger " +
|
||||
"InvalidInstanceID.Malformed downstream. Fix must short-circuit on empty lookup.")
|
||||
}
|
||||
}
|
||||
|
||||
// TestIsRunning_UsesRealInstanceIDNotWorkspaceUUID mirrors the Stop test
|
||||
// for IsRunning's GET /cp/workspaces/:id/status?instance_id=... path.
|
||||
// Same class of bug, same acceptance criterion.
|
||||
func TestIsRunning_UsesRealInstanceIDNotWorkspaceUUID(t *testing.T) {
|
||||
primeInstanceIDLookup(t, map[string]string{
|
||||
"ws-abc": "i-deadbeef",
|
||||
})
|
||||
|
||||
var sawInstance string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
sawInstance = r.URL.Query().Get("instance_id")
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"state":"running"}`))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
p := &CPProvisioner{baseURL: srv.URL, orgID: "org-1", httpClient: srv.Client()}
|
||||
running, err := p.IsRunning(context.Background(), "ws-abc")
|
||||
if err != nil {
|
||||
t.Fatalf("IsRunning: %v", err)
|
||||
}
|
||||
if !running {
|
||||
t.Errorf("expected running=true")
|
||||
}
|
||||
if sawInstance != "i-deadbeef" {
|
||||
t.Errorf("#1738 REGRESSION: IsRunning sent instance_id=%q, want i-deadbeef", sawInstance)
|
||||
}
|
||||
}
|
||||
@ -749,6 +749,41 @@ func (p *Provisioner) ReadFromVolume(ctx context.Context, volumeName, filePath s
|
||||
return clean, nil
|
||||
}
|
||||
|
||||
// WriteAuthTokenToVolume writes the workspace auth token into the config volume
|
||||
// BEFORE the container starts, eliminating the token-injection race window where
|
||||
// a restarted container could read a stale token from /configs/.auth_token before
|
||||
// WriteFilesToContainer writes the new one. Issue #1877.
|
||||
//
|
||||
// Uses a throwaway alpine container to write directly to the named volume,
|
||||
// bypassing the container lifecycle entirely.
|
||||
func (p *Provisioner) WriteAuthTokenToVolume(ctx context.Context, workspaceID, token string) error {
|
||||
volName := ConfigVolumeName(workspaceID)
|
||||
resp, err := p.cli.ContainerCreate(ctx, &container.Config{
|
||||
Image: "alpine",
|
||||
Cmd: []string{"sh", "-c", "mkdir -p /vol && printf '%s' $TOKEN > /vol/.auth_token && chmod 0600 /vol/.auth_token"},
|
||||
Env: []string{"TOKEN=" + token},
|
||||
}, &container.HostConfig{
|
||||
Binds: []string{volName + ":/vol"},
|
||||
}, nil, nil, "")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create token-write container: %w", err)
|
||||
}
|
||||
defer p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true})
|
||||
if err := p.cli.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil {
|
||||
return fmt.Errorf("failed to start token-write container: %w", err)
|
||||
}
|
||||
waitCh, errCh := p.cli.ContainerWait(ctx, resp.ID, container.WaitConditionNotRunning)
|
||||
select {
|
||||
case <-waitCh:
|
||||
case writeErr := <-errCh:
|
||||
if writeErr != nil {
|
||||
return fmt.Errorf("token-write container exited with error: %w", writeErr)
|
||||
}
|
||||
}
|
||||
log.Printf("Provisioner: wrote auth token to volume %s/.auth_token", volName)
|
||||
return nil
|
||||
}
|
||||
|
||||
// execInContainer runs a command inside a running container as root.
|
||||
// Best-effort: logs errors but does not fail the caller.
|
||||
func (p *Provisioner) execInContainer(ctx context.Context, containerID string, cmd []string) {
|
||||
|
||||
@ -17,7 +17,7 @@ from pathlib import Path
|
||||
|
||||
import httpx
|
||||
|
||||
from platform_auth import auth_headers
|
||||
from platform_auth import auth_headers, refresh_cache
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -102,6 +102,35 @@ class HeartbeatLoop:
|
||||
self._consecutive_failures = 0
|
||||
except Exception as e:
|
||||
self._consecutive_failures += 1
|
||||
# Issue #1877: if heartbeat 401'd, re-read the token from disk
|
||||
# and retry once. This handles the platform's token-rotation race
|
||||
# where WriteFilesToContainer hasn't finished writing the new
|
||||
# token before the runtime boots and caches the old value.
|
||||
is_401 = False
|
||||
if isinstance(e, httpx.HTTPStatusError) and e.response.status_code == 401:
|
||||
is_401 = True
|
||||
if is_401:
|
||||
logger.warning("Heartbeat 401 for %s — refreshing token cache and retrying once", self.workspace_id)
|
||||
refresh_cache()
|
||||
try:
|
||||
await client.post(
|
||||
f"{self.platform_url}/registry/heartbeat",
|
||||
json={
|
||||
"workspace_id": self.workspace_id,
|
||||
"error_rate": self.error_rate,
|
||||
"sample_error": self.sample_error,
|
||||
"active_tasks": self.active_tasks,
|
||||
"current_task": self.current_task,
|
||||
"uptime_seconds": int(time.time() - self.start_time),
|
||||
},
|
||||
headers=auth_headers(),
|
||||
)
|
||||
self._consecutive_failures = 0
|
||||
self.request_count += 1
|
||||
except Exception:
|
||||
# Retry also failed — fall through to the normal
|
||||
# failure tracking below.
|
||||
pass
|
||||
if self._consecutive_failures <= 3 or self._consecutive_failures % MAX_CONSECUTIVE_FAILURES == 0:
|
||||
logger.warning("Heartbeat failed (%d consecutive): %s", self._consecutive_failures, e)
|
||||
if self._consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
|
||||
|
||||
@ -103,3 +103,14 @@ def clear_cache() -> None:
|
||||
files between cases."""
|
||||
global _cached_token
|
||||
_cached_token = None
|
||||
|
||||
|
||||
def refresh_cache() -> str | None:
|
||||
"""Force re-read of the token from disk, discarding the in-process cache.
|
||||
|
||||
Use this when a 401 response suggests the cached token is stale —
|
||||
e.g. after the platform rotates tokens during a restart (issue #1877).
|
||||
Returns the (new) token value or None if not found/error."""
|
||||
global _cached_token
|
||||
_cached_token = None
|
||||
return get_token()
|
||||
|
||||
@ -24,7 +24,7 @@ Planned as the ecosystem matures (none are implemented yet — rule of
|
||||
three: promote a class here only after 3+ plugins ship the same custom
|
||||
shape via their own ``adapters/<runtime>.py``):
|
||||
|
||||
* ``MCPServerAdaptor`` — install a plugin as an MCP server *(TODO)*
|
||||
* :class:`MCPServerAdaptor` — install a plugin as an MCP server ✅ (issue #847)
|
||||
* ``DeepAgentsSubagentAdaptor`` — register a DeepAgents sub-agent
|
||||
(runtime-locked to deepagents) *(TODO)*
|
||||
* ``LangGraphSubgraphAdaptor`` — install a LangGraph sub-graph *(TODO)*
|
||||
@ -339,5 +339,95 @@ def _deep_merge_hooks(existing: dict, fragment: dict) -> dict:
|
||||
for top_key, val in fragment.items():
|
||||
if top_key == "hooks":
|
||||
continue
|
||||
out.setdefault(top_key, val)
|
||||
# mcpServers must be deep-merged: plugin A ships "firecrawl" and
|
||||
# plugin B ships "github" → both entries land in settings.json.
|
||||
# Using setdefault would skip the fragment's value when the key
|
||||
# already exists, so we explicitly handle the dict case.
|
||||
if top_key in out and isinstance(out[top_key], dict) and isinstance(val, dict):
|
||||
out[top_key] = {**out[top_key], **val}
|
||||
else:
|
||||
out.setdefault(top_key, val)
|
||||
return out
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# MCPServerAdaptor — issue #847.
|
||||
# Promoted from custom adapters after 4 plugin proposals (molecule-firecrawl
|
||||
# #512, molecule-github-mcp #520, molecule-browser-use #553, mcp-connector
|
||||
# #573) all shipped the same pattern independently.
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
class MCPServerAdaptor:
    """Built-in sub-type adaptor for plugins that wrap an MCP server.

    Expected plugin layout:

    * ``settings-fragment.json`` — carries an ``mcpServers`` block in the
      standard Claude Code ``claude_desktop_config`` format, e.g.:

      .. code-block:: json

          {
            "mcpServers": {
              "my-server": {
                "command": "npx",
                "args": ["-y", "@org/my-mcp-server"]
              }
            }
          }

    * ``skills/<name>/SKILL.md`` (optional) — agentskills.io skill docs;
      handled by ``AgentskillsAdaptor`` logic.
    * ``rules/*.md`` (optional) — always-on prose appended to CLAUDE.md;
      handled by ``AgentskillsAdaptor`` logic.
    * ``setup.sh`` (optional) — install npm packages, build binaries, etc.;
      handled by ``AgentskillsAdaptor`` logic.

    ``install()`` does two things, in order:

    1. ``_install_claude_layer()`` merges the fragment's ``mcpServers``
       block into ``<configs>/.claude/settings.json``. Any ``hooks`` in
       the same fragment are merged along the same path, so MCP-server
       plugins may also ship hooks.
    2. Skills + rules + setup.sh are delegated to ``AgentskillsAdaptor``.

    ``uninstall()`` removes skills and rules via
    ``AgentskillsAdaptor.uninstall()`` but deliberately leaves the
    ``mcpServers`` entries in ``settings.json``: those configurations are
    often shared with other tools or manually curated, so deleting them
    could break a user's setup. Manual removal is left to the user.

    Usage — in the plugin's per-runtime adapter file:

    .. code-block:: python

        # plugins/<name>/adapters/claude_code.py
        from plugins_registry.builtins import MCPServerAdaptor as Adaptor
    """

    def __init__(self, plugin_name: str, runtime: str) -> None:
        self.plugin_name = plugin_name
        self.runtime = runtime

    async def install(self, ctx: InstallContext) -> InstallResult:
        outcome = InstallResult(
            plugin_name=self.plugin_name,
            runtime=self.runtime,
            source="plugin",
        )
        # Step 1: fold mcpServers (and any hooks) from settings-fragment.json
        # into the Claude settings layer.
        _install_claude_layer(ctx, outcome, self.plugin_name)
        # Step 2: skills, rules and setup.sh follow the agentskills flow
        # exactly, so reuse that adaptor wholesale.
        delegated = await AgentskillsAdaptor(self.plugin_name, self.runtime).install(ctx)
        outcome.files_written.extend(delegated.files_written)
        outcome.warnings.extend(delegated.warnings)
        return outcome

    async def uninstall(self, ctx: InstallContext) -> None:
        # Skills + rules cleanup is delegated to AgentskillsAdaptor.
        # NOTE: mcpServers entries are intentionally NOT removed (see class docstring).
        await AgentskillsAdaptor(self.plugin_name, self.runtime).uninstall(ctx)
|
||||
|
||||
@ -481,3 +481,234 @@ def test_deep_merge_hooks_top_level_keys_merged():
|
||||
# setdefault semantics: existing keys win, new keys are added
|
||||
assert result["someKey"] == "old"
|
||||
assert result["anotherKey"] == "value"
|
||||
|
||||
|
||||
def test_deep_merge_hooks_mcpServers_deep_merged():
    """mcpServers dicts from two plugins must be merged, not replaced.

    Plugin A ships firecrawl, plugin B ships github → both land in the
    final settings.json (issue #847 motivation).
    """
    base = {
        "mcpServers": {
            "firecrawl": {"command": "npx", "args": ["-y", "@org/firecrawl-mcp"]},
        }
    }
    incoming = {
        "mcpServers": {
            "github": {"command": "npx", "args": ["-y", "@github/github-mcp-server"]},
        },
        "hooks": {},
    }
    merged = _deep_merge_hooks(base, incoming)
    assert "firecrawl" in merged["mcpServers"]
    assert "github" in merged["mcpServers"]
    # pre-existing entries must survive the merge untouched
    assert merged["mcpServers"]["firecrawl"]["command"] == "npx"
|
||||
|
||||
|
||||
def test_deep_merge_hooks_mcpServers_idempotent():
    """Re-merging the same mcpServers fragment must not duplicate entries."""
    fragment = {
        "mcpServers": {
            "firecrawl": {"command": "npx", "args": ["-y", "@org/firecrawl-mcp"]},
        },
        "hooks": {},
    }
    merged = {}
    # Apply the identical fragment three times; the server count must stay 1.
    for _ in range(3):
        merged = _deep_merge_hooks(merged, fragment)
    assert len(merged["mcpServers"]) == 1
|
||||
|
||||
|
||||
def test_deep_merge_hooks_mcpServers_three_plugins():
    """Three plugins each contributing one mcpServer all land in final output."""
    server_names = ("firecrawl", "github", "browser-use")
    merged = {}
    for server in server_names:
        merged = _deep_merge_hooks(
            merged,
            {
                "mcpServers": {server: {"command": "npx", "args": [f"-y @{server}"]}},
                "hooks": {},
            },
        )

    assert set(merged["mcpServers"].keys()) == set(server_names)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MCPServerAdaptor tests — issue #847
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
from plugins_registry.builtins import MCPServerAdaptor # noqa: E402
|
||||
|
||||
|
||||
async def test_mcp_server_adaptor_install_writes_mcpServers(tmp_path: Path):
    """install() must merge mcpServers from settings-fragment.json into settings.json."""
    plugin_dir = tmp_path / "my-mcp-plugin"
    plugin_dir.mkdir()
    fragment = {
        "mcpServers": {
            "my-server": {"command": "npx", "args": ["-y", "@org/my-mcp-server"]},
        }
    }
    (plugin_dir / "settings-fragment.json").write_text(json.dumps(fragment))
    # Ship a skill too, so the AgentskillsAdaptor delegation is exercised.
    skill_dir = plugin_dir / "skills" / "docs"
    skill_dir.mkdir(parents=True)
    (skill_dir / "SKILL.md").write_text("# docs skill\n")

    configs = tmp_path / "configs"
    configs.mkdir()
    result = await MCPServerAdaptor("my-mcp-plugin", "claude_code").install(
        _make_ctx(configs, plugin_dir)
    )

    settings = json.loads((configs / ".claude" / "settings.json").read_text())
    assert "mcpServers" in settings
    assert "my-server" in settings["mcpServers"]
    assert settings["mcpServers"]["my-server"]["command"] == "npx"
    # Delegation to AgentskillsAdaptor installed the skill as well.
    assert (configs / "skills" / "docs" / "SKILL.md").exists()
    assert ".claude/settings.json" in result.files_written
|
||||
|
||||
|
||||
async def test_mcp_server_adaptor_install_no_fragment_no_warning(tmp_path: Path):
    """Plugin without settings-fragment.json must install silently (no settings.json created)."""
    plugin_dir = tmp_path / "bare-mcp"
    configs = tmp_path / "configs"
    plugin_dir.mkdir()
    configs.mkdir()

    result = await MCPServerAdaptor("bare-mcp", "claude_code").install(
        _make_ctx(configs, plugin_dir)
    )

    # _install_claude_layer creates the .claude dir, but no settings.json
    # is produced when the plugin ships no settings-fragment.json.
    assert not (configs / ".claude" / "settings.json").exists()
    assert result.warnings == []
|
||||
|
||||
|
||||
async def test_mcp_server_adaptor_uninstall_does_not_remove_mcpServers(tmp_path: Path):
    """uninstall() must remove skills/rules but leave mcpServers in settings.json.

    Rationale: MCP server configs are often shared or manually curated;
    removing them on plugin uninstall could break the user's environment.
    """
    plugin_dir = tmp_path / "my-mcp-plugin"
    plugin_dir.mkdir()
    fragment = {
        "mcpServers": {
            "my-server": {"command": "npx", "args": ["-y", "@org/my-mcp-server"]},
        }
    }
    (plugin_dir / "settings-fragment.json").write_text(json.dumps(fragment))
    (plugin_dir / "rules").mkdir(parents=True)
    (plugin_dir / "rules" / "r.md").write_text("- my rule\n")
    (plugin_dir / "skills" / "s").mkdir(parents=True)
    (plugin_dir / "skills" / "s" / "SKILL.md").write_text("# skill\n")

    configs = tmp_path / "configs"
    configs.mkdir()
    adaptor = MCPServerAdaptor("my-mcp-plugin", "claude_code")

    await adaptor.install(_make_ctx(configs, plugin_dir))
    installed = json.loads((configs / ".claude" / "settings.json").read_text())
    assert (configs / "skills" / "s").exists()
    assert "my-server" in installed.get("mcpServers", {})

    await adaptor.uninstall(_make_ctx(configs, plugin_dir))

    # AgentskillsAdaptor delegation cleaned up skills and rules.
    assert not (configs / "skills" / "s").exists()
    claude_md = configs / "CLAUDE.md"
    assert not claude_md.exists() or "# Plugin: my-mcp-plugin" not in claude_md.read_text()
    # The mcpServers entry is deliberately left behind.
    remaining = json.loads((configs / ".claude" / "settings.json").read_text())
    assert "mcpServers" in remaining
    assert "my-server" in remaining["mcpServers"]
|
||||
|
||||
|
||||
async def test_mcp_server_adaptor_install_merges_with_existing_settings(tmp_path: Path):
    """install() must deep-merge mcpServers with an already-populated settings.json."""
    plugin_dir = tmp_path / "second-mcp"
    plugin_dir.mkdir()
    fragment = {
        "mcpServers": {
            "github": {"command": "npx", "args": ["-y", "@github/github-mcp-server"]},
        }
    }
    (plugin_dir / "settings-fragment.json").write_text(json.dumps(fragment))

    configs = tmp_path / "configs"
    configs.mkdir()
    # Seed settings.json with an unrelated server before installing.
    claude_dir = configs / ".claude"
    claude_dir.mkdir(parents=True)
    preexisting = {
        "mcpServers": {
            "firecrawl": {"command": "npx", "args": ["-y", "@firecrawl/firecrawl-mcp"]},
        }
    }
    (claude_dir / "settings.json").write_text(json.dumps(preexisting))

    await MCPServerAdaptor("second-mcp", "claude_code").install(_make_ctx(configs, plugin_dir))

    merged = json.loads((claude_dir / "settings.json").read_text())
    assert "firecrawl" in merged["mcpServers"]
    assert "github" in merged["mcpServers"]
|
||||
|
||||
|
||||
async def test_mcp_server_adaptor_install_also_handles_hooks(tmp_path: Path):
    """An MCPServer plugin can also ship PreToolUse/PostToolUse hooks via the
    same settings-fragment.json; they must be merged without duplication."""
    plugin_dir = tmp_path / "mcp-with-hooks"
    plugin_dir.mkdir()
    hooks_dir = plugin_dir / "hooks"
    hooks_dir.mkdir(parents=True)
    lint_script = hooks_dir / "lint.sh"
    lint_script.write_text("#!/bin/bash\necho ok\n")
    lint_script.chmod(0o755)
    fragment = {
        "mcpServers": {
            "my-server": {"command": "npx", "args": ["-y", "@x/server"]},
        },
        "hooks": {
            "PreToolUse": [
                {
                    "matcher": "Bash",
                    "hooks": [{"type": "command", "command": "${CLAUDE_DIR}/hooks/lint.sh"}],
                }
            ]
        },
    }
    (plugin_dir / "settings-fragment.json").write_text(json.dumps(fragment))

    configs = tmp_path / "configs"
    configs.mkdir()
    await MCPServerAdaptor("mcp-with-hooks", "claude_code").install(_make_ctx(configs, plugin_dir))

    settings = json.loads((configs / ".claude" / "settings.json").read_text())
    assert "my-server" in settings["mcpServers"]
    pre_tool_use = settings["hooks"]["PreToolUse"]
    assert len(pre_tool_use) == 1
    assert pre_tool_use[0]["matcher"] == "Bash"
|
||||
|
||||
|
||||
import json # noqa: E402 — also used in new tests above
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user