+ )}
+ {/* Footer ID + composer belong to My Chat only. The Agent Comms
+ tab is a read-only peer/A2A feed (parity with desktop
+ ChatTab, where the agent-comms tabpanel has no composer). */}
+ {tab === "my" && (
+ <>
{/* Footer ID */}
+ >
+ )}
);
}
diff --git a/canvas/src/components/mobile/__tests__/MobileChat.test.tsx b/canvas/src/components/mobile/__tests__/MobileChat.test.tsx
index 7a0a8ca12..0c8e24595 100644
--- a/canvas/src/components/mobile/__tests__/MobileChat.test.tsx
+++ b/canvas/src/components/mobile/__tests__/MobileChat.test.tsx
@@ -21,6 +21,14 @@ import { MobileChat } from "../MobileChat";
vi.mock("@/lib/api");
import { api } from "@/lib/api";
+// AgentCommsPanel (mounted by the Agent Comms sub-tab, #231) subscribes
+// to the global socket via useSocketEvent. Stub it to a no-op so the
+// panel mounts without the real ReconnectingSocket — the parity tests
+// only assert the panel renders (vs the old static placeholder).
+vi.mock("@/hooks/useSocketEvent", () => ({
+ useSocketEvent: vi.fn(),
+}));
+
// ─── Mock store ───────────────────────────────────────────────────────────────
const mockAgentId = "ws-chat-test";
@@ -155,6 +163,12 @@ beforeEach(() => {
mockOnBack.mockClear();
mockStoreState.nodes = [];
mockStoreState.agentMessages = {};
+ // jsdom doesn't implement scrollIntoView. The Agent Comms tab now
+ // mounts AgentCommsPanel (#231), which scrolls its feed to bottom on
+ // arrival; a no-op stub keeps the panel from throwing under jsdom
+ // (same stub AgentCommsPanel's own render test installs).
+ Element.prototype.scrollIntoView =
+ vi.fn() as unknown as Element["scrollIntoView"];
// Set up spies on the real api methods. Tests override these per-call.
const getSpy = vi.spyOn(api, "get");
const postSpy = vi.spyOn(api, "post");
@@ -474,3 +488,146 @@ describe("MobileChat — chat history", () => {
expect(getSpy).toHaveBeenCalledTimes(2);
});
});
+
+// ─── #232 · Attachment render parity with desktop ChatTab ────────────────────
+//
+// Regression for the CTO-reported mobile bug: MobileChat used to render
+// only m.content (no attachment surface), so files sent/received in a
+// conversation were invisible on mobile while desktop showed them. The
+// fix routes m.attachments through the same AttachmentPreview the
+// desktop ChatTab bubble uses.
+
+describe("MobileChat — attachment render parity (#232)", () => {
+ beforeEach(() => {
+ mockStoreState.nodes = [onlineNode];
+ });
+
+ it("renders an attachment from a history message via AttachmentPreview", async () => {
+ const getSpy = vi.spyOn(api, "get");
+ // useChatHistory reads { messages, reached_end }.
+ getSpy.mockResolvedValueOnce({
+ messages: [
+ {
+ id: "m-att-1",
+ role: "agent",
+ content: "Here is the report",
+ attachments: [
+ {
+ name: "report.csv",
+ uri: "workspace://out/report.csv",
+ mimeType: "text/csv",
+ size: 2048,
+ },
+ ],
+ timestamp: new Date().toISOString(),
+ },
+ ],
+ reached_end: true,
+ });
+
+ let rr: ReturnType;
+ await act(async () => {
+ rr = renderChat(mockAgentId);
+ });
+ const { container } = rr!;
+
+ // A non-image attachment renders the AttachmentChip download button
+ // with title="Download " — same component the desktop bubble
+ // dispatches through AttachmentPreview.
+ await waitFor(() => {
+ const chip = container.querySelector('[title="Download report.csv"]');
+ expect(chip).toBeTruthy();
+ });
+ expect(container.textContent ?? "").toContain("report.csv");
+ });
+});
+
+// ─── #231 · Agent Comms (A2A/peer) render parity with desktop ChatTab ────────
+//
+// Regression for the CTO-reported mobile bug: the Agent Comms sub-tab
+// rendered a static placeholder string ("peer-to-peer A2A traffic
+// surfaces in the Comms tab") instead of the real feed. The fix mounts
+// the same AgentCommsPanel the desktop ChatTab agent-comms tabpanel
+// uses, so peer/A2A + delegation activity is visible on mobile.
+
+describe("MobileChat — Agent Comms render parity (#231)", () => {
+ beforeEach(() => {
+ mockStoreState.nodes = [onlineNode];
+ });
+
+ it("mounts AgentCommsPanel on the Agent Comms tab (not the old placeholder)", async () => {
+ const getSpy = vi.spyOn(api, "get");
+ // 1st GET: useChatHistory (My Chat) on mount.
+ getSpy.mockResolvedValueOnce({ messages: [], reached_end: true });
+ // 2nd GET: AgentCommsPanel's activity load when the tab is shown.
+ // Empty list → panel renders its own empty state, which still
+ // proves AgentCommsPanel mounted (vs. the removed placeholder).
+ getSpy.mockResolvedValueOnce([]);
+
+ let rr: ReturnType;
+ await act(async () => {
+ rr = renderChat(mockAgentId);
+ });
+ const { container } = rr!;
+
+ const commsTab = Array.from(container.querySelectorAll("button")).find(
+ (b) => b.textContent?.trim() === "Agent Comms",
+ );
+ expect(commsTab).toBeTruthy();
+ await act(async () => {
+ commsTab!.click();
+ });
+
+ await waitFor(() => {
+ const text = container.textContent ?? "";
+ // The panel's empty state — proves AgentCommsPanel mounted.
+ expect(text).toContain("No agent-to-agent communications yet.");
+ });
+ // The old hard-coded placeholder must be gone.
+ expect(container.textContent ?? "").not.toContain(
+ "peer-to-peer A2A traffic surfaces in the Comms tab",
+ );
+ // The panel hit its activity endpoint.
+ expect(getSpy).toHaveBeenCalledWith(
+ expect.stringContaining(`/workspaces/${mockAgentId}/activity`),
+ );
+ });
+
+ it("renders a peer message on the Agent Comms tab", async () => {
+ const getSpy = vi.spyOn(api, "get");
+ getSpy.mockResolvedValueOnce({ messages: [], reached_end: true });
+ // a2a_receive from a peer → AgentCommsPanel.toCommMessage maps it
+ // to an inbound bubble with the request text.
+ getSpy.mockResolvedValueOnce([
+ {
+ id: "act-1",
+ activity_type: "a2a_receive",
+ source_id: "peer-ws-uuid",
+ target_id: mockAgentId,
+ method: "message/send",
+ summary: "peer asked something",
+ request_body: { task: "Please review PR 42" },
+ response_body: null,
+ status: "ok",
+ created_at: new Date().toISOString(),
+ },
+ ]);
+
+ let rr: ReturnType;
+ await act(async () => {
+ rr = renderChat(mockAgentId);
+ });
+ const { container } = rr!;
+
+ const commsTab = Array.from(container.querySelectorAll("button")).find(
+ (b) => b.textContent?.trim() === "Agent Comms",
+ );
+ await act(async () => {
+ commsTab!.click();
+ });
+
+ await waitFor(() => {
+ expect(container.textContent ?? "").toContain("Please review PR 42");
+ });
+ });
+});
diff --git a/canvas/src/components/settings/SecretRow.tsx b/canvas/src/components/settings/SecretRow.tsx
index 92bb633ea..10339e869 100644
--- a/canvas/src/components/settings/SecretRow.tsx
+++ b/canvas/src/components/settings/SecretRow.tsx
@@ -3,16 +3,24 @@ import { useState, useCallback, useRef, useEffect } from 'react';
import type { Secret, SecretGroup } from '@/types/secrets';
import { useSecretsStore } from '@/stores/secrets-store';
import { StatusBadge } from '@/components/ui/StatusBadge';
-import { RevealToggle } from '@/components/ui/RevealToggle';
import { KeyValueField } from '@/components/ui/KeyValueField';
import { ValidationHint } from '@/components/ui/ValidationHint';
import { TestConnectionButton } from '@/components/ui/TestConnectionButton';
import { validateSecretValue } from '@/lib/validation/secret-formats';
import { SERVICES } from '@/lib/services';
-const AUTO_HIDE_MS = 30_000;
const VALIDATION_DEBOUNCE_MS = 400;
+// Secret values are write-only from the browser: the server List endpoint
+// "Never exposes values", there is no per-secret decrypt route, and the
+// only decrypted path (GET /secrets/values) is bulk + token-gated for
+// remote agents. The old eye/RevealToggle was a dead affordance — it
+// flipped its own icon but could never reveal anything, which read as
+// "this doesn't work" (esp. once clicked → eye-with-slash). We show an
+// honest static indicator instead; rotation is via Edit.
+const WRITE_ONLY_TITLE =
+ 'Value is write-only and cannot be revealed — use Edit to replace/rotate it';
+
interface SecretRowProps {
secret: Secret;
workspaceId: string;
@@ -31,28 +39,12 @@ export function SecretRow({ secret, workspaceId }: SecretRowProps) {
const setSecretStatus = useSecretsStore((s) => s.setSecretStatus);
const isEditing = editingKey === secret.name;
- const [revealed, setRevealed] = useState(false);
const [editValue, setEditValue] = useState('');
const [validationError, setValidationError] = useState(null);
const [isSaving, setIsSaving] = useState(false);
const [saveError, setSaveError] = useState(null);
const debounceRef = useRef>(undefined);
const editBtnRef = useRef(null);
- const revealTimerRef = useRef>(undefined);
-
- // Auto-hide revealed value after 30s
- useEffect(() => {
- if (revealed) {
- clearTimeout(revealTimerRef.current);
- revealTimerRef.current = setTimeout(() => setRevealed(false), AUTO_HIDE_MS);
- return () => clearTimeout(revealTimerRef.current);
- }
- }, [revealed]);
-
- // Reset revealed state when panel closes (session-only)
- useEffect(() => {
- return () => setRevealed(false);
- }, []);
// Debounced validation
useEffect(() => {
@@ -133,11 +125,15 @@ export function SecretRow({ secret, workspaceId }: SecretRowProps) {
{secret.masked_value}
+ );
+ }
+ return ;
+}
+
+function WorkspaceTokensTab({ workspaceId }: TokensTabProps) {
const [tokens, setTokens] = useState([]);
const [loading, setLoading] = useState(true);
const [creating, setCreating] = useState(false);
diff --git a/canvas/src/components/settings/__tests__/SecretRow.test.tsx b/canvas/src/components/settings/__tests__/SecretRow.test.tsx
index 1db10a82b..6763355be 100644
--- a/canvas/src/components/settings/__tests__/SecretRow.test.tsx
+++ b/canvas/src/components/settings/__tests__/SecretRow.test.tsx
@@ -138,14 +138,54 @@ describe("SecretRow — display mode", () => {
expect(document.querySelector('[role="row"]')).toBeTruthy();
});
- it("has Reveal, Copy, Edit, Delete buttons", () => {
+ it("has Copy, Edit, Delete buttons", () => {
render();
- expect(screen.getByTestId("reveal-toggle")).toBeTruthy();
expect(screen.getByRole("button", { name: /copy/i })).toBeTruthy();
expect(screen.getByRole("button", { name: /edit/i })).toBeTruthy();
expect(screen.getByRole("button", { name: /delete/i })).toBeTruthy();
});
+ // Regression: the reveal/eye control was a dead affordance. Clicking it
+ // flipped its own icon (eye → eye-with-slash) but never revealed the value,
+ // because secret values are write-only from the browser (server List
+ // "Never exposes values"; there is no per-secret decrypt endpoint and the
+ // client has no plaintext-fetch function). The honest fix removes the
+ // toggle and shows a static "write-only / cannot be revealed" indicator.
+ // See internal tracking issue + internal#210/#211.
+ it("does NOT render a reveal/eye toggle (values are write-only)", () => {
+ render();
+ expect(screen.queryByTestId("reveal-toggle")).toBeNull();
+ expect(
+ screen.queryByRole("button", { name: /toggle reveal/i }),
+ ).toBeNull();
+ });
+
+ it("shows a write-only indicator explaining the value cannot be revealed", () => {
+ render();
+ const indicator = screen.getByTestId("write-only-indicator");
+ expect(indicator).toBeTruthy();
+ // Affordance must be honest: explain it cannot be revealed and that
+ // Edit is the rotate path. It must not be a clickable button.
+ const title = indicator.getAttribute("title") ?? "";
+ expect(title.toLowerCase()).toMatch(/write-only|cannot be revealed/);
+ expect(indicator.tagName).not.toBe("BUTTON");
+ });
+
+ it("write-only indicator is present for the Anthropic/OAuth-token row too", () => {
+ // The reported bug singled out CLAUDE_CODE_OAUTH_TOKEN (anthropic group);
+ // the fix is group-agnostic — every row gets the same honest affordance.
+ const OAUTH_SECRET = {
+ name: "CLAUDE_CODE_OAUTH_TOKEN",
+ masked_value: "••••••••••••••••9d2a",
+ group: "anthropic" as const,
+ status: "unverified" as const,
+ updated_at: "2024-01-04",
+ };
+ render();
+ expect(screen.queryByTestId("reveal-toggle")).toBeNull();
+ expect(screen.getByTestId("write-only-indicator")).toBeTruthy();
+ });
+
it("shows invalid status correctly", () => {
render();
expect(screen.getByTestId("status-badge").getAttribute("data-status")).toBe("invalid");
diff --git a/canvas/src/components/settings/__tests__/TokensTab.test.tsx b/canvas/src/components/settings/__tests__/TokensTab.test.tsx
index cb923de55..f9409583b 100644
--- a/canvas/src/components/settings/__tests__/TokensTab.test.tsx
+++ b/canvas/src/components/settings/__tests__/TokensTab.test.tsx
@@ -302,3 +302,35 @@ describe("TokensTab — error", () => {
expect(document.querySelector('[role="status"]')).toBeNull();
});
});
+
+// ─── "global" sentinel (no node selected) ────────────────────────────────────
+//
+// Regression: SettingsPanel passes the literal "global" when no canvas
+// node is selected. workspace tokens are per-workspace and there is no
+// /workspaces/global/tokens endpoint — calling it 500'd
+// ("invalid input syntax for type uuid: global"). The tab must NOT call
+// the API in that state and must point the user at the Org API Keys tab.
+describe("TokensTab — global sentinel (no node selected)", () => {
+ beforeEach(() => {
+ mockApiGet.mockReset();
+ mockApiPost.mockReset();
+ mockApiGet.mockRejectedValue(new Error("should not be called"));
+ });
+
+ it("does not call the API and shows a pointer to Org API Keys", async () => {
+ render();
+ await flush();
+ expect(mockApiGet).not.toHaveBeenCalled();
+ expect(mockApiPost).not.toHaveBeenCalled();
+ expect(document.body.textContent).toContain("Select a workspace node");
+ expect(document.body.textContent).toContain("Org API Keys");
+ // No error banner, no scary 500 surfacing.
+ expect(document.querySelector(".text-bad")).toBeNull();
+ });
+
+ it("has no create button in the global state", async () => {
+ render();
+ await flush();
+ expect(document.body.textContent).not.toContain("New Token");
+ });
+});
diff --git a/canvas/src/components/tabs/FilesTab.tsx b/canvas/src/components/tabs/FilesTab.tsx
index caf222795..196551da4 100644
--- a/canvas/src/components/tabs/FilesTab.tsx
+++ b/canvas/src/components/tabs/FilesTab.tsx
@@ -45,11 +45,54 @@ export function FilesTab({ workspaceId, data }: Props) {
if (data && isExternalLikeRuntime(data.runtime)) {
return ;
}
- return ;
+ return ;
}
-function PlatformOwnedFilesTab({ workspaceId }: { workspaceId: string }) {
- const [root, setRoot] = useState("/configs");
+/** Picks the initial root for the FilesTab dropdown based on the
+ * workspace's runtime. Decision: per-runtime default (Hongming
+ * 2026-05-15, internal#425 Decisions §2).
+ *
+ * - openclaw → `/agent-home` (the agent's identity/state — the
+ * user-facing interesting files for that runtime live in
+ * `~/.openclaw/` inside the container, which `/agent-home` maps to
+ * via the Phase 2b docker-exec backend).
+ * - everything else (claude-code, hermes, external-like, undefined)
+ * → `/configs` (the legacy default — managed config that flows
+ * through the per-runtime indirection in
+ * workspace-server/internal/handlers/template_files_eic.go).
+ *
+ * When the runtime is undefined (legacy callers that don't thread
+ * `data` through, or a workspace whose runtime field hasn't loaded
+ * yet) the default is `/configs` — matches today's behaviour, no
+ * surprise.
+ *
+ * Note on `/agent-home` pre-Phase-2b: the backend short-circuits
+ * with HTTP 501 and the canonical "implementation pending" body.
+ * The tab renders empty + the error banner explains. This is by
+ * design — lets us land the canvas UX before the backend ships,
+ * per the RFC's phased rollout. The 501 is graceful: it doesn't
+ * poison error toasts or generate "workspace not found" noise.
+ *
+ * Adding a new runtime that should default to `/agent-home`: add it
+ * to the agentHomeDefaultRuntimes set below. Adding a runtime that
+ * should default to a different root: extend this function. */
+const agentHomeDefaultRuntimes = new Set(["openclaw"]);
+
+function defaultRootForRuntime(runtime: string | undefined): string {
+ if (runtime && agentHomeDefaultRuntimes.has(runtime)) {
+ return "/agent-home";
+ }
+ return "/configs";
+}
+
+function PlatformOwnedFilesTab({
+ workspaceId,
+ runtime,
+}: {
+ workspaceId: string;
+ runtime?: string;
+}) {
+ const [root, setRoot] = useState(() => defaultRootForRuntime(runtime));
const [selectedFile, setSelectedFile] = useState(null);
const [fileContent, setFileContent] = useState("");
const [editContent, setEditContent] = useState("");
diff --git a/canvas/src/components/tabs/FilesTab/FileEditor.tsx b/canvas/src/components/tabs/FilesTab/FileEditor.tsx
index db5301c5d..3e51356e6 100644
--- a/canvas/src/components/tabs/FilesTab/FileEditor.tsx
+++ b/canvas/src/components/tabs/FilesTab/FileEditor.tsx
@@ -3,6 +3,22 @@
import { useRef } from "react";
import { getIcon } from "./tree";
+// secretShapeMarker is the canonical body the workspace-server Files
+// API returns when a file's path OR content matched a credential
+// regex (internal#425 RFC, Phase 2b — backed by
+// workspace-server/internal/secrets.ScanBytes). The marker is a
+// fixed prefix so the canvas can detect it without parsing JSON and
+// without round-tripping the matched bytes through the editor (which
+// would defeat the purpose — clipboard, browser history, log
+// surfaces would all see them).
+//
+// Today (Phase 1 / before 2b ships) the backend returns 501 for the
+// only root that uses this path, so the marker is dead code until
+// 2b lands. Wiring it in now keeps the canvas + backend contracts
+// aligned in one PR rather than a follow-up. The constant is
+// importable so a future test can pin the exact string.
+export const SECRET_SHAPE_DENIED_MARKER = "";
+
interface Props {
selectedFile: string | null;
fileContent: string;
@@ -31,6 +47,22 @@ export function FileEditor({
const editorRef = useRef(null);
const isDirty = editContent !== fileContent;
+ // internal#425 Phase 3: detect the secret-shape denial marker and
+ // render a placeholder instead of the editor. The marker comes
+ // from workspace-server Phase 2b (secrets.ScanBytes) which refuses
+ // to surface the file's bytes. We deliberately don't expose
+ // the matched pattern's Name here — the canvas just shows the
+ // generic denial. The Files API log surface has the Pattern.Name
+ // for operators who need to debug a false positive.
+ const isSecretShapeDenied = fileContent === SECRET_SHAPE_DENIED_MARKER;
+
+ // /agent-home is read-only from the canvas (Phase 2b ships read +
+ // delete; Phase-2b-followup may add write). Edits to /configs are
+ // unchanged. Until 2b ships, /agent-home returns 501 so this
+ // read-only gate is also dead code, but wiring it in now keeps
+ // the UI honest the moment 2b lands without a follow-up canvas PR.
+ const isReadOnlyRoot = root !== "/configs";
+
if (!selectedFile) {
return (
@@ -75,11 +107,42 @@ export function FileEditor({
{/* Editor area */}
{loadingFile ? (
Loading...
+ ) : isSecretShapeDenied ? (
+ // Files API refused to surface this file's bytes because its
+ // path or content matched a credential regex
+ // (workspace-server/internal/secrets, internal#425 Phase 2b).
+ // We render a placeholder INSTEAD OF the textarea so the
+ // matched bytes never enter the DOM. Clipboard / view-source
+ // / element-inspector all see the placeholder, not the
+ // credential.
+
+
+
🛡️
+
+ {SECRET_SHAPE_DENIED_MARKER}
+
+
+ The platform refused to surface this file because its
+ path or content matched a credential-shape pattern.
+ The bytes never left the workspace container.
+
+
+ If this is a false positive (test fixture, docs example,
+ or content that happens to share a credential's shape),
+ rename the file or adjust the content via the workspace
+ terminal so the regex no longer matches, then refresh.
+
+
+
) : (
diff --git a/canvas/src/components/tabs/FilesTab/__tests__/agentHome.test.tsx b/canvas/src/components/tabs/FilesTab/__tests__/agentHome.test.tsx
new file mode 100644
index 000000000..0bb4005de
--- /dev/null
+++ b/canvas/src/components/tabs/FilesTab/__tests__/agentHome.test.tsx
@@ -0,0 +1,181 @@
+// @vitest-environment jsdom
+/**
+ * Tests for the /agent-home root selector + per-runtime default-root
+ * + secret-shape denial placeholder (internal#425 Phase 3).
+ *
+ * Separate file so the diff is reviewable as a unit and the existing
+ * FilesToolbar / FileEditor / FilesTab tests don't have to grow
+ * agent-home-specific cases. Once Phase 2b lands, the read-only +
+ * 501-stub assertions here can be tightened (or moved into the main
+ * test file as the agent-home root becomes a first-class affordance).
+ */
+import React from "react";
+import { render, screen, cleanup } from "@testing-library/react";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+import { FilesToolbar } from "../FilesToolbar";
+import {
+ FileEditor,
+ SECRET_SHAPE_DENIED_MARKER,
+} from "../FileEditor";
+
+afterEach(cleanup);
+
+describe("internal#425 Phase 3 — /agent-home root selector", () => {
+ it("dropdown includes /agent-home as an option", () => {
+ // Pins the affordance is in the DOM even pre-Phase-2b — the
+ // canvas design freezes today, the backend lands the dispatch
+ // later. Without this, a future refactor that drops the option
+ // would silently regress the RFC's Phase 1 contract (canvas
+ // visibility) without breaking any other test.
+ render(
+ ,
+ );
+ const select = screen.getByRole("combobox", {
+ name: /file root directory/i,
+ }) as HTMLSelectElement;
+ const values = Array.from(select.options).map((o) => o.value);
+ expect(values).toContain("/agent-home");
+ });
+
+ it("dropdown shows /agent-home as the SELECTED root when prop is /agent-home", () => {
+ render(
+ ,
+ );
+ const select = screen.getByRole("combobox", {
+ name: /file root directory/i,
+ }) as HTMLSelectElement;
+ expect(select.value).toBe("/agent-home");
+ });
+});
+
+describe("internal#425 Phase 3 — secret-shape denial placeholder", () => {
+ // Files API Phase 2b returns SECRET_SHAPE_DENIED_MARKER as the file
+ // body when the file's path or content matched a credential regex.
+ // The editor MUST render the marker as a placeholder, not pump it
+ // through the textarea — that would put the marker (and any future
+ // matched bytes if the backend contract changes) into the DOM
+ // value, clipboard, and inspector.
+
+ it("renders the denial placeholder INSTEAD of the textarea when fileContent is the marker", () => {
+ render(
+ ,
+ );
+ // Placeholder region present
+ expect(
+ screen.getByRole("region", { name: /file content denied/i }),
+ ).toBeTruthy();
+ // Marker text visible (so a debugging operator sees the canonical
+ // contract string without having to dig into the source).
+ expect(screen.getByText(SECRET_SHAPE_DENIED_MARKER)).toBeTruthy();
+ // Critically: NO textarea — the bytes never reach a controlled
+ // input. A regression that re-introduces the textarea path would
+ // make the matched marker (and any future content) selectable +
+ // copyable.
+ expect(screen.queryByRole("textbox")).toBeNull();
+ });
+
+ it("renders the textarea normally when fileContent is regular content", () => {
+ render(
+ ,
+ );
+ expect(screen.getByRole("textbox")).toBeTruthy();
+ expect(screen.queryByRole("region", { name: /file content denied/i }))
+ .toBeNull();
+ });
+
+ it("/agent-home renders textarea READ-ONLY for non-denied content", () => {
+ // Phase 2b ships read + delete on /agent-home; write semantics
+ // are decided later. Until then, the canvas presents the editor
+ // as read-only so a user can't type into a buffer that the
+ // backend will refuse to PUT. Without this gate, the user would
+ // edit, hit Save, get a 501, and lose their context for why.
+ render(
+ ,
+ );
+ const textarea = screen.getByRole("textbox") as HTMLTextAreaElement;
+ expect(textarea.readOnly).toBe(true);
+ });
+
+ it("/configs renders textarea WRITABLE (regression guard for the read-only gate)", () => {
+ render(
+ ,
+ );
+ const textarea = screen.getByRole("textbox") as HTMLTextAreaElement;
+ expect(textarea.readOnly).toBe(false);
+ });
+});
+
+describe("internal#425 Phase 3 — marker constant is the canonical string", () => {
+ // The marker string is part of the canvas <-> workspace-server
+ // contract. The workspace-server emits this exact body; the canvas
+ // detects it by exact-equality. A typo on either side would
+ // silently break detection — the canvas would render the literal
+ // string in the textarea instead of the placeholder. Pin the
+ // contract value here.
+ it("matches the contract value ''", () => {
+ expect(SECRET_SHAPE_DENIED_MARKER).toBe("");
+ });
+});
diff --git a/canvas/src/components/ui/TestConnectionButton.tsx b/canvas/src/components/ui/TestConnectionButton.tsx
index 940c06e49..a85714ef8 100644
--- a/canvas/src/components/ui/TestConnectionButton.tsx
+++ b/canvas/src/components/ui/TestConnectionButton.tsx
@@ -2,7 +2,7 @@
import { useState, useCallback, useRef, useEffect } from 'react';
import type { TestConnectionState, SecretGroup } from '@/types/secrets';
-import { validateSecret } from '@/lib/api/secrets';
+import { validateSecret, ApiError } from '@/lib/api/secrets';
interface TestConnectionButtonProps {
provider: SecretGroup;
@@ -55,9 +55,23 @@ export function TestConnectionButton({
}
onResult?.(result.valid);
resetTimerRef.current = setTimeout(() => setState('idle'), RESET_DELAYS[nextState]!);
- } catch {
+ } catch (err) {
+ // Distinguish a real failure shape rather than always claiming a
+ // timeout. A reachable server that answered with an HTTP status
+ // (ApiError) did NOT time out — most commonly the validation route
+ // is not available (404/501), which must not masquerade as
+ // "service down". Only an actual thrown network/abort error is a
+ // connectivity failure.
setState('failure');
- setErrorDetail('Connection timed out. Service may be down.');
+ if (err instanceof ApiError) {
+ setErrorDetail(
+ err.status === 404 || err.status === 501
+ ? 'Key validation is not available for this service yet. The key was not tested.'
+ : `Could not verify key (server returned ${err.status}). Saving is unaffected.`,
+ );
+ } else {
+ setErrorDetail('Could not reach the validation service. Check your connection and try again.');
+ }
onResult?.(false);
resetTimerRef.current = setTimeout(() => setState('idle'), RESET_DELAYS.failure);
}
diff --git a/canvas/src/components/ui/__tests__/TestConnectionButton.test.tsx b/canvas/src/components/ui/__tests__/TestConnectionButton.test.tsx
index 62d61ff11..21114ef1d 100644
--- a/canvas/src/components/ui/__tests__/TestConnectionButton.test.tsx
+++ b/canvas/src/components/ui/__tests__/TestConnectionButton.test.tsx
@@ -28,8 +28,20 @@ const mockValidateSecret = vi.fn();
vi.mock("@/lib/api/secrets", () => ({
validateSecret: (...args: unknown[]) => mockValidateSecret(...args),
+ ApiError: class ApiError extends Error {
+ status: number;
+ constructor(status: number, message: string) {
+ super(message);
+ this.name = "ApiError";
+ this.status = status;
+ }
+ },
}));
+// Re-import the mocked ApiError so test cases construct the same class the
+// component's `instanceof` check sees.
+import { ApiError } from "@/lib/api/secrets";
+
beforeEach(() => {
vi.useFakeTimers();
vi.clearAllMocks();
@@ -201,8 +213,27 @@ describe("TestConnectionButton — failure path", () => {
});
describe("TestConnectionButton — catch path", () => {
- it("shows 'Connection timed out' on network error", async () => {
- mockValidateSecret.mockRejectedValue(new Error("timeout"));
+ it("does NOT claim a timeout when the validate endpoint 404s (regression: internal#492)", async () => {
+ // The validate route is unimplemented on the server and returns a fast
+ // 404. Before the fix this rendered the misleading hardcoded string
+ // "Connection timed out. Service may be down." It must instead state
+ // honestly that validation isn't available and the key was not tested.
+ mockValidateSecret.mockRejectedValue(new ApiError(404, "Not Found"));
+ render(
+ ,
+ );
+ fireEvent.click(document.querySelector('button[type="button"]')!);
+ await act(async () => {
+ await vi.advanceTimersByTimeAsync(0);
+ });
+ expect(document.body.textContent).not.toContain("Connection timed out");
+ expect(document.body.textContent).not.toContain("Service may be down");
+ expect(document.body.textContent).toContain("not available");
+ expect(document.body.textContent).toContain("not tested");
+ });
+
+ it("reports a non-404 server error with its status, not a timeout", async () => {
+ mockValidateSecret.mockRejectedValue(new ApiError(500, "Internal Server Error"));
render(
,
);
@@ -210,7 +241,20 @@ describe("TestConnectionButton — catch path", () => {
await act(async () => {
await vi.advanceTimersByTimeAsync(0);
});
- expect(document.body.textContent).toContain("Connection timed out");
+ expect(document.body.textContent).toContain("500");
+ expect(document.body.textContent).not.toContain("Connection timed out");
+ });
+
+ it("shows a connectivity message on a genuine network error", async () => {
+ mockValidateSecret.mockRejectedValue(new Error("network down"));
+ render(
+ ,
+ );
+ fireEvent.click(document.querySelector('button[type="button"]')!);
+ await act(async () => {
+ await vi.advanceTimersByTimeAsync(0);
+ });
+ expect(document.body.textContent).toContain("Could not reach the validation service");
});
it("calls onResult(false) on network error", async () => {
diff --git a/workspace-server/internal/handlers/a2a_proxy.go b/workspace-server/internal/handlers/a2a_proxy.go
index 8fbef20c6..c06c553fd 100644
--- a/workspace-server/internal/handlers/a2a_proxy.go
+++ b/workspace-server/internal/handlers/a2a_proxy.go
@@ -399,7 +399,21 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
// (no Do(), no maybeMarkContainerDead). The response is a synthetic
// {status:"queued"} envelope so the caller (canvas, another workspace)
// knows delivery is acknowledged but pending consumption.
- if lookupDeliveryMode(ctx, workspaceID) == models.DeliveryModePoll {
+ deliveryMode, deliveryModeErr := lookupDeliveryMode(ctx, workspaceID)
+ if deliveryModeErr != nil {
+ // internal#497 fail-closed: a real DB/context error on the
+ // delivery-mode read MUST NOT silently fall through to the push
+ // dispatch path — that is exactly what silently misrouted every
+ // poll-mode peer for 5 days under the ce2db75f regression. Surface
+ // a structured error so the delegation is marked failed (loud +
+ // retryable) instead of dispatched to the wrong path.
+ log.Printf("ProxyA2A: delivery-mode lookup failed for %s: %v — failing closed", workspaceID, deliveryModeErr)
+ return 0, nil, &proxyA2AError{
+ Status: http.StatusServiceUnavailable,
+ Response: gin.H{"error": "delivery-mode lookup failed; refusing to dispatch to avoid silent misrouting"},
+ }
+ }
+ if deliveryMode == models.DeliveryModePoll {
if logActivity {
h.logA2AReceiveQueued(ctx, workspaceID, callerID, body, a2aMethod)
}
diff --git a/workspace-server/internal/handlers/a2a_proxy_helpers.go b/workspace-server/internal/handlers/a2a_proxy_helpers.go
index 8145a66a1..08fd21020 100644
--- a/workspace-server/internal/handlers/a2a_proxy_helpers.go
+++ b/workspace-server/internal/handlers/a2a_proxy_helpers.go
@@ -194,6 +194,11 @@ func (h *WorkspaceHandler) maybeMarkContainerDead(ctx context.Context, workspace
}
db.ClearWorkspaceKeys(ctx, workspaceID)
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOffline), workspaceID, map[string]interface{}{})
+ // Tracked via goAsync (not bare `go`) so the asyncWG can be drained
+ // before a test swaps the global db.DB. runRestartCycle reads db.DB
+ // before its provisioner gate, so an untracked detached goroutine
+ // races setupTestDB's t.Cleanup db.DB restore. Matches the already-
+ // correct site at a2a_proxy.go:648.
h.goAsync(func() { h.RestartByID(workspaceID) })
return true
}
@@ -241,6 +246,9 @@ func (h *WorkspaceHandler) preflightContainerHealth(ctx context.Context, workspa
}
db.ClearWorkspaceKeys(ctx, workspaceID)
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOffline), workspaceID, map[string]interface{}{})
+ // Tracked via goAsync (see maybeMarkContainerDead): preflight's
+ // detached restart must be drainable so it doesn't race the global
+ // db.DB swap in test cleanup.
h.goAsync(func() { h.RestartByID(workspaceID) })
return &proxyA2AError{
Status: http.StatusServiceUnavailable,
@@ -262,8 +270,9 @@ func (h *WorkspaceHandler) logA2AFailure(ctx context.Context, workspaceID, calle
errWsName = workspaceID
}
summary := "A2A request to " + errWsName + " failed: " + errMsg
+ parent := ctx
h.goAsync(func() {
- logCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second)
+ logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second)
defer cancel()
LogActivity(logCtx, h.broadcaster, ActivityParams{
WorkspaceID: workspaceID,
@@ -309,8 +318,9 @@ func (h *WorkspaceHandler) logA2ASuccess(ctx context.Context, workspaceID, calle
}
summary := a2aMethod + " → " + wsNameForLog
toolTrace := extractToolTrace(respBody)
+ parent := ctx
h.goAsync(func() {
- logCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second)
+ logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second)
defer cancel()
LogActivity(logCtx, h.broadcaster, ActivityParams{
WorkspaceID: workspaceID,
@@ -458,40 +468,64 @@ func parseUsageFromA2AResponse(body []byte) (inputTokens, outputTokens int64) {
return 0, 0
}
-// lookupDeliveryMode returns the workspace's delivery_mode. On any DB
-// error or missing row it returns DeliveryModePush — the fail-closed
-// default. "Closed" here means "fall back to today's behavior (synchronous
-// dispatch)" rather than "fall back to drop the request silently into
-// activity_logs where the agent might never see it." A poll-mode workspace
-// that briefly reads as push will get its A2A request dispatched to the
-// stored URL (or a 502 if no URL); a push-mode workspace that briefly
-// reads as poll would get its request silently queued with no dispatch.
-// The first failure is loud + recoverable; the second is silent.
+// lookupDeliveryMode returns the workspace's delivery_mode.
+//
+// internal#497 / RFC#497 fail-closed (SURGICAL scope): the *specific*
+// failure mode that hid the ce2db75f regression for 5 days is now
+// propagated instead of silently swallowed — a CONTEXT error
+// (context.Canceled / context.DeadlineExceeded). Under ce2db75f the
+// detached delegation goroutine ran on a cancelled request context, every
+// `SELECT delivery_mode` failed `context canceled`, this function returned
+// push, the poll-mode short-circuit in proxyA2ARequest was skipped, and
+// poll-mode peers (e.g. an operator laptop on molecule-mcp-claude-channel)
+// silently never got their a2a_receive inbox row. A transient,
+// systematic-once-triggered context cancellation became permanent
+// invisible misrouting. Returning that error lets the caller fail loud
+// (mark the delegation failed) instead of mis-dispatching.
+//
+// Scope is deliberately narrow: only ctx errors propagate. Other DB
+// errors retain the long-standing documented "fall back to push (today's
+// synchronous behavior)" contract — that path is loud + recoverable
+// (502 / SSRF reject / restart), unlike the silent poll-mode drop, and
+// the surrounding proxy (incl. the sibling checkWorkspaceBudget) is
+// intentionally built around that fail-open-to-push behavior. Widening
+// further is an RFC#497 follow-up, not part of this P0 fix.
+//
+// A genuinely *absent* configuration is NOT an error and still resolves to
+// push (the safe synchronous default): sql.ErrNoRows, a NULL/empty column,
+// or an unrecognised value all return (push, nil).
//
// The function is intentionally lookup-only — it never mutates the row.
// The register handler (registry.go) is the only writer for delivery_mode.
//
// See #2339 PR 1 for the column + register-flow side; this is the
// proxy-side read used for the short-circuit in proxyA2ARequest.
-func lookupDeliveryMode(ctx context.Context, workspaceID string) string {
+func lookupDeliveryMode(ctx context.Context, workspaceID string) (string, error) {
var mode sql.NullString
err := db.DB.QueryRowContext(ctx,
`SELECT delivery_mode FROM workspaces WHERE id = $1`, workspaceID,
).Scan(&mode)
if err != nil {
- if !errors.Is(err, sql.ErrNoRows) {
- log.Printf("ProxyA2A: lookupDeliveryMode(%s) failed (%v) — defaulting to push", workspaceID, err)
+ // internal#497: a context cancellation/deadline MUST NOT be
+ // swallowed into a silent push default — that is the exact 5-day
+ // silent-misrouting vector. Propagate so the caller fails closed.
+ if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
+ log.Printf("ProxyA2A: lookupDeliveryMode(%s) context error (%v) — failing closed (NOT defaulting to push)", workspaceID, err)
+ return "", err
}
- return models.DeliveryModePush
+ if !errors.Is(err, sql.ErrNoRows) {
+ log.Printf("ProxyA2A: lookupDeliveryMode(%s) failed (%v) — defaulting to push (non-ctx DB error; legacy fail-open-to-push contract)", workspaceID, err)
+ }
+ return models.DeliveryModePush, nil
}
if !mode.Valid || mode.String == "" {
- return models.DeliveryModePush
+ return models.DeliveryModePush, nil
}
if !models.IsValidDeliveryMode(mode.String) {
log.Printf("ProxyA2A: workspace %s has invalid delivery_mode=%q — defaulting to push", workspaceID, mode.String)
- return models.DeliveryModePush
+ return models.DeliveryModePush, nil
}
- return mode.String
+ return mode.String, nil
}
// logA2AReceiveQueued records a poll-mode "queued" A2A receive into
diff --git a/workspace-server/internal/handlers/a2a_proxy_test.go b/workspace-server/internal/handlers/a2a_proxy_test.go
index 3cf954624..d2173d4c3 100644
--- a/workspace-server/internal/handlers/a2a_proxy_test.go
+++ b/workspace-server/internal/handlers/a2a_proxy_test.go
@@ -2235,12 +2235,18 @@ func TestProxyA2A_PushMode_NoShortCircuit(t *testing.T) {
}
}
-// TestProxyA2A_PollMode_FailsClosedToPush verifies the safety contract:
-// a DB error reading delivery_mode must default to push (the existing
-// behavior), NOT poll. Failing to push means a poll-mode workspace
-// briefly attempts a real dispatch — visible failure (502 / SSRF
-// rejection / restart cascade), not a silent drop into activity_logs
-// where the agent might never look. Loud > silent, recoverable > lost.
+// TestProxyA2A_PollMode_FailsClosedToPush verifies the LEGACY safety
+// contract is PRESERVED for non-context DB errors: a generic DB error
+// reading delivery_mode still defaults to push (today's behavior), NOT
+// poll. Failing to push means a poll-mode workspace briefly attempts a
+// real dispatch — visible failure (502 / SSRF rejection / restart
+// cascade), not a silent drop into activity_logs where the agent might
+// never look. Loud > silent, recoverable > lost.
+//
+// internal#497 narrows the fail-closed change to *context* errors only
+// (the actual ce2db75f regression vector); generic DB errors keep this
+// long-standing fail-open-to-push contract. The ctx-error fail-closed is
+// covered by TestLookupDeliveryMode_ContextCanceled_FailsClosed.
func TestProxyA2A_PollMode_FailsClosedToPush(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t) // empty Redis — forces resolveAgentURL DB lookup
@@ -2251,7 +2257,8 @@ func TestProxyA2A_PollMode_FailsClosedToPush(t *testing.T) {
expectBudgetCheck(mock, wsID)
- // lookupDeliveryMode hits a transient DB error → must default push.
+ // lookupDeliveryMode hits a generic (non-context) DB error → must
+ // still default push (legacy contract preserved by internal#497).
mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
WithArgs(wsID).
WillReturnError(sql.ErrConnDone)
@@ -2275,7 +2282,7 @@ func TestProxyA2A_PollMode_FailsClosedToPush(t *testing.T) {
var resp map[string]interface{}
_ = json.Unmarshal(w.Body.Bytes(), &resp)
if resp["status"] == "queued" {
- t.Errorf("DB error on delivery_mode lookup silently queued the request — must fail-closed-to-push, got body: %s", w.Body.String())
+ t.Errorf("generic DB error on delivery_mode lookup silently queued the request — must fail-open-to-push, got body: %s", w.Body.String())
}
}
@@ -2284,6 +2291,37 @@ func TestProxyA2A_PollMode_FailsClosedToPush(t *testing.T) {
}
}
+// TestLookupDeliveryMode_ContextCanceled_FailsClosed is the internal#497
+// regression test for the SECONDARY defect. It pins the exact invariant
+// that hid the ce2db75f regression for 5 days: when the delivery_mode read
+// fails because the context was cancelled (precisely what happened in the
+// detached delegation goroutine running on a returned request context),
+// lookupDeliveryMode MUST return an error and MUST NOT silently return
+// "push". Returning push there is what skipped the poll-mode short-circuit
+// and silently dropped 100% of poll-mode peer deliveries.
+//
+// A pre-cancelled context makes QueryRowContext fail with
+// context.Canceled deterministically — no DB rows are mocked because the
+// query never reaches a result.
+func TestLookupDeliveryMode_ContextCanceled_FailsClosed(t *testing.T) {
+ mock := setupTestDB(t)
+ // The query fails on the cancelled ctx before matching; provide a
+ // permissive expectation so sqlmock doesn't complain about the attempt.
+ mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
+ WillReturnError(context.Canceled)
+
+ ctx, cancel := context.WithCancel(context.Background())
+ cancel() // simulate the HTTP handler having returned (request ctx dead)
+
+ mode, err := lookupDeliveryMode(ctx, "ws-poll-peer")
+ if err == nil {
+ t.Fatalf("internal#497 regression: lookupDeliveryMode swallowed a context error and returned mode=%q with nil err — this is the exact 5-day silent-misrouting vector", mode)
+ }
+ if mode == models.DeliveryModePush {
+ t.Errorf("internal#497 regression: context error must NOT default to push (got mode=%q)", mode)
+ }
+}
+
// ==================== a2aClient ResponseHeaderTimeout config ====================
func TestA2AClientResponseHeaderTimeout(t *testing.T) {
diff --git a/workspace-server/internal/handlers/agent_card_reconcile.go b/workspace-server/internal/handlers/agent_card_reconcile.go
new file mode 100644
index 000000000..b5fac314f
--- /dev/null
+++ b/workspace-server/internal/handlers/agent_card_reconcile.go
@@ -0,0 +1,113 @@
+package handlers
+
+import "encoding/json"
+
+// agent_card_reconcile.go — server-side repair for the fleet-wide
+// agent-card identity gap.
+//
+// Root cause: the runtime builds its AgentCard from config.name
+// (workspace/main.py:198), and config.name is read from the
+// CP-regenerated /configs/config.yaml whose `name:` field is the raw
+// workspace UUID — NOT the friendly name the operator sees. The friendly
+// name IS captured: POST /workspaces and PATCH /workspaces/:id (the
+// canvas Details tab) write it to the trusted workspaces.name DB column.
+// But /registry/register stores the runtime-supplied card verbatim
+// (registry.go: `agent_card = EXCLUDED.agent_card`), so the stored card
+// served at /.well-known/agent-card.json and returned to peers via
+// agent_card_url ends up with name = UUID, description = "", role = null.
+//
+// Fix shape (deliberately minimal, no contract weakening): when the
+// runtime-supplied card's `name` is empty or equals the workspace UUID
+// (the placeholder the runtime had no better value for), the PLATFORM —
+// not the agent — substitutes the friendly value from the trusted
+// workspaces row. Identity stays platform-controlled: the agent never
+// gains the ability to self-set its own name/role; the platform sources
+// it from the operator-controlled DB column. We only ever FILL gaps
+// (empty / UUID-placeholder); a card that already carries a real
+// friendly name is never downgraded.
+//
+// list_peers / the /registry/:id/peers endpoint already resolve display
+// names from workspaces.name directly (discovery.go / mcp_tools.go
+// `SELECT w.id, w.name, ...`), so peer_name in delivered message tags
+// was already correct — this fix closes the remaining surface: the
+// agent_card blob itself (canvas Agent Card / Skills view, peer
+// agent_card_url fetches, the well-known card).
+//
+// description / role degrade discovery the same way: an empty
+// description and null role give peers nothing to reason about. We
+// default description from the (now reconciled) name when blank and
+// role from workspaces.role when the operator set one.
+
+// reconcileAgentCardIdentity patches identity gaps in a runtime-supplied
+// agent card from the trusted workspace DB row. It returns the
+// (possibly rewritten) card bytes and whether anything changed. On any
+// failure (malformed JSON, nothing to fill) it returns the input bytes
+// unchanged with changed=false so the caller can store them verbatim —
+// this is strictly no-worse-than-before, never a regression.
+//
+// Pure function: no DB / HTTP / globals, so it is exhaustively
+// unit-testable (agent_card_reconcile_test.go) without booting the
+// handler or a sqlmock.
+func reconcileAgentCardIdentity(card json.RawMessage, workspaceID, dbName, dbRole string) (json.RawMessage, bool) {
+ var m map[string]any
+ if err := json.Unmarshal(card, &m); err != nil || m == nil {
+ // Malformed card — not this function's job to reject it (the
+ // upsert stores it as-is and downstream readers handle bad
+ // JSON). Return verbatim so byte-for-byte behaviour is
+ // preserved on the failure path.
+ return card, false
+ }
+
+ changed := false
+
+ // name: fill only when empty or the UUID placeholder. A dbName that
+ // is itself the UUID is a placeholder row (registry.go INSERT seeds
+ // name = id before the canvas sets a friendly one) — not a friendly
+ // name, so it is not an eligible source.
+ cardName, _ := m["name"].(string)
+ if (cardName == "" || cardName == workspaceID) &&
+ dbName != "" && dbName != workspaceID {
+ m["name"] = dbName
+ changed = true
+ }
+
+ // description: when blank, default to the (reconciled) name so peers
+ // and the canvas Agent Card view have a non-empty human label
+ // instead of "". Mirrors the runtime's own
+ // `config.description or config.name` fallback (main.py:199) but
+ // applied to the registry copy where the runtime's fallback was the
+ // UUID.
+ if desc, _ := m["description"].(string); desc == "" {
+ if n, _ := m["name"].(string); n != "" && n != workspaceID {
+ m["description"] = n
+ changed = true
+ }
+ }
+
+ // role: surface the operator-set workspaces.role when the card
+ // carries none. Discovery (peer_role) and the canvas Role row read
+ // workspaces.role directly; this just makes the standalone card
+ // self-describing too. Never overwrite a role the card already has.
+ if dbRole != "" {
+ if r, ok := m["role"].(string); !ok || r == "" {
+ m["role"] = dbRole
+ changed = true
+ }
+ }
+
+ if !changed {
+ // No-op: return the original bytes untouched so callers that
+ // compare/store get byte-identical input (re-marshalling would
+ // reorder keys for no reason).
+ return card, false
+ }
+
+ out, err := json.Marshal(m)
+ if err != nil {
+ // Re-marshal of a map we just unmarshalled should never fail;
+ // if it somehow does, fall back to the verbatim input rather
+ // than storing nothing.
+ return card, false
+ }
+ return out, true
+}
diff --git a/workspace-server/internal/handlers/agent_card_reconcile_test.go b/workspace-server/internal/handlers/agent_card_reconcile_test.go
new file mode 100644
index 000000000..75d5976b6
--- /dev/null
+++ b/workspace-server/internal/handlers/agent_card_reconcile_test.go
@@ -0,0 +1,166 @@
+package handlers
+
+import (
+ "encoding/json"
+ "testing"
+)
+
+// TestReconcileAgentCardIdentity covers the server-side backfill that
+// repairs the fleet-wide agent-card identity gap (internal#XXX): the
+// runtime POSTs /registry/register with agent_card.name = the workspace
+// UUID (because the CP-regenerated /configs/config.yaml sets name: )
+// while the trusted workspaces.name DB column — the value the canvas
+// Details tab shows and lets the operator edit — holds the friendly
+// name ("Claude Code Agent"). The platform reconciles them from the DB
+// row (NOT from the agent — identity stays platform-controlled, not
+// self-mutable).
+func TestReconcileAgentCardIdentity(t *testing.T) {
+ const wsID = "3b81321b-1ec7-488c-96f7-72c42a968da6"
+
+ tests := []struct {
+ name string
+ card string
+ dbName string
+ dbRole string
+ wantName string
+ wantDesc string
+ wantRole string
+ wantChanged bool
+ }{
+ {
+ name: "name is the workspace UUID — backfill from DB",
+ card: `{"name":"3b81321b-1ec7-488c-96f7-72c42a968da6","description":"","capabilities":{"streaming":true}}`,
+ dbName: "Claude Code Agent",
+ dbRole: "",
+ wantName: "Claude Code Agent",
+ wantDesc: "Claude Code Agent",
+ wantRole: "",
+ wantChanged: true,
+ },
+ {
+ name: "empty name — backfill from DB",
+ card: `{"name":"","description":"x"}`,
+ dbName: "ops-agent",
+ dbRole: "sre",
+ wantName: "ops-agent",
+ wantDesc: "x",
+ wantRole: "sre",
+ wantChanged: true,
+ },
+ {
+ name: "role null in card, DB has role — backfill role only",
+ card: `{"name":"Reviewer","description":"Senior reviewer"}`,
+ dbName: "Reviewer",
+ dbRole: "code-reviewer",
+ wantName: "Reviewer",
+ wantDesc: "Senior reviewer",
+ wantRole: "code-reviewer",
+ wantChanged: true,
+ },
+ {
+ name: "card already has a real friendly name — do NOT clobber it",
+ // A richer card (e.g. an external channel agent) must win;
+ // the platform only fills gaps, never downgrades.
+ card: `{"name":"Claude Code (channel)","description":"Local Claude Code session bridged","role":"assistant"}`,
+ dbName: "hongming-pc",
+ dbRole: "operator",
+ wantName: "Claude Code (channel)",
+ wantDesc: "Local Claude Code session bridged",
+ wantRole: "assistant",
+ wantChanged: false,
+ },
+ {
+ name: "no DB name available — leave UUID name untouched (no worse than before)",
+ card: `{"name":"3b81321b-1ec7-488c-96f7-72c42a968da6","description":""}`,
+ dbName: "",
+ dbRole: "",
+ wantName: "3b81321b-1ec7-488c-96f7-72c42a968da6",
+ wantDesc: "",
+ wantRole: "",
+ wantChanged: false,
+ },
+ {
+ name: "dbName equals UUID (placeholder row) — not a friendly name, leave untouched",
+ card: `{"name":"3b81321b-1ec7-488c-96f7-72c42a968da6"}`,
+ dbName: "3b81321b-1ec7-488c-96f7-72c42a968da6",
+ dbRole: "",
+ wantName: "3b81321b-1ec7-488c-96f7-72c42a968da6",
+ wantDesc: "",
+ wantRole: "",
+ wantChanged: false,
+ },
+ {
+ name: "malformed card JSON — return unchanged, no panic",
+ card: `{not json`,
+ dbName: "Claude Code Agent",
+ dbRole: "",
+ wantChanged: false,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ out, changed := reconcileAgentCardIdentity(
+ json.RawMessage(tc.card), wsID, tc.dbName, tc.dbRole,
+ )
+ if changed != tc.wantChanged {
+ t.Fatalf("changed = %v, want %v", changed, tc.wantChanged)
+ }
+ if !tc.wantChanged {
+ // Unchanged path must return the input bytes verbatim.
+ if string(out) != tc.card {
+ t.Fatalf("unchanged path mutated bytes:\n got %s\n want %s", out, tc.card)
+ }
+ return
+ }
+ var got map[string]any
+ if err := json.Unmarshal(out, &got); err != nil {
+ t.Fatalf("output not valid JSON: %v (%s)", err, out)
+ }
+ if g, _ := got["name"].(string); g != tc.wantName {
+ t.Errorf("name = %q, want %q", g, tc.wantName)
+ }
+ if g, _ := got["description"].(string); g != tc.wantDesc {
+ t.Errorf("description = %q, want %q", g, tc.wantDesc)
+ }
+ if tc.wantRole != "" {
+ if g, _ := got["role"].(string); g != tc.wantRole {
+ t.Errorf("role = %q, want %q", g, tc.wantRole)
+ }
+ }
+ })
+ }
+}
+
+// TestReconcileAgentCardIdentity_PreservesOtherFields ensures the
+// reconcile is a minimal in-place patch — capabilities, version,
+// skills and any unknown future fields survive untouched.
+func TestReconcileAgentCardIdentity_PreservesOtherFields(t *testing.T) {
+ card := `{"name":"ws-uuid","description":"","version":"1.0.0",` +
+ `"capabilities":{"streaming":true,"pushNotifications":true},` +
+ `"skills":[{"id":"a","name":"a"}],"configuration_status":"ready"}`
+ out, changed := reconcileAgentCardIdentity(
+ json.RawMessage(card), "ws-uuid", "Friendly Name", "",
+ )
+ if !changed {
+ t.Fatal("expected changed = true")
+ }
+ var got map[string]any
+ if err := json.Unmarshal(out, &got); err != nil {
+ t.Fatalf("invalid JSON: %v", err)
+ }
+ if got["version"] != "1.0.0" {
+ t.Errorf("version not preserved: %v", got["version"])
+ }
+ if got["configuration_status"] != "ready" {
+ t.Errorf("configuration_status not preserved: %v", got["configuration_status"])
+ }
+ caps, ok := got["capabilities"].(map[string]any)
+ if !ok || caps["streaming"] != true {
+ t.Errorf("capabilities not preserved: %v", got["capabilities"])
+ }
+ skills, ok := got["skills"].([]any)
+ if !ok || len(skills) != 1 {
+ t.Errorf("skills not preserved: %v", got["skills"])
+ }
+}
diff --git a/workspace-server/internal/handlers/delegation.go b/workspace-server/internal/handlers/delegation.go
index beaa88cf5..3d1b6e5db 100644
--- a/workspace-server/internal/handlers/delegation.go
+++ b/workspace-server/internal/handlers/delegation.go
@@ -163,8 +163,32 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
},
})
- // Fire-and-forget: send A2A in background goroutine
- go h.executeDelegation(ctx, sourceID, body.TargetID, delegationID, a2aBody)
+ // Fire-and-forget: send A2A in a background goroutine.
+ //
+ // internal#497 — the goroutine MUST NOT inherit the HTTP request's
+ // cancellation. `ctx` here is c.Request.Context(); the handler returns
+ // 202 a few lines below, which cancels that context immediately. Before
+ // this fix (regression ce2db75f) executeDelegation ran on the
+ // request-scoped ctx, so every DB op + proxy call in the detached
+ // goroutine failed `context canceled` the instant the 202 was written.
+ // That silently broke 100% of A2A peer delegations fleet-wide since
+ // 2026-05-12 (poll-mode peers never got their a2a_receive inbox row;
+ // lookupDeliveryMode swallowed the ctx error and defaulted to push).
+ //
+ // context.WithoutCancel detaches cancellation/deadline while PRESERVING
+ // all context values (trace/correlation/tenant ids that proxyA2ARequest
+ // and the broadcaster read off ctx) — this is the established pattern in
+ // this package (a2a_proxy.go:850, a2a_proxy_helpers.go:525,
+ // registry.go:822). The 30-minute ceiling matches the prior internal
+ // budget executeDelegation used before ce2db75f and the proxy's own
+ // absolute agent-dispatch ceiling (a2a_proxy.go forwardCtx).
+ delegationCtx, cancelDelegation := context.WithTimeout(
+ context.WithoutCancel(ctx), 30*time.Minute,
+ )
+ go func() {
+ defer cancelDelegation()
+ h.executeDelegation(delegationCtx, sourceID, body.TargetID, delegationID, a2aBody)
+ }()
// Broadcast event so canvas shows delegation in real-time
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationSent), sourceID, map[string]interface{}{
diff --git a/workspace-server/internal/handlers/delegation_test.go b/workspace-server/internal/handlers/delegation_test.go
index fcd17eec8..223b8de77 100644
--- a/workspace-server/internal/handlers/delegation_test.go
+++ b/workspace-server/internal/handlers/delegation_test.go
@@ -16,6 +16,65 @@ import (
"github.com/gin-gonic/gin"
)
+// ---------- internal#497 regression: detached goroutine ctx must outlive the handler ----------
+
+// TestDelegate_DetachedContext_SurvivesRequestCancellation pins the
+// load-bearing invariant that regression ce2db75f violated: the context
+// handed to executeDelegation in the fire-and-forget goroutine must NOT be
+// cancelled when the HTTP handler returns 202 (which cancels
+// c.Request.Context()). Before the fix, executeDelegation ran on the
+// request-scoped ctx, so every DB op + proxy call failed `context
+// canceled` the instant the 202 was written — silently breaking 100% of
+// A2A peer delegations fleet-wide since 2026-05-12.
+//
+// This test asserts the exact ctx-derivation contract used by Delegate
+// (context.WithoutCancel(parent) + a timeout budget): the derived context
+// (a) stays alive after the parent is cancelled, and (b) still carries
+// parent values (trace/correlation/tenant ids the downstream proxy +
+// broadcaster read off ctx). It is intentionally DB-free and fast.
+func TestDelegate_DetachedContext_SurvivesRequestCancellation(t *testing.T) {
+ type ctxKey string
+ const traceKey ctxKey = "trace-id"
+
+ // Simulate c.Request.Context() carrying a correlation value.
+ parent, cancelParent := context.WithCancel(
+ context.WithValue(context.Background(), traceKey, "trace-abc-123"),
+ )
+
+ // Exact derivation Delegate uses for the detached goroutine.
+ delegationCtx, cancelDelegation := context.WithTimeout(
+ context.WithoutCancel(parent), 30*time.Minute,
+ )
+ defer cancelDelegation()
+
+ // The HTTP handler "returns 202" → request context is cancelled.
+ cancelParent()
+
+ if err := parent.Err(); err == nil {
+ t.Fatal("precondition: parent context should be cancelled after the handler returns")
+ }
+
+ // (a) Cancellation MUST NOT propagate to the detached context.
+ select {
+ case <-delegationCtx.Done():
+ t.Fatalf("regression: detached delegation ctx was cancelled by the handler returning (err=%v) — executeDelegation would fail every DB op with `context canceled`", delegationCtx.Err())
+ default:
+ // alive — correct
+ }
+
+ // (b) Parent values MUST still be readable (WithoutCancel preserves
+ // values; trace/correlation/tenant ids the proxy + broadcaster use).
+ if got, _ := delegationCtx.Value(traceKey).(string); got != "trace-abc-123" {
+ t.Errorf("detached ctx lost the parent trace value: got %q, want %q", got, "trace-abc-123")
+ }
+
+ // And it still has a real deadline (the 30m budget), so it is not an
+ // unbounded background context.
+ if _, hasDeadline := delegationCtx.Deadline(); !hasDeadline {
+ t.Error("detached ctx must carry the 30-minute timeout budget, but has no deadline")
+ }
+}
+
// ---------- Delegate: missing target_id → 400 ----------
func TestDelegate_MissingTargetID(t *testing.T) {
diff --git a/workspace-server/internal/handlers/handlers_test.go b/workspace-server/internal/handlers/handlers_test.go
index 33a039a1c..35323ec29 100644
--- a/workspace-server/internal/handlers/handlers_test.go
+++ b/workspace-server/internal/handlers/handlers_test.go
@@ -8,6 +8,7 @@ import (
"fmt"
"net/http"
"net/http/httptest"
+ "sync"
"testing"
"time"
@@ -22,8 +23,39 @@ import (
"github.com/redis/go-redis/v9"
)
+// liveTestHandlers tracks every WorkspaceHandler built during the test
+// binary's lifetime so setupTestDB can drain their in-flight goAsync
+// goroutines (notably the detached RestartByID restart cycle, which
+// reads the global db.DB) BEFORE restoring db.DB. Without this drain a
+// fire-and-forget restart goroutine spawned by one test outlives that
+// test and races the db.DB swap in a later test's t.Cleanup — the
+// 0x...d548 data race on platform/internal/db.DB.
+var (
+ liveTestHandlersMu sync.Mutex
+ liveTestHandlers []*WorkspaceHandler
+)
+
func init() {
gin.SetMode(gin.TestMode)
+ newHandlerHook = func(h *WorkspaceHandler) {
+ liveTestHandlersMu.Lock()
+ liveTestHandlers = append(liveTestHandlers, h)
+ liveTestHandlersMu.Unlock()
+ }
+}
+
+// drainTestAsync waits for every tracked handler's goAsync goroutines to
+// finish. Called from setupTestDB's cleanup before db.DB is restored so
+// no detached restart/provision goroutine is mid-read of db.DB when the
+// pointer is swapped.
+func drainTestAsync() {
+ liveTestHandlersMu.Lock()
+ handlers := make([]*WorkspaceHandler, len(liveTestHandlers))
+ copy(handlers, liveTestHandlers)
+ liveTestHandlersMu.Unlock()
+ for _, h := range handlers {
+ h.waitAsyncForTest()
+ }
}
// setupTestDB creates a sqlmock DB and assigns it to the global db.DB.
@@ -42,7 +74,16 @@ func setupTestDB(t *testing.T) sqlmock.Sqlmock {
}
prevDB := db.DB
db.DB = mockDB
- t.Cleanup(func() { db.DB = prevDB; mockDB.Close() })
+ t.Cleanup(func() {
+ // Drain detached async goroutines (e.g. goAsync(RestartByID),
+ // which reads db.DB in runRestartCycle before its provisioner
+ // gate) BEFORE swapping db.DB back. Doing the restore first
+ // would let an in-flight restart goroutine read db.DB while
+ // this line writes it — the data race this guards against.
+ drainTestAsync()
+ db.DB = prevDB
+ mockDB.Close()
+ })
// Disable SSRF checks for the duration of this test only. Restore
// the previous state via t.Cleanup so that TestIsSafeURL_* tests
diff --git a/workspace-server/internal/handlers/registry.go b/workspace-server/internal/handlers/registry.go
index 65a853058..48b9c9285 100644
--- a/workspace-server/internal/handlers/registry.go
+++ b/workspace-server/internal/handlers/registry.go
@@ -327,7 +327,33 @@ func (h *RegistryHandler) Register(c *gin.Context) {
}
}
- agentCardStr := string(payload.AgentCard)
+ // Reconcile the runtime-supplied card's identity fields against the
+ // trusted workspaces row before storing. The runtime builds its card
+ // from config.name, which the CP-regenerated /configs/config.yaml
+ // sets to the workspace UUID — so without this the stored card
+ // served at /.well-known/agent-card.json and returned to peers via
+ // agent_card_url has name = UUID, description = "", role = null even
+ // though the operator-controlled workspaces.name holds the friendly
+ // name the canvas shows. We only FILL gaps from the DB (never
+ // downgrade a card that already carries a real name); identity stays
+ // platform-controlled — the agent cannot self-set these. Best-effort:
+ // a lookup failure leaves the card exactly as the runtime sent it
+ // (no-worse-than-before). See agent_card_reconcile.go.
+ reconciledCard := payload.AgentCard
+ {
+ var dbName, dbRole sql.NullString
+ if qErr := db.DB.QueryRowContext(ctx,
+ `SELECT name, role FROM workspaces WHERE id = $1`, payload.ID,
+ ).Scan(&dbName, &dbRole); qErr == nil {
+ if rc, did := reconcileAgentCardIdentity(
+ payload.AgentCard, payload.ID, dbName.String, dbRole.String,
+ ); did {
+ reconciledCard = rc
+ log.Printf("Registry register: reconciled agent_card identity for %s from workspaces row", payload.ID)
+ }
+ }
+ }
+ agentCardStr := string(reconciledCard)
// urlForUpsert: poll-mode workspaces don't need a URL. Empty input
// becomes NULL via sql.NullString so the row's URL stays clean (the
@@ -413,10 +439,12 @@ func (h *RegistryHandler) Register(c *gin.Context) {
}
}
- // Broadcast WORKSPACE_ONLINE
+ // Broadcast WORKSPACE_ONLINE — use the reconciled card so the canvas
+ // Agent Card view live-updates with the friendly name, matching what
+ // was just persisted (not the runtime's raw UUID-name card).
if err := h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), payload.ID, map[string]interface{}{
"url": cachedURL,
- "agent_card": payload.AgentCard,
+ "agent_card": reconciledCard,
"delivery_mode": effectiveMode,
}); err != nil {
log.Printf("Registry broadcast error: %v", err)
diff --git a/workspace-server/internal/handlers/restart_signals.go b/workspace-server/internal/handlers/restart_signals.go
index 7c4c900ac..88ff480a3 100644
--- a/workspace-server/internal/handlers/restart_signals.go
+++ b/workspace-server/internal/handlers/restart_signals.go
@@ -56,8 +56,10 @@ const (
// (an externally routable address) is used directly.
func (h *WorkspaceHandler) gracefulPreRestart(ctx context.Context, workspaceID string) {
// Non-blocking send — don't stall the restart cycle.
- // Run in a detached goroutine so the caller (runRestartCycle) can
- // proceed to stopForRestart without waiting.
+ // Run in a tracked async goroutine (goAsync, not bare `go`) so the
+ // caller (runRestartCycle) can proceed to stopForRestart without
+ // waiting, while the test harness can still drain it before swapping
+ // the global db.DB (resolveAgentURLForRestartSignal reads db.DB).
h.goAsync(func() {
signalCtx, cancel := context.WithTimeout(context.Background(), restartSignalTimeout)
defer cancel()
diff --git a/workspace-server/internal/handlers/template_files_agent_home_stub_test.go b/workspace-server/internal/handlers/template_files_agent_home_stub_test.go
new file mode 100644
index 000000000..2609cc78c
--- /dev/null
+++ b/workspace-server/internal/handlers/template_files_agent_home_stub_test.go
@@ -0,0 +1,117 @@
+package handlers
+
+// template_files_agent_home_stub_test.go — pins the Phase-1 stub
+// contract for the /agent-home root added by internal#425 RFC.
+//
+// Today (pre-Phase-2b), every Files API verb against `?root=/agent-home`
+// must return HTTP 501 with the canonical pending-message body. The
+// stub MUST NOT:
+// 1. Hit the DB (the workspace might not even exist yet from the
+// canvas's POV — the root selector is testable without one).
+// 2. Touch the EIC tunnel / Docker / template-dir paths — those
+// would 500/404/[] depending on the env and confuse the canvas.
+// 3. Accept writes/deletes that the future docker-exec backend
+// would reject — fail closed.
+//
+// When Phase 2b lands, this file gets replaced by a real
+// docker-exec dispatch test; the stub-message constant in
+// templates.go disappears.
+
+import (
+ "net/http"
+ "net/http/httptest"
+ "strings"
+ "testing"
+
+ "github.com/gin-gonic/gin"
+)
+
+// TestAgentHomeAllowedRoot pins that /agent-home is in the allowedRoots
+// set. Without this, a future refactor that drops the key would
+// silently degrade the canvas root selector to a 400 instead of the
+// stub 501.
+func TestAgentHomeAllowedRoot(t *testing.T) {
+ if !allowedRoots["/agent-home"] {
+ t.Fatal("/agent-home must be in allowedRoots — RFC #425 contract")
+ }
+}
+
+// TestAgentHomeStub_AllVerbs_Return501 pins the canonical stub
+// response across all four verbs. Each must:
+//
+// - status 501
+// - body contains the canonical "/agent-home not implemented" prefix
+// - NOT contain "workspace not found" (proves we short-circuit before
+// the DB lookup)
+//
+// Driven as a table to keep symmetry — adding a fifth verb in the
+// future means adding one row here.
+func TestAgentHomeStub_AllVerbs_Return501(t *testing.T) {
+ cases := []struct {
+ name string
+ method string
+ invoke func(c *gin.Context)
+ }{
+ {
+ name: "ListFiles",
+ method: "GET",
+ invoke: func(c *gin.Context) { (&TemplatesHandler{}).ListFiles(c) },
+ },
+ {
+ name: "ReadFile",
+ method: "GET",
+ invoke: func(c *gin.Context) { (&TemplatesHandler{}).ReadFile(c) },
+ },
+ {
+ name: "WriteFile",
+ method: "PUT",
+ invoke: func(c *gin.Context) { (&TemplatesHandler{}).WriteFile(c) },
+ },
+ {
+ name: "DeleteFile",
+ method: "DELETE",
+ invoke: func(c *gin.Context) { (&TemplatesHandler{}).DeleteFile(c) },
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ w := httptest.NewRecorder()
+ c, _ := gin.CreateTestContext(w)
+ c.Params = gin.Params{
+ {Key: "id", Value: "ws-stub"},
+ // Path param without leading slash so DeleteFile's
+ // filepath.IsAbs guard doesn't 400 before the root
+ // dispatch runs. The List/Read/Write paths strip the
+ // leading slash themselves and accept either form.
+ {Key: "path", Value: "notes.md"},
+ }
+ // WriteFile binds JSON; provide a minimal valid body so the
+ // short-circuit isn't masked by the bind-error path.
+ var body string
+ if tc.method == "PUT" {
+ body = `{"content":"x"}`
+ }
+ c.Request = httptest.NewRequest(
+ tc.method,
+ "/workspaces/ws-stub/files/notes.md?root=/agent-home",
+ strings.NewReader(body),
+ )
+ if body != "" {
+ c.Request.Header.Set("Content-Type", "application/json")
+ }
+
+ tc.invoke(c)
+
+ if w.Code != http.StatusNotImplemented {
+ t.Fatalf("expected 501, got %d: %s", w.Code, w.Body.String())
+ }
+ if !strings.Contains(w.Body.String(), "/agent-home not implemented") {
+ t.Errorf("body should contain canonical stub message; got %s", w.Body.String())
+ }
+ if strings.Contains(w.Body.String(), "workspace not found") {
+ t.Errorf("stub leaked through to DB lookup; body=%s", w.Body.String())
+ }
+ })
+ }
+}
diff --git a/workspace-server/internal/handlers/template_files_eic.go b/workspace-server/internal/handlers/template_files_eic.go
index 87a90b104..8ef8028d1 100644
--- a/workspace-server/internal/handlers/template_files_eic.go
+++ b/workspace-server/internal/handlers/template_files_eic.go
@@ -19,6 +19,7 @@ package handlers
import (
"bytes"
"context"
+ "errors"
"fmt"
"log"
"os"
@@ -357,6 +358,28 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, root, relPath str
var stderr bytes.Buffer
sshCmd.Stderr = &stderr
if err := sshCmd.Run(); err != nil {
+ // When the per-op context deadline (eicFileOpTimeout) fires,
+ // exec.CommandContext SIGKILLs the ssh subprocess and Run()
+ // returns the bare "signal: killed" with empty stderr. That
+ // surfaced to the canvas as an opaque
+ // `500 {"error":"ssh install: signal: killed ()"}` which gave
+ // the operator no idea the workspace was simply mid-provision
+ // with a slow/unready EIC tunnel (internal#423). Detect the
+ // deadline explicitly and return an actionable message instead
+ // — the EIC mechanism, timeout value, and success path are all
+ // unchanged; this only improves the error a stuck write emits.
+ if cerr := ctx.Err(); cerr != nil {
+ reason := "timed out after " + eicFileOpTimeout.String()
+ if errors.Is(cerr, context.Canceled) && !errors.Is(cerr, context.DeadlineExceeded) {
+ reason = "was cancelled"
+ }
+ return fmt.Errorf(
+ "ssh install: EIC tunnel to workspace %s — "+
+ "the workspace may still be provisioning (slow/unready SSH); "+
+ "retry once it is online, or apply provider credentials via "+
+ "Settings → Secrets (encrypted, does not use this file-write path)",
+ reason)
+ }
return fmt.Errorf("ssh install: %w (%s)", err, strings.TrimSpace(stderr.String()))
}
log.Printf("writeFileViaEIC: ws instance=%s runtime=%s root=%s wrote %d bytes → %s",
diff --git a/workspace-server/internal/handlers/template_files_eic_write_timeout_test.go b/workspace-server/internal/handlers/template_files_eic_write_timeout_test.go
new file mode 100644
index 000000000..edef99479
--- /dev/null
+++ b/workspace-server/internal/handlers/template_files_eic_write_timeout_test.go
@@ -0,0 +1,71 @@
+package handlers
+
+// template_files_eic_write_timeout_test.go — pins the actionable-error
+// behavior added for internal#423.
+//
+// When the per-op context deadline (eicFileOpTimeout) fires,
+// exec.CommandContext SIGKILLs the ssh subprocess and Run() returns the
+// bare "signal: killed" with empty stderr. Before the fix that surfaced
+// to the canvas as an opaque `500 {"error":"ssh install: signal:
+// killed ()"}` — useless to an operator whose workspace was simply
+// mid-provision with a slow/unready EIC tunnel. The fix detects the
+// deadline explicitly (errors.Is(ctx.Err(), context.DeadlineExceeded))
+// and returns a message that names the cause and the
+// Settings → Secrets workaround.
+
+import (
+ "context"
+ "strings"
+ "testing"
+ "time"
+)
+
+// TestWriteFileViaEIC_DeadlineExceeded_ActionableError stubs
+// withEICTunnel so the *real* inner closure runs against a context that
+// has already exceeded its deadline. The ssh subprocess fails (no real
+// sshd on the fake port) and ctx.Err() == DeadlineExceeded, so the new
+// branch must fire and produce an actionable message — NOT the opaque
+// "signal: killed ()" string the canvas used to show.
+func TestWriteFileViaEIC_DeadlineExceeded_ActionableError(t *testing.T) {
+ prev := withEICTunnel
+ withEICTunnel = func(_ context.Context, instanceID string, fn func(s eicSSHSession) error) error {
+ // Run the real inner closure. It closes over the ctx that
+ // writeFileViaEIC derived from our already-cancelled parent, so
+ // the ssh subprocess is killed immediately and ctx.Err()
+ // resolves — exactly the eicFileOpTimeout-expiry shape.
+ return fn(eicSSHSession{
+ instanceID: instanceID,
+ osUser: "ubuntu",
+ localPort: 1, // nothing listening → ssh fails fast
+ keyPath: "/nonexistent/key",
+ })
+ }
+ t.Cleanup(func() { withEICTunnel = prev })
+
+ // Drive the real writeFileViaEIC. Pass a parent whose deadline has
+ // already passed: the context.WithTimeout(ctx, eicFileOpTimeout)
+ // derived inside writeFileViaEIC inherits the expired parent
+ // deadline, so ctx.Err() == context.DeadlineExceeded by the time
+ // the killed ssh subprocess returns — the exact production shape
+ // (eicFileOpTimeout expiry), exercised deterministically.
+ parent, cancel := context.WithDeadline(context.Background(), time.Now().Add(-time.Second))
+ defer cancel()
+
+ err := writeFileViaEIC(parent, "i-test", "claude-code", "/configs", "config.yaml", []byte("model: sonnet\n"))
+ if err == nil {
+ t.Fatalf("expected an error from a killed ssh subprocess, got nil")
+ }
+ msg := err.Error()
+
+ // Must NOT leak the opaque bare-signal string to the operator.
+ if strings.Contains(msg, "signal: killed ()") {
+ t.Fatalf("error still surfaces the opaque %q form: %q", "signal: killed ()", msg)
+ }
+ // Must name the cause and the Secrets workaround so the canvas
+ // shows something actionable.
+ for _, want := range []string{"timed out", "provisioning", "Settings", "Secrets"} {
+ if !strings.Contains(msg, want) {
+ t.Errorf("actionable error missing %q; got: %q", want, msg)
+ }
+ }
+}
diff --git a/workspace-server/internal/handlers/templates.go b/workspace-server/internal/handlers/templates.go
index 3f41dbb4d..0258419f7 100644
--- a/workspace-server/internal/handlers/templates.go
+++ b/workspace-server/internal/handlers/templates.go
@@ -18,11 +18,35 @@ import (
)
// allowedRoots are the container paths that the Files API can browse.
+//
+// `/agent-home` (added 2026-05-15, internal#425 RFC) is the container's
+// own $HOME — `/root` for openclaw, `/home/agent` for claude-code/hermes
+// — browsed via `docker exec` rather than host-side `find`. The
+// dispatch is stubbed today (returns 501); full implementation lands in
+// Phase 2b of the RFC. The allowedRoots key is added now so the canvas
+// can design its root-selector UI against the final shape and the
+// stub-vs-full transition is server-side only.
var allowedRoots = map[string]bool{
- "/configs": true,
- "/workspace": true,
- "/home": true,
- "/plugins": true,
+ "/configs": true,
+ "/workspace": true,
+ "/home": true,
+ "/plugins": true,
+ "/agent-home": true,
+}
+
+// agentHomeStubMessage is the body returned by every Files API verb
+// when `?root=/agent-home` is requested before Phase 2b lands. Keep the
+// status code 501 (Not Implemented) — the route exists, the verb is
+// understood, but the handler is unimplemented. Distinguishes from
+// 400/404 so a canvas behind a less-current server can render a clean
+// "feature pending" state instead of a generic error.
+const agentHomeStubMessage = "/agent-home not implemented yet (internal#425 RFC Phase 2b — docker-exec backend pending)"
+
+// isAgentHomeStubRequest returns true when the request targets the
+// stubbed /agent-home root. Centralised so every verb in this file
+// short-circuits with the same response shape.
+func isAgentHomeStubRequest(rootPath string) bool {
+ return rootPath == "/agent-home"
}
// maxUploadFiles limits the number of files in a single import/replace.
@@ -224,7 +248,14 @@ func (h *TemplatesHandler) ListFiles(c *gin.Context) {
// ?depth= — max depth to recurse (default: 1, max: 5)
rootPath := c.DefaultQuery("root", "/configs")
if !allowedRoots[rootPath] {
- c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins"})
+ c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins, /agent-home"})
+ return
+ }
+ // /agent-home dispatch is stubbed pre-Phase-2b. Short-circuit before
+ // the DB lookup + EIC dance so a canvas exercising the new root key
+ // gets a clean 501 instead of a half-effort response.
+ if isAgentHomeStubRequest(rootPath) {
+ c.JSON(http.StatusNotImplemented, gin.H{"error": agentHomeStubMessage})
return
}
subPath := c.DefaultQuery("path", "")
@@ -393,7 +424,11 @@ func (h *TemplatesHandler) ReadFile(c *gin.Context) {
ctx := c.Request.Context()
rootPath := c.DefaultQuery("root", "/configs")
if !allowedRoots[rootPath] {
- c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins"})
+ c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins, /agent-home"})
+ return
+ }
+ if isAgentHomeStubRequest(rootPath) {
+ c.JSON(http.StatusNotImplemented, gin.H{"error": agentHomeStubMessage})
return
}
@@ -506,7 +541,11 @@ func (h *TemplatesHandler) WriteFile(c *gin.Context) {
ctx := c.Request.Context()
rootPath := c.DefaultQuery("root", "/configs")
if !allowedRoots[rootPath] {
- c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins"})
+ c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins, /agent-home"})
+ return
+ }
+ if isAgentHomeStubRequest(rootPath) {
+ c.JSON(http.StatusNotImplemented, gin.H{"error": agentHomeStubMessage})
return
}
var wsName, instanceID, runtime string
@@ -583,7 +622,11 @@ func (h *TemplatesHandler) DeleteFile(c *gin.Context) {
ctx := c.Request.Context()
rootPath := c.DefaultQuery("root", "/configs")
if !allowedRoots[rootPath] {
- c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins"})
+ c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins, /agent-home"})
+ return
+ }
+ if isAgentHomeStubRequest(rootPath) {
+ c.JSON(http.StatusNotImplemented, gin.H{"error": agentHomeStubMessage})
return
}
var wsName, instanceID, runtime string
diff --git a/workspace-server/internal/handlers/tokens.go b/workspace-server/internal/handlers/tokens.go
index c41f8c518..4ce7be815 100644
--- a/workspace-server/internal/handlers/tokens.go
+++ b/workspace-server/internal/handlers/tokens.go
@@ -10,8 +10,20 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
"github.com/gin-gonic/gin"
+ "github.com/google/uuid"
)
+// validWorkspaceID returns true when id is a syntactically valid UUID.
+// workspace_id is a `uuid` column; passing a non-UUID (e.g. the canvas
+// "global" sentinel sent when no node is selected) makes Postgres raise
+// `invalid input syntax for type uuid`, which previously leaked as an
+// opaque 500. Reject up front with a clean 400 instead. Mirrors the
+// uuid.Parse guard already used in handlers/activity.go.
+func validWorkspaceID(id string) bool {
+ _, err := uuid.Parse(id)
+ return err == nil
+}
+
// TokenHandler exposes user-facing token management for workspaces.
// Routes: GET/POST/DELETE /workspaces/:id/tokens (behind WorkspaceAuth).
type TokenHandler struct{}
@@ -31,6 +43,10 @@ type tokenListItem struct {
// never the plaintext or hash).
func (h *TokenHandler) List(c *gin.Context) {
workspaceID := c.Param("id")
+ if !validWorkspaceID(workspaceID) {
+ c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace id"})
+ return
+ }
limit := 50
if v := c.Query("limit"); v != "" {
@@ -53,6 +69,7 @@ func (h *TokenHandler) List(c *gin.Context) {
LIMIT $2 OFFSET $3
`, workspaceID, limit, offset)
if err != nil {
+ log.Printf("tokens: list query failed for workspace %s: %v", workspaceID, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to list tokens"})
return
}
@@ -85,6 +102,10 @@ const maxTokensPerWorkspace = 50
// exactly once in the response — it cannot be recovered afterwards.
func (h *TokenHandler) Create(c *gin.Context) {
workspaceID := c.Param("id")
+ if !validWorkspaceID(workspaceID) {
+ c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace id"})
+ return
+ }
// Rate limit: max active tokens per workspace
var count int
@@ -117,6 +138,10 @@ func (h *TokenHandler) Create(c *gin.Context) {
func (h *TokenHandler) Revoke(c *gin.Context) {
workspaceID := c.Param("id")
tokenID := c.Param("tokenId")
+ if !validWorkspaceID(workspaceID) {
+ c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace id"})
+ return
+ }
result, err := db.DB.ExecContext(c.Request.Context(), `
UPDATE workspace_auth_tokens
diff --git a/workspace-server/internal/handlers/tokens_sqlmock_test.go b/workspace-server/internal/handlers/tokens_sqlmock_test.go
index b0166293e..b3a2d9a52 100644
--- a/workspace-server/internal/handlers/tokens_sqlmock_test.go
+++ b/workspace-server/internal/handlers/tokens_sqlmock_test.go
@@ -41,6 +41,15 @@ import (
func init() { gin.SetMode(gin.TestMode) }
+// Workspace IDs are validated as UUIDs up front (tokens.go validWorkspaceID),
+// so handler tests must pass syntactically valid UUIDs. Fixed values keep
+// sqlmock WithArgs assertions deterministic.
+const (
+ wsUUID1 = "11111111-1111-1111-1111-111111111111"
+ wsUUID2 = "22222222-2222-2222-2222-222222222222"
+ wsUUID3 = "33333333-3333-3333-3333-333333333333"
+)
+
// withMockDB swaps `db.DB` for a sqlmock and returns the mock plus a
// restore func. Tests use this in place of setupTokenTestDB which
// skips on a missing real DB.
@@ -81,13 +90,13 @@ func TestTokenHandler_List_HappyPath(t *testing.T) {
created := time.Date(2026, 4, 1, 12, 0, 0, 0, time.UTC)
last := created.Add(time.Hour)
mock.ExpectQuery(`SELECT id, prefix, created_at, last_used_at\s+FROM workspace_auth_tokens`).
- WithArgs("ws-1", 50, 0).
+ WithArgs(wsUUID1, 50, 0).
WillReturnRows(sqlmock.NewRows([]string{"id", "prefix", "created_at", "last_used_at"}).
AddRow("tok-1", "abc12345", created, last).
AddRow("tok-2", "def67890", created, nil))
w := makeReq(t, NewTokenHandler().List, "GET",
- "/workspaces/ws-1/tokens", gin.Params{{Key: "id", Value: "ws-1"}})
+ "/workspaces/ws-1/tokens", gin.Params{{Key: "id", Value: wsUUID1}})
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -121,7 +130,7 @@ func TestTokenHandler_List_EmptyResult(t *testing.T) {
WillReturnRows(sqlmock.NewRows([]string{"id", "prefix", "created_at", "last_used_at"}))
w := makeReq(t, NewTokenHandler().List, "GET",
- "/workspaces/ws-2/tokens", gin.Params{{Key: "id", Value: "ws-2"}})
+ "/workspaces/ws-2/tokens", gin.Params{{Key: "id", Value: wsUUID2}})
if w.Code != http.StatusOK {
t.Fatalf("expected 200 on empty list, got %d", w.Code)
@@ -146,7 +155,7 @@ func TestTokenHandler_List_QueryError(t *testing.T) {
WillReturnError(errors.New("connection refused"))
w := makeReq(t, NewTokenHandler().List, "GET",
- "/workspaces/ws-3/tokens", gin.Params{{Key: "id", Value: "ws-3"}})
+ "/workspaces/ws-3/tokens", gin.Params{{Key: "id", Value: wsUUID3}})
if w.Code != http.StatusInternalServerError {
t.Errorf("query error must surface as 500, got %d", w.Code)
@@ -158,13 +167,13 @@ func TestTokenHandler_List_RespectsLimit(t *testing.T) {
defer cleanup()
mock.ExpectQuery(`SELECT id, prefix, created_at, last_used_at`).
- WithArgs("ws-1", 10, 5).
+ WithArgs(wsUUID1, 10, 5).
WillReturnRows(sqlmock.NewRows([]string{"id", "prefix", "created_at", "last_used_at"}))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/workspaces/ws-1/tokens?limit=10&offset=5", nil)
- c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
+ c.Params = gin.Params{{Key: "id", Value: wsUUID1}}
NewTokenHandler().List(c)
if w.Code != http.StatusOK {
@@ -186,7 +195,7 @@ func TestTokenHandler_List_ScanError(t *testing.T) {
AddRow("tok-1", "abc", "not-a-timestamp", nil))
w := makeReq(t, NewTokenHandler().List, "GET",
- "/workspaces/ws-1/tokens", gin.Params{{Key: "id", Value: "ws-1"}})
+ "/workspaces/ws-1/tokens", gin.Params{{Key: "id", Value: wsUUID1}})
if w.Code != http.StatusInternalServerError {
t.Errorf("scan error must surface as 500, got %d: %s", w.Code, w.Body.String())
@@ -201,11 +210,11 @@ func TestTokenHandler_Create_RateLimited(t *testing.T) {
// Count query returns 50 (== max) → 429.
mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens`).
- WithArgs("ws-1").
+ WithArgs(wsUUID1).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(50))
w := makeReq(t, NewTokenHandler().Create, "POST",
- "/workspaces/ws-1/tokens", gin.Params{{Key: "id", Value: "ws-1"}})
+ "/workspaces/ws-1/tokens", gin.Params{{Key: "id", Value: wsUUID1}})
if w.Code != http.StatusTooManyRequests {
t.Errorf("max active tokens should 429, got %d", w.Code)
@@ -225,7 +234,7 @@ func TestTokenHandler_Create_IssueFails(t *testing.T) {
WillReturnError(errors.New("disk full"))
w := makeReq(t, NewTokenHandler().Create, "POST",
- "/workspaces/ws-1/tokens", gin.Params{{Key: "id", Value: "ws-1"}})
+ "/workspaces/ws-1/tokens", gin.Params{{Key: "id", Value: wsUUID1}})
if w.Code != http.StatusInternalServerError {
t.Errorf("IssueToken DB error must 500, got %d", w.Code)
@@ -242,7 +251,7 @@ func TestTokenHandler_Create_HappyPath(t *testing.T) {
WillReturnResult(sqlmock.NewResult(1, 1))
w := makeReq(t, NewTokenHandler().Create, "POST",
- "/workspaces/ws-1/tokens", gin.Params{{Key: "id", Value: "ws-1"}})
+ "/workspaces/ws-1/tokens", gin.Params{{Key: "id", Value: wsUUID1}})
if w.Code != http.StatusCreated {
t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String())
@@ -257,7 +266,7 @@ func TestTokenHandler_Create_HappyPath(t *testing.T) {
if body.AuthToken == "" {
t.Errorf("auth_token must be present and non-empty in response")
}
- if body.WorkspaceID != "ws-1" {
+ if body.WorkspaceID != wsUUID1 {
t.Errorf("workspace_id mismatch: %q", body.WorkspaceID)
}
}
@@ -269,12 +278,12 @@ func TestTokenHandler_Revoke_HappyPath(t *testing.T) {
defer cleanup()
mock.ExpectExec(`UPDATE workspace_auth_tokens\s+SET revoked_at = now\(\)`).
- WithArgs("tok-1", "ws-1").
+ WithArgs("tok-1", wsUUID1).
WillReturnResult(sqlmock.NewResult(0, 1))
w := makeReq(t, NewTokenHandler().Revoke, "DELETE",
"/workspaces/ws-1/tokens/tok-1", gin.Params{
- {Key: "id", Value: "ws-1"},
+ {Key: "id", Value: wsUUID1},
{Key: "tokenId", Value: "tok-1"},
})
@@ -289,12 +298,12 @@ func TestTokenHandler_Revoke_NotFound(t *testing.T) {
// 0 rows affected → token not found OR already revoked.
mock.ExpectExec(`UPDATE workspace_auth_tokens`).
- WithArgs("tok-ghost", "ws-1").
+ WithArgs("tok-ghost", wsUUID1).
WillReturnResult(sqlmock.NewResult(0, 0))
w := makeReq(t, NewTokenHandler().Revoke, "DELETE",
"/workspaces/ws-1/tokens/tok-ghost", gin.Params{
- {Key: "id", Value: "ws-1"},
+ {Key: "id", Value: wsUUID1},
{Key: "tokenId", Value: "tok-ghost"},
})
@@ -312,7 +321,7 @@ func TestTokenHandler_Revoke_DBError(t *testing.T) {
w := makeReq(t, NewTokenHandler().Revoke, "DELETE",
"/workspaces/ws-1/tokens/tok-1", gin.Params{
- {Key: "id", Value: "ws-1"},
+ {Key: "id", Value: wsUUID1},
{Key: "tokenId", Value: "tok-1"},
})
@@ -321,6 +330,59 @@ func TestTokenHandler_Revoke_DBError(t *testing.T) {
}
}
+// ---- UUID validation (regression: "global" sentinel 500) ------------
+
+// The canvas Settings → Workspace Tokens tab sent the literal sentinel
+// "global" as the workspace id when no node was selected. workspace_id
+// is a `uuid` column, so the query raised
+// `invalid input syntax for type uuid: "global"` which leaked as an
+// opaque 500. List/Create/Revoke now reject any non-UUID id with a
+// clean 400 before touching the DB. No DB expectation is set on the
+// mock — a DB hit would fail ExpectationsWereMet, proving short-circuit.
+func TestTokenHandler_RejectsNonUUIDWorkspaceID(t *testing.T) {
+ h := NewTokenHandler()
+ cases := []struct {
+ name string
+ run func(c *gin.Context)
+ method string
+ params gin.Params
+ }{
+ {"List", h.List, "GET", gin.Params{{Key: "id", Value: "global"}}},
+ {"Create", h.Create, "POST", gin.Params{{Key: "id", Value: "global"}}},
+ {"Revoke", h.Revoke, "DELETE", gin.Params{
+ {Key: "id", Value: "global"},
+ {Key: "tokenId", Value: "tok-1"},
+ }},
+ }
+ for _, tc := range cases {
+ t.Run(tc.name, func(t *testing.T) {
+ mock, cleanup := withMockDB(t)
+ defer cleanup()
+
+ w := makeReq(t, tc.run, tc.method,
+ "/workspaces/global/tokens", tc.params)
+
+ if w.Code != http.StatusBadRequest {
+ t.Fatalf("%s with non-UUID id must 400, got %d: %s",
+ tc.name, w.Code, w.Body.String())
+ }
+ var body struct {
+ Error string `json:"error"`
+ }
+ _ = json.Unmarshal(w.Body.Bytes(), &body)
+ if body.Error != "invalid workspace id" {
+ t.Errorf("%s: want error=%q, got %q",
+ tc.name, "invalid workspace id", body.Error)
+ }
+ // No query/exec was expected → if the handler hit the DB
+ // this fails, proving the guard short-circuits before SQL.
+ if err := mock.ExpectationsWereMet(); err != nil {
+ t.Errorf("%s leaked a DB call past the uuid guard: %v", tc.name, err)
+ }
+ })
+ }
+}
+
// Compile-time noise removal: the imports list pulls in the sql /
// driver packages and the silenced ctx so a future scenario that
// needs them doesn't have to re-add the import. Documented here so
diff --git a/workspace-server/internal/handlers/tokens_test.go b/workspace-server/internal/handlers/tokens_test.go
index 1b62e1064..bf6d66030 100644
--- a/workspace-server/internal/handlers/tokens_test.go
+++ b/workspace-server/internal/handlers/tokens_test.go
@@ -11,6 +11,7 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
"github.com/gin-gonic/gin"
+ "github.com/google/uuid"
)
func init() { gin.SetMode(gin.TestMode) }
@@ -167,11 +168,14 @@ func TestTokenHandler_RevokeWrongWorkspace(t *testing.T) {
h := NewTokenHandler()
- // Try to revoke with a different workspace ID — should 404
+ // Try to revoke with a different (valid-UUID) workspace ID that does
+ // not own the token — should 404. A valid UUID is required so this
+ // exercises the ownership branch, not the up-front uuid-shape 400.
+ otherWS := uuid.NewString()
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
- c.Params = gin.Params{{Key: "id", Value: "wrong-workspace-id"}, {Key: "tokenId", Value: tokenID}}
- c.Request = httptest.NewRequest("DELETE", "/workspaces/wrong/tokens/"+tokenID, nil)
+ c.Params = gin.Params{{Key: "id", Value: otherWS}, {Key: "tokenId", Value: tokenID}}
+ c.Request = httptest.NewRequest("DELETE", "/workspaces/"+otherWS+"/tokens/"+tokenID, nil)
h.Revoke(c)
if w.Code != http.StatusNotFound {
diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go
index f0e478739..781741aad 100644
--- a/workspace-server/internal/handlers/workspace.go
+++ b/workspace-server/internal/handlers/workspace.go
@@ -80,6 +80,15 @@ type WorkspaceHandler struct {
asyncWG sync.WaitGroup
}
+// newHandlerHook, when non-nil, is invoked for every WorkspaceHandler
+// created via NewWorkspaceHandler. It is nil in production (zero cost);
+// the test harness sets it so setupTestDB can drain every handler's
+// in-flight async goroutines before swapping the global db.DB. Without
+// this, a detached restart goroutine (maybeMarkContainerDead ->
+// goAsync(RestartByID) -> runRestartCycle reads db.DB) races the
+// db.DB restore in another test's t.Cleanup.
+var newHandlerHook func(*WorkspaceHandler)
+
func (h *WorkspaceHandler) goAsync(fn func()) {
h.asyncWG.Add(1)
go func() {
@@ -108,6 +117,9 @@ func NewWorkspaceHandler(b events.EventEmitter, p *provisioner.Provisioner, plat
if p != nil {
h.provisioner = p
}
+ if newHandlerHook != nil {
+ newHandlerHook(h)
+ }
return h
}
diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go
index 985b9ca56..c2ab5828e 100644
--- a/workspace-server/internal/handlers/workspace_restart.go
+++ b/workspace-server/internal/handlers/workspace_restart.go
@@ -237,10 +237,10 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) {
// the silent-drop bugs PRs #2811/#2824 closed). RestartWorkspaceAuto
// enforces CP-FIRST ordering matching the other dispatchers — see
// docs/architecture/backends.md.
- go func() {
+ h.goAsync(func() {
h.RestartWorkspaceAutoOpts(context.Background(), id, templatePath, configFiles, payload, resetClaudeSession)
- }()
- go h.sendRestartContext(id, restartData)
+ })
+ h.goAsync(func() { h.sendRestartContext(id, restartData) })
c.JSON(http.StatusOK, gin.H{"status": "provisioning", "config_dir": configLabel, "reset_session": resetClaudeSession})
}
@@ -610,7 +610,9 @@ func (h *WorkspaceHandler) runRestartCycle(workspaceID string) {
h.provisionWorkspaceAutoSync(workspaceID, "", nil, payload)
// sendRestartContext is a one-way notification to the new container; safe
// to fire async — the next restart cycle won't depend on it completing.
- go h.sendRestartContext(workspaceID, restartData)
+ // Tracked via goAsync so the test harness can drain it before the
+ // global db.DB swap (sendRestartContext reads db.DB).
+ h.goAsync(func() { h.sendRestartContext(workspaceID, restartData) })
}
// Pause handles POST /workspaces/:id/pause
diff --git a/workspace-server/internal/provisioner/cp_provisioner.go b/workspace-server/internal/provisioner/cp_provisioner.go
index 224b8ca20..8f6f0c557 100644
--- a/workspace-server/internal/provisioner/cp_provisioner.go
+++ b/workspace-server/internal/provisioner/cp_provisioner.go
@@ -178,12 +178,21 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string,
// /admin/liveness and other admin-gated platform endpoints (core#831).
// p.adminToken is read from os.Getenv("ADMIN_TOKEN") at provisioner creation;
// it is also used for CP→platform HTTP auth but those are separate concerns.
- env := cfg.EnvVars
- if p.adminToken != "" {
- env = make(map[string]string, len(cfg.EnvVars)+1)
- for k, v := range cfg.EnvVars {
- env[k] = v
+ //
+ // Forensic #145 hardening: tenant workspaces run on EC2 via this path, so
+ // the SCM-write-token denylist (see buildContainerEnv) is enforced here
+ // too. Always build a filtered copy — never pass cfg.EnvVars through
+ // verbatim — so a latent persona-merged GITEA_TOKEN can't reach the
+ // tenant container regardless of whether ADMIN_TOKEN is set.
+ env := make(map[string]string, len(cfg.EnvVars)+1)
+ for k, v := range cfg.EnvVars {
+ if isSCMWriteTokenKey(k) {
+ log.Printf("CPProvisioner.Start: dropped SCM-write credential %q from tenant workspace env (forensic #145 guard)", k)
+ continue
}
+ env[k] = v
+ }
+ if p.adminToken != "" {
env["ADMIN_TOKEN"] = p.adminToken
}
// Collect template files and generated configs, with OFFSEC-010 guards:
diff --git a/workspace-server/internal/provisioner/provisioner.go b/workspace-server/internal/provisioner/provisioner.go
index f4ca31c57..6c895867e 100644
--- a/workspace-server/internal/provisioner/provisioner.go
+++ b/workspace-server/internal/provisioner/provisioner.go
@@ -643,6 +643,28 @@ func ValidateWorkspaceAccess(access, workspacePath string) error {
}
}
+// scmWriteTokenKeys is the explicit denylist of environment variable names
+// that carry a Git SCM *write* credential (push / merge / approve). These
+// must never reach a tenant workspace container — see the forensic #145
+// rationale in buildContainerEnv. Kept as an exact-match set rather than a
+// substring/prefix heuristic so the guard is auditable and can't silently
+// over-strip a legitimately-named var.
+var scmWriteTokenKeys = map[string]struct{}{
+ "GITEA_TOKEN": {},
+ "GITHUB_TOKEN": {},
+ "GH_TOKEN": {}, // gh CLI honours GH_TOKEN as a GITHUB_TOKEN alias
+ "GITLAB_TOKEN": {},
+ "GL_TOKEN": {}, // glab CLI alias
+ "BITBUCKET_TOKEN": {},
+}
+
+// isSCMWriteTokenKey reports whether an env var name is a known Git SCM
+// write credential that must be stripped from tenant workspace env.
+func isSCMWriteTokenKey(key string) bool {
+ _, ok := scmWriteTokenKeys[key]
+ return ok
+}
+
// buildContainerEnv assembles the initial environment variables injected
// into every workspace container.
//
@@ -679,6 +701,21 @@ func buildContainerEnv(cfg WorkspaceConfig) []string {
env = append(env, fmt.Sprintf("AWARENESS_URL=%s", cfg.AwarenessURL))
}
for k, v := range cfg.EnvVars {
+ // Forensic #145 hardening: tenant workspace containers run
+ // agent-controlled code and must NEVER receive a Git SCM *write*
+ // credential. Without merge/approve creds in-container the
+ // two-eyes review gate is structurally self-bypass-proof — an
+ // agent that forges an approval has no token to act on it. A
+ // latent path exists (loadPersonaEnvFile merges a per-role
+ // persona `GITEA_TOKEN` into cfg.EnvVars when MOLECULE_PERSONA_ROOT
+ // is set on a tenant host); it is inert today (persona dirs are
+ // operator-host-only) but unguarded. Strip SCM-write tokens here
+ // by construction so the invariant holds regardless of whether
+ // that path ever becomes reachable.
+ if isSCMWriteTokenKey(k) {
+ log.Printf("buildContainerEnv: dropped SCM-write credential %q from workspace env (forensic #145 guard)", k)
+ continue
+ }
env = append(env, fmt.Sprintf("%s=%s", k, v))
}
// Inject ADMIN_TOKEN from the platform server's environment so workspace
diff --git a/workspace-server/internal/provisioner/provisioner_test.go b/workspace-server/internal/provisioner/provisioner_test.go
index 815c47cb8..9d0067c33 100644
--- a/workspace-server/internal/provisioner/provisioner_test.go
+++ b/workspace-server/internal/provisioner/provisioner_test.go
@@ -725,10 +725,15 @@ func TestBuildContainerEnv_AwarenessOnlyWhenBothSet(t *testing.T) {
}
func TestBuildContainerEnv_CustomEnvVarsAppended(t *testing.T) {
+ // NOTE: this test previously asserted GITHUB_TOKEN passed through
+ // verbatim. That assertion encoded the forensic #145 latent leak as
+ // expected behavior. Post-guard, ordinary custom env still flows but
+ // SCM-write credentials are stripped — see
+ // TestBuildContainerEnv_StripsSCMWriteTokens for the negative assertion.
cfg := WorkspaceConfig{
WorkspaceID: "ws-x",
PlatformURL: "http://localhost:8080",
- EnvVars: map[string]string{"CUSTOM": "value", "GITHUB_TOKEN": "fake-token-for-test"},
+ EnvVars: map[string]string{"CUSTOM": "value", "ANTHROPIC_API_KEY": "sk-not-an-scm-token"},
}
env := buildContainerEnv(cfg)
seen := map[string]string{}
@@ -741,8 +746,8 @@ func TestBuildContainerEnv_CustomEnvVarsAppended(t *testing.T) {
if seen["CUSTOM"] != "value" {
t.Errorf("CUSTOM env missing, got env=%v", env)
}
- if seen["GITHUB_TOKEN"] != "fake-token-for-test" {
- t.Errorf("GITHUB_TOKEN env missing, got env=%v", env)
+ if seen["ANTHROPIC_API_KEY"] != "sk-not-an-scm-token" {
+ t.Errorf("non-SCM custom env must still pass through, got env=%v", env)
}
// Built-in defaults still present
if seen["MOLECULE_URL"] == "" {
@@ -750,6 +755,129 @@ func TestBuildContainerEnv_CustomEnvVarsAppended(t *testing.T) {
}
}
+// ---------- forensic #145: SCM-write-token denylist guard ----------
+
+// TestBuildContainerEnv_StripsSCMWriteTokens is the core negative
+// assertion: a tenant workspace env constructed via buildContainerEnv MUST
+// NOT contain any Git SCM *write* credential, regardless of how it got into
+// cfg.EnvVars. This proves the two-eyes review gate stays structurally
+// self-bypass-proof — an agent in-container has no merge/approve token to
+// act on a forged approval. See forensic #145.
+//
+// This test FAILS on the pre-guard code (where buildContainerEnv passed
+// cfg.EnvVars through verbatim) and PASSES once the denylist filter is in
+// place — i.e. the guard is proven by construction, not by environment
+// accident.
+func TestBuildContainerEnv_StripsSCMWriteTokens(t *testing.T) {
+ scmTokens := []string{
+ "GITEA_TOKEN", "GITHUB_TOKEN", "GH_TOKEN",
+ "GITLAB_TOKEN", "GL_TOKEN", "BITBUCKET_TOKEN",
+ }
+
+ t.Run("normal path — SCM tokens explicitly set in EnvVars", func(t *testing.T) {
+ envVars := map[string]string{"CUSTOM": "ok", "ANTHROPIC_API_KEY": "sk-keep"}
+ for _, k := range scmTokens {
+ envVars[k] = "leaked-write-credential-" + k
+ }
+ cfg := WorkspaceConfig{
+ WorkspaceID: "ws-tenant",
+ PlatformURL: "http://localhost:8080",
+ Tier: 2,
+ EnvVars: envVars,
+ }
+ assertNoSCMWriteToken(t, buildContainerEnv(cfg), scmTokens)
+
+ // Sanity: non-SCM custom env is NOT collateral-damaged by the filter.
+ if !envContains(buildContainerEnv(cfg), "CUSTOM=ok") {
+ t.Errorf("filter must not strip non-SCM custom env")
+ }
+ if !envContains(buildContainerEnv(cfg), "ANTHROPIC_API_KEY=sk-keep") {
+ t.Errorf("filter must not strip non-SCM API keys")
+ }
+ })
+
+ t.Run("persona-file path — simulates loadPersonaEnvFile merge", func(t *testing.T) {
+ // The latent path: handlers.loadPersonaEnvFile() merges a per-role
+ // persona env file (carrying GITEA_USER, GITEA_TOKEN, …) into the
+ // workspace env map when MOLECULE_PERSONA_ROOT is set on a tenant
+ // host. We can't invoke that cross-package helper here, but its
+ // observable effect is exactly "a GITEA_TOKEN appears in
+ // cfg.EnvVars". Constructing that condition directly proves the
+ // guard holds even if the latent path becomes reachable.
+ cfg := WorkspaceConfig{
+ WorkspaceID: "ws-tenant",
+ PlatformURL: "http://localhost:8080",
+ Tier: 2,
+ EnvVars: map[string]string{
+ // Persona identity fields that are SAFE to keep (read-only
+ // identity, not a write credential):
+ "GITEA_USER": "backend-engineer",
+ "GITEA_USER_EMAIL": "backend-engineer@agents.moleculesai.app",
+ // The credential that must be stripped:
+ "GITEA_TOKEN": "persona-merged-write-pat",
+ "GITEA_TOKEN_SCOPES": "write:repository",
+ },
+ }
+ got := buildContainerEnv(cfg)
+ assertNoSCMWriteToken(t, got, scmTokens)
+ // Non-credential persona identity may still flow through — only the
+ // write token is the denied surface.
+ if !envContains(got, "GITEA_USER=backend-engineer") {
+ t.Errorf("non-credential persona identity (GITEA_USER) should not be stripped")
+ }
+ })
+}
+
+// TestCPProvisionerEnv_StripsSCMWriteTokens covers the tenant-EC2 path:
+// CPProvisioner.Start builds the env map the control plane forwards to the
+// EC2 workspace container. The same forensic #145 denylist must hold there.
+func TestCPProvisionerEnv_StripsSCMWriteTokens(t *testing.T) {
+ // isSCMWriteTokenKey is the single source of truth shared by both
+ // buildContainerEnv (local Docker) and CPProvisioner.Start (tenant EC2).
+ // Assert it classifies every known SCM-write var as denied and leaves
+ // ordinary / read-only-identity vars alone.
+ for _, k := range []string{
+ "GITEA_TOKEN", "GITHUB_TOKEN", "GH_TOKEN",
+ "GITLAB_TOKEN", "GL_TOKEN", "BITBUCKET_TOKEN",
+ } {
+ if !isSCMWriteTokenKey(k) {
+ t.Errorf("isSCMWriteTokenKey(%q) = false, want true (SCM-write credential must be denied)", k)
+ }
+ }
+ for _, k := range []string{
+ "GITEA_USER", "GITEA_USER_EMAIL", "ANTHROPIC_API_KEY",
+ "CUSTOM", "PLATFORM_URL", "ADMIN_TOKEN", "",
+ } {
+ if isSCMWriteTokenKey(k) {
+ t.Errorf("isSCMWriteTokenKey(%q) = true, want false (must not over-strip non-SCM env)", k)
+ }
+ }
+}
+
+func assertNoSCMWriteToken(t *testing.T, env []string, scmTokens []string) {
+ t.Helper()
+ for _, e := range env {
+ key := e
+ if i := strings.IndexByte(e, '='); i >= 0 {
+ key = e[:i]
+ }
+ for _, banned := range scmTokens {
+ if key == banned {
+ t.Errorf("SCM-write credential %q leaked into workspace env (forensic #145 invariant violated): %q", banned, e)
+ }
+ }
+ }
+}
+
+func envContains(env []string, want string) bool {
+ for _, e := range env {
+ if e == want {
+ return true
+ }
+ }
+ return false
+}
+
// ---------- buildWorkspaceMount — #65 workspace_access ----------
func TestBuildWorkspaceMount_SelectionMatrix(t *testing.T) {
diff --git a/workspace-server/internal/secrets/patterns.go b/workspace-server/internal/secrets/patterns.go
new file mode 100644
index 000000000..13356af7a
--- /dev/null
+++ b/workspace-server/internal/secrets/patterns.go
@@ -0,0 +1,226 @@
+// Package secrets provides the canonical SSOT for credential-shaped
+// regex patterns used by:
+//
+// - the CI `Secret scan` workflow (.gitea/workflows/secret-scan.yml)
+// - the runtime's bundled pre-commit hook
+// (molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh)
+// - the upcoming Phase 2b docker-exec Files API backend, which has
+// to refuse to surface files whose path OR content matches a
+// credential shape (RFC internal#425, Hongming 2026-05-15)
+//
+// Before this package, the same regex set lived as duplicate bash
+// arrays in two unrelated repos; adding a pattern required editing
+// both, and pattern drift was caught only via secret-scan workflow
+// failures on PRs that had unrelated changes (#2090-class incident
+// vector). Centralising in Go makes the Files API the SSOT, with the
+// YAML + bash arrays generated/asserted from this package so drift
+// is detected at CI time, not at exfiltration time.
+//
+// This file is Phase 2a of the internal#425 RFC. Phase 2b will import
+// `Patterns` from `template_files_docker_exec.go` to gate
+// `listFilesViaDockerExec` / `readFileViaDockerExec` against
+// secret-shaped paths AND content. Until 2b lands, the package has
+// one consumer: this package's own unit tests, which pin the regex
+// strings so a refactor that drops or weakens one is caught here.
+package secrets
+
+import (
+ "fmt"
+ "regexp"
+ "sync"
+)
+
+// Pattern is one named credential shape — a human label plus the
+// compiled regex. The label appears in CI error output ("matched:
+// github-pat") so an operator can identify the family without seeing
+// the actual matched bytes (echoing the bytes widens the blast radius
+// per the secret-scan workflow's recovery prose).
+type Pattern struct {
+ // Name is a short kebab-case identifier (e.g. "github-pat",
+ // "anthropic-api-key"). Stable across versions — consumers may
+ // switch on it.
+ Name string
+ // Description is a one-line human-readable explanation of what
+ // the pattern matches. Used in CI error messages and the Files
+ // API "" placeholder rationale.
+ Description string
+ // regexSource is the regex literal in Go-RE2 syntax. Stored as a
+ // string so the slice declaration below stays readable; compiled
+ // once via sync.Once into a *regexp.Regexp.
+ regexSource string
+}
+
+// Patterns is the canonical credential-shape regex set.
+//
+// Adding a pattern here:
+//
+// 1. Add a new Pattern{} entry below with a kebab-case Name, a
+// one-line Description, and the regex literal. Anchor on a
+// low-false-positive prefix.
+// 2. Add a positive + negative test case in patterns_test.go.
+// 3. Mirror the regex string into:
+// a. .gitea/workflows/secret-scan.yml SECRET_PATTERNS array
+// b. molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh
+// (or wait for the codegen target that consumes this slice — TBD
+// follow-up; tracked in the Phase 2a PR description.)
+//
+// The order is: alphabetical within each provider family, families
+// grouped by ecosystem (GitHub family, AI-provider family, chat
+// family, cloud family). Keep this stable so diffs are reviewable.
+var Patterns = []Pattern{
+ // --- GitHub token family ---
+ {
+ Name: "github-pat-classic",
+ Description: "GitHub personal access token (classic)",
+ regexSource: `ghp_[A-Za-z0-9]{36,}`,
+ },
+ {
+ Name: "github-app-installation-token",
+ Description: "GitHub App installation token (#2090 vector)",
+ regexSource: `ghs_[A-Za-z0-9]{36,}`,
+ },
+ {
+ Name: "github-oauth-user-to-server",
+ Description: "GitHub OAuth user-to-server token",
+ regexSource: `gho_[A-Za-z0-9]{36,}`,
+ },
+ {
+ Name: "github-oauth-user",
+ Description: "GitHub OAuth user token",
+ regexSource: `ghu_[A-Za-z0-9]{36,}`,
+ },
+ {
+ Name: "github-oauth-refresh",
+ Description: "GitHub OAuth refresh token",
+ regexSource: `ghr_[A-Za-z0-9]{36,}`,
+ },
+ {
+ Name: "github-pat-fine-grained",
+ Description: "GitHub fine-grained personal access token",
+ regexSource: `github_pat_[A-Za-z0-9_]{82,}`,
+ },
+
+ // --- AI-provider API key family ---
+ {
+ Name: "anthropic-api-key",
+ Description: "Anthropic API key",
+ regexSource: `sk-ant-[A-Za-z0-9_-]{40,}`,
+ },
+ {
+ Name: "openai-project-key",
+ Description: "OpenAI project API key",
+ regexSource: `sk-proj-[A-Za-z0-9_-]{40,}`,
+ },
+ {
+ Name: "openai-service-account-key",
+ Description: "OpenAI service-account API key",
+ regexSource: `sk-svcacct-[A-Za-z0-9_-]{40,}`,
+ },
+ {
+ Name: "minimax-api-key",
+ Description: "MiniMax API key (F1088 vector)",
+ regexSource: `sk-cp-[A-Za-z0-9_-]{60,}`,
+ },
+
+ // --- Chat-platform token family ---
+ {
+ Name: "slack-token",
+ Description: "Slack token (xoxb/xoxa/xoxp/xoxr/xoxs)",
+ regexSource: `xox[baprs]-[A-Za-z0-9-]{20,}`,
+ },
+
+ // --- Cloud-provider credential family ---
+ {
+ Name: "aws-access-key-id",
+ Description: "AWS access key ID",
+ regexSource: `AKIA[0-9A-Z]{16}`,
+ },
+ {
+ Name: "aws-sts-temp-access-key-id",
+ Description: "AWS STS temporary access key ID",
+ regexSource: `ASIA[0-9A-Z]{16}`,
+ },
+}
+
+// compiledOnce protects the lazy build of compiledPatterns. We compile
+// lazily so package init is cheap; callers pay only on first match
+// (typically once per workspace-server boot).
+var (
+ compiledOnce sync.Once
+ compiledPatterns []*compiledPattern
+ compileErr error
+)
+
+type compiledPattern struct {
+ Name string
+ Description string
+ Re *regexp.Regexp
+}
+
+// compileAll compiles every Pattern.regexSource into a *regexp.Regexp.
+// Called once via compiledOnce. Any compile failure here is a build
+// bug (the unit tests assert each regex compiles) — surfacing via
+// returned error so callers don't panic in request handling.
+func compileAll() {
+ out := make([]*compiledPattern, 0, len(Patterns))
+ for _, p := range Patterns {
+ re, err := regexp.Compile(p.regexSource)
+ if err != nil {
+ compileErr = fmt.Errorf("secrets: pattern %q failed to compile: %w", p.Name, err)
+ return
+ }
+ out = append(out, &compiledPattern{Name: p.Name, Description: p.Description, Re: re})
+ }
+ compiledPatterns = out
+}
+
+// ScanBytes returns a non-nil Match if any pattern matches anywhere
+// inside b. Returns (nil, nil) on no match. Returns (nil, err) only
+// if a regex in the package fails to compile — that's a build bug,
+// not a runtime data issue.
+//
+// Match contains the pattern Name + Description so the caller can
+// emit a path-or-content-denial rationale WITHOUT round-tripping the
+// matched bytes (which would defeat the purpose). The matched bytes
+// stay inside this function.
+//
+// The Files API Phase 2b backend will call ScanBytes on:
+//
+// - the absolute path string (catches a file literally named
+// `ghs_abc.txt`)
+// - the file content (catches a credential pasted into a workspace
+// file by an agent or user — the Files API refuses to surface it
+// and the canvas renders "")
+//
+// Ordering: patterns are tried in declaration order. First match
+// wins. This means narrower patterns (e.g. `sk-svcacct-…`) should
+// appear in `Patterns` before broader ones (`sk-…`) — today there's
+// no overlap, so order is descriptive only.
+func ScanBytes(b []byte) (*Match, error) {
+ compiledOnce.Do(compileAll)
+ if compileErr != nil {
+ return nil, compileErr
+ }
+ for _, cp := range compiledPatterns {
+ if cp.Re.Match(b) {
+ return &Match{Name: cp.Name, Description: cp.Description}, nil
+ }
+ }
+ return nil, nil
+}
+
+// ScanString is the string-input convenience wrapper around ScanBytes.
+// Identical semantics — the body never copies, []byte(s) is a
+// zero-copy reinterpret for the regex matcher.
+func ScanString(s string) (*Match, error) {
+ return ScanBytes([]byte(s))
+}
+
+// Match describes which pattern caught a value. Deliberately does
+// NOT include the matched substring — callers must not echo it.
+type Match struct {
+ // Name is the pattern's kebab-case identifier (e.g. "github-pat-classic").
+ Name string
+ // Description is the human-readable line for UI / log surfaces.
+ Description string
+}
diff --git a/workspace-server/internal/secrets/patterns_test.go b/workspace-server/internal/secrets/patterns_test.go
new file mode 100644
index 000000000..100a875e2
--- /dev/null
+++ b/workspace-server/internal/secrets/patterns_test.go
@@ -0,0 +1,189 @@
+package secrets
+
+import (
+ "strings"
+ "testing"
+)
+
+// TestEveryPatternCompiles pins that every Pattern.regexSource is a
+// valid Go-RE2 expression. Without this, a bad regex would silently
+// disable ScanBytes for everything after it (the lazy compile would
+// set compileErr and ScanBytes would return that error every call).
+func TestEveryPatternCompiles(t *testing.T) {
+ for _, p := range Patterns {
+ if p.Name == "" {
+ t.Errorf("pattern with empty Name: regex=%q", p.regexSource)
+ }
+ if p.Description == "" {
+ t.Errorf("pattern %q has empty Description", p.Name)
+ }
+ }
+ // Force compile + check error.
+ if _, err := ScanBytes([]byte("placeholder")); err != nil {
+ t.Fatalf("ScanBytes init failed: %v", err)
+ }
+}
+
+// TestNoDuplicateNames — a duplicate pattern Name would make the
+// "first match wins" semantics surprising to readers and any caller
+// switching on Match.Name (none today but adding the guard is cheap).
+func TestNoDuplicateNames(t *testing.T) {
+ seen := map[string]bool{}
+ for _, p := range Patterns {
+ if seen[p.Name] {
+ t.Errorf("duplicate pattern Name: %q", p.Name)
+ }
+ seen[p.Name] = true
+ }
+}
+
+// TestKnownPatternsAllPresent — pins which specific Name values are
+// expected. A future refactor that renames or removes one without
+// updating consumers (CI workflow, runtime pre-commit hook, Files
+// API Phase 2b backend) would silently widen the leak surface.
+// Failing here forces the rename to be intentional.
+func TestKnownPatternsAllPresent(t *testing.T) {
+ expected := []string{
+ "github-pat-classic",
+ "github-app-installation-token",
+ "github-oauth-user-to-server",
+ "github-oauth-user",
+ "github-oauth-refresh",
+ "github-pat-fine-grained",
+ "anthropic-api-key",
+ "openai-project-key",
+ "openai-service-account-key",
+ "minimax-api-key",
+ "slack-token",
+ "aws-access-key-id",
+ "aws-sts-temp-access-key-id",
+ }
+ got := map[string]bool{}
+ for _, p := range Patterns {
+ got[p.Name] = true
+ }
+ for _, want := range expected {
+ if !got[want] {
+ t.Errorf("expected pattern %q missing from Patterns slice", want)
+ }
+ }
+}
+
+// TestPositiveMatches — for each pattern, supply a representative
+// shape and assert ScanBytes returns a Match with the right Name.
+// These are TEST FIXTURES, not real credentials — each is the
+// pattern's prefix + a long-enough trailing run of placeholder chars.
+// `EXAMPLE` is sprinkled in to make grep-finds in CI logs obviously
+// fake to a human reader (matches saved memory
+// feedback_assert_exact_not_substring: tighten by Name not body).
+func TestPositiveMatches(t *testing.T) {
+ cases := []struct {
+ fixture string
+ expectedName string
+ }{
+ {"ghp_EXAMPLE111122223333444455556666777788889999", "github-pat-classic"},
+ {"ghs_EXAMPLE111122223333444455556666777788889999", "github-app-installation-token"},
+ {"gho_EXAMPLE111122223333444455556666777788889999", "github-oauth-user-to-server"},
+ {"ghu_EXAMPLE111122223333444455556666777788889999", "github-oauth-user"},
+ {"ghr_EXAMPLE111122223333444455556666777788889999", "github-oauth-refresh"},
+ {"github_pat_EXAMPLE" + strings.Repeat("1", 80), "github-pat-fine-grained"},
+ {"sk-ant-EXAMPLE" + strings.Repeat("1", 40), "anthropic-api-key"},
+ {"sk-proj-EXAMPLE" + strings.Repeat("1", 40), "openai-project-key"},
+ {"sk-svcacct-EXAMPLE" + strings.Repeat("1", 40), "openai-service-account-key"},
+ {"sk-cp-EXAMPLE" + strings.Repeat("1", 60), "minimax-api-key"},
+ {"xoxb-" + strings.Repeat("a", 25), "slack-token"},
+ {"xoxa-" + strings.Repeat("a", 25), "slack-token"},
+ // AWS regex requires [0-9A-Z]{16} — uppercase + digits only.
+ {"AKIA1234567890ABCDEF", "aws-access-key-id"},
+ {"ASIA1234567890ABCDEF", "aws-sts-temp-access-key-id"},
+ }
+ for _, tc := range cases {
+ t.Run(tc.expectedName, func(t *testing.T) {
+ m, err := ScanBytes([]byte(tc.fixture))
+ if err != nil {
+ t.Fatalf("ScanBytes(%q) errored: %v", tc.fixture, err)
+ }
+ if m == nil {
+ t.Fatalf("ScanBytes(%q) returned no match — expected %q", tc.fixture, tc.expectedName)
+ }
+ if m.Name != tc.expectedName {
+ t.Errorf("ScanBytes(%q) matched %q; expected %q", tc.fixture, m.Name, tc.expectedName)
+ }
+ })
+ }
+}
+
+// TestNegativeShapes — strings that look credential-adjacent but
+// shouldn't match (too short, wrong prefix, missing trailing bytes).
+// Failing here means a pattern is too loose, which would generate
+// false-positive denial in Files API and false-positive workflow
+// failures in CI.
+func TestNegativeShapes(t *testing.T) {
+ cases := []string{
+ // Too-short variants — anchored on the length suffix.
+ "ghp_tooshort",
+ "ghs_alsoshort1234",
+ "github_pat_short",
+ "sk-ant-short",
+ "sk-cp-not-enough-bytes-here",
+ // Looks like one of the prefixes but isn't (different letter).
+ "gha_EXAMPLE_thirty_six_or_more_chars_here_xxx",
+ // Slack family — wrong letter after xox.
+ "xoxz-aaaaaaaaaaaaaaaaaaaaaaaaa",
+ // AWS-shaped but wrong length suffix.
+ "AKIATOOSHORT",
+ // Empty / whitespace.
+ "",
+ " ",
+ // Plain prose mentioning the prefix as part of a longer word.
+ "see also `ghp_HOWTO.md` in the repo",
+ }
+ for _, c := range cases {
+ t.Run(c, func(t *testing.T) {
+ m, err := ScanBytes([]byte(c))
+ if err != nil {
+ t.Fatalf("ScanBytes(%q) errored: %v", c, err)
+ }
+ if m != nil {
+ t.Errorf("ScanBytes(%q) unexpectedly matched %q", c, m.Name)
+ }
+ })
+ }
+}
+
+// TestScanString_NoOp — sanity-check ScanString is the zero-copy
+// wrapper around ScanBytes. Without this, a future refactor that
+// makes ScanString do its own thing (e.g. accidentally normalise
+// case) would diverge silently.
+func TestScanString_NoOp(t *testing.T) {
+ in := "ghp_EXAMPLE111122223333444455556666777788889999"
+ m1, err1 := ScanBytes([]byte(in))
+ if err1 != nil {
+ t.Fatalf("ScanBytes errored: %v", err1)
+ }
+ m2, err2 := ScanString(in)
+ if err2 != nil {
+ t.Fatalf("ScanString errored: %v", err2)
+ }
+ if m1 == nil || m2 == nil {
+ t.Fatalf("expected matches; got bytes=%+v string=%+v", m1, m2)
+ }
+ if m1.Name != m2.Name {
+ t.Errorf("ScanString and ScanBytes returned different Names: %q vs %q", m1.Name, m2.Name)
+ }
+}
+
+// TestMatch_NoRoundtrip — assert the Match struct does NOT include
+// the matched substring as a field. Adding such a field would
+// regress the "matched bytes never leave ScanBytes" invariant that
+// makes this package safe to call from log/UI surfaces. This is a
+// reflection-light contract test — checks the field names statically.
+func TestMatch_NoRoundtrip(t *testing.T) {
+ var m Match
+ // If someone adds a `Matched string` (or similar) field, this
+ // test reads as the canonical place to update + reconsider.
+ _ = m.Name
+ _ = m.Description
+ // The two-field shape is part of the public contract; new fields
+ // require deliberation about whether they leak the secret value.
+}
diff --git a/workspace/a2a_mcp_server.py b/workspace/a2a_mcp_server.py
index ce27e982a..432aa41bf 100644
--- a/workspace/a2a_mcp_server.py
+++ b/workspace/a2a_mcp_server.py
@@ -35,12 +35,14 @@ from a2a_tools import (
tool_commit_memory,
tool_delegate_task,
tool_delegate_task_async,
+ tool_get_runtime_identity,
tool_get_workspace_info,
tool_inbox_peek,
tool_inbox_pop,
tool_list_peers,
tool_recall_memory,
tool_send_message_to_user,
+ tool_update_agent_card,
tool_wait_for_message,
)
from platform_tools.registry import TOOLS as _PLATFORM_TOOL_SPECS
@@ -130,6 +132,10 @@ async def handle_tool_call(name: str, arguments: dict) -> str:
return await tool_get_workspace_info(
source_workspace_id=arguments.get("source_workspace_id") or None,
)
+ elif name == "get_runtime_identity":
+ return await tool_get_runtime_identity()
+ elif name == "update_agent_card":
+ return await tool_update_agent_card(arguments.get("card"))
elif name == "commit_memory":
return await tool_commit_memory(
arguments.get("content", ""),
diff --git a/workspace/a2a_tools.py b/workspace/a2a_tools.py
index eb26e622f..b6c87e606 100644
--- a/workspace/a2a_tools.py
+++ b/workspace/a2a_tools.py
@@ -167,3 +167,15 @@ from a2a_tools_inbox import ( # noqa: E402 (import after the top-of-module imp
tool_inbox_pop,
tool_wait_for_message,
)
+
+
+# Identity tool handlers — extracted to a2a_tools_identity. Ports the
+# two T4-tier MCP tools (``tool_get_runtime_identity`` +
+# ``tool_update_agent_card``) from molecule-ai-workspace-runtime PR#17.
+# That repo is mirror-only (reference_runtime_repo_is_mirror_only);
+# this is the canonical edit point, and the wheel mirror is
+# regenerated by publish-runtime.yml on merge.
+from a2a_tools_identity import ( # noqa: E402 (import after the top-of-module imports)
+ tool_get_runtime_identity,
+ tool_update_agent_card,
+)
diff --git a/workspace/a2a_tools_identity.py b/workspace/a2a_tools_identity.py
new file mode 100644
index 000000000..cec89ed00
--- /dev/null
+++ b/workspace/a2a_tools_identity.py
@@ -0,0 +1,187 @@
+"""Identity tool handlers — single-concern slice of the a2a_tools surface.
+
+Owns the two MCP tools that close the T4-tier workspace owner-permission
+gaps reported via the canvas:
+
+ * ``tool_get_runtime_identity`` — env-only; returns model, model_provider,
+ molecule_model, anthropic_base_url, tier, workspace_id, runtime
+ (ADAPTER_MODULE). No HTTP call. Always permitted by RBAC — even
+ read-only agents may know what model they are.
+
+ * ``tool_update_agent_card`` — POSTs the card to ``/registry/update-card``
+ with the workspace's own bearer (same auth path as ``tool_commit_memory``
+ via ``a2a_tools_rbac.auth_headers_for_heartbeat``). The platform
+ replaces the stored card and broadcasts an ``agent_card_updated``
+ event so the canvas reflects the new card live. Gated on
+ ``memory.write`` capability via the existing RBAC permission map so
+ read-only roles can't silently rewrite the platform card.
+
+Both originated as a port of molecule-ai-workspace-runtime PR#17
+(``feat(mcp): add update_agent_card + get_runtime_identity tools``).
+The mirror-only PR#17 was closed without merge per
+``reference_runtime_repo_is_mirror_only``; the canonical edit point is
+this monorepo at ``workspace/`` and the wheel mirror is regenerated
+automatically by the publish-runtime workflow.
+
+Imports the auth-header primitive from ``a2a_tools_rbac`` (iter 4a) —
+NOT from ``a2a_tools`` — to avoid a circular import with the
+kitchen-sink re-export module.
+"""
+from __future__ import annotations
+
+import json
+import os
+from typing import Any
+
+import httpx
+
+from a2a_client import PLATFORM_URL
+from a2a_tools_rbac import (
+ auth_headers_for_heartbeat as _auth_headers_for_heartbeat,
+ check_memory_write_permission as _check_memory_write_permission,
+)
+
+
+def _runtime_identity_payload() -> dict[str, Any]:
+ """Build the identity dict — env-only, no I/O.
+
+ Factored out from ``tool_get_runtime_identity`` so tests can assert
+ against the exact key set without re-parsing JSON. The MCP tool
+ handler ``tool_get_runtime_identity`` is the only public caller in
+ production; tests call this helper directly.
+ """
+ return {
+ "model": os.environ.get("MODEL", ""),
+ "model_provider": os.environ.get("MODEL_PROVIDER", ""),
+ "molecule_model": os.environ.get("MOLECULE_MODEL", ""),
+ "anthropic_base_url": os.environ.get("ANTHROPIC_BASE_URL", ""),
+ "tier": os.environ.get("TIER", ""),
+ "workspace_id": os.environ.get("WORKSPACE_ID", ""),
+ # Adapter module is the closest thing the runtime has to a
+ # "template slug" — e.g. "adapter" for claude-code-default,
+ # "hermes" for hermes-template, etc. Picked from
+ # $ADAPTER_MODULE env baked by each template's Dockerfile.
+ "runtime": os.environ.get("ADAPTER_MODULE", ""),
+ }
+
+
+async def tool_get_runtime_identity() -> str:
+ """Return this runtime's identity — model, provider, tier, IDs.
+
+ Env-only; no HTTP call. Useful so the agent can answer "what model
+ am I?" correctly instead of guessing from a stale system prompt
+ that the operator may have changed between boots.
+
+ Returns the identity as a JSON-encoded string (the dispatch contract
+ every MCP tool in this module follows). Tests that want to assert
+ individual fields can call ``_runtime_identity_payload()`` directly,
+ or ``json.loads`` the return value.
+
+ Always permitted by RBAC — there is no sensitive information here
+ that isn't already available to the process via ``os.environ``.
+ The point of the tool is to surface those env values to the agent
+ layer in a stable, documented shape rather than expecting every
+ agent runtime to know to ``echo $MODEL``.
+ """
+ return json.dumps(_runtime_identity_payload(), indent=2)
+
+
+async def tool_update_agent_card(card: Any) -> str:
+ """Update this workspace's agent_card on the platform.
+
+ POSTs the provided card to ``/registry/update-card`` with the
+ workspace's own bearer token (same auth path as ``tool_commit_memory``
+ and ``tool_get_workspace_info``). The platform validates required
+ fields server-side, replaces the stored card, and broadcasts an
+ ``agent_card_updated`` event so the canvas updates live.
+
+ Args:
+ card: A JSON-serialisable object (typically a dict) holding the
+ new card. The platform validates required fields server-side.
+
+ Returns:
+ JSON-encoded string. Body:
+ - ``{"success": true, "status": "updated"}`` on success;
+ - ``{"success": false, "error": "", "status_code": }``
+ on platform error;
+ - ``{"success": false, "error": ""}`` on local validation
+ (non-dict card, missing WORKSPACE_ID, network error).
+
+ Permission gate: this tool requires the ``memory.write`` RBAC
+ capability — same gate as ``tool_commit_memory``. The check runs
+ inline rather than at the dispatcher layer to keep ``a2a_mcp_server``
+ permission-agnostic (the gate sits with the implementation, not the
+ transport). Read-only roles get a clear error string back instead
+ of a 403 from the platform.
+
+ We re-check ``isinstance(card, dict)`` here defensively rather than
+ trust the MCP schema validator alone — the schema only constrains
+ the transport, not the in-process call surface used by tests and
+ sibling modules.
+ """
+ payload = await _update_agent_card_impl(card)
+ return json.dumps(payload, indent=2)
+
+
+async def _update_agent_card_impl(card: Any) -> dict[str, Any]:
+ """Dict-returning core of ``tool_update_agent_card``.
+
+ Split out so tests can assert against the raw dict shape (status
+ codes, error messages) without re-parsing JSON on every assertion.
+ The string-returning ``tool_update_agent_card`` is a thin wrapper
+ invoked by the MCP dispatcher.
+ """
+ # RBAC: require memory.write permission. Same gate as
+ # tool_commit_memory (the agent already needs this capability to
+ # persist anything outbound). Read-only roles can still call
+ # get_runtime_identity / get_workspace_info to introspect — those
+ # are env-only / read-only and have no inline gate.
+ if not _check_memory_write_permission():
+ return {
+ "success": False,
+ "error": (
+ "RBAC — this workspace does not have the 'memory.write' "
+ "permission required to update the agent_card."
+ ),
+ }
+ if not isinstance(card, dict):
+ return {
+ "success": False,
+ "error": "card must be a JSON object (dict)",
+ }
+ ws_id = os.environ.get("WORKSPACE_ID", "")
+ if not ws_id:
+ return {
+ "success": False,
+ "error": "WORKSPACE_ID env not set; cannot identify caller",
+ }
+ try:
+ async with httpx.AsyncClient(timeout=10.0) as client:
+ resp = await client.post(
+ f"{PLATFORM_URL}/registry/update-card",
+ json={"workspace_id": ws_id, "agent_card": card},
+ headers=_auth_headers_for_heartbeat(),
+ )
+ if resp.status_code == 200:
+ body: dict[str, Any] = {}
+ try:
+ body = resp.json()
+ except Exception:
+ pass
+ return {
+ "success": True,
+ "status": body.get("status", "updated"),
+ }
+ # Non-200 — surface what the platform returned.
+ error_msg = ""
+ try:
+ error_msg = resp.json().get("error", "") or resp.text
+ except Exception:
+ error_msg = resp.text
+ return {
+ "success": False,
+ "status_code": resp.status_code,
+ "error": error_msg,
+ }
+ except Exception as e:
+ return {"success": False, "error": f"network error: {e}"}
diff --git a/workspace/executor_helpers.py b/workspace/executor_helpers.py
index aba334f9c..52ae41b46 100644
--- a/workspace/executor_helpers.py
+++ b/workspace/executor_helpers.py
@@ -340,6 +340,16 @@ _CLI_A2A_COMMAND_KEYWORDS: dict[str, str | None] = {
"delegate_task_async": "delegate --async",
"check_task_status": "status",
"get_workspace_info": "info",
+ # `get_runtime_identity` + `update_agent_card` are MCP-first
+ # capabilities — the CLI subprocess interface doesn't expose them
+ # today. `get_runtime_identity` is env-only and an agent on a
+ # CLI-only runtime can already `echo $MODEL` etc, so there's no
+ # functional gap. `update_agent_card` requires a JSON object
+ # argument that wouldn't survive a positional-arg shell invocation
+ # cleanly. Mapped to None — flip to a keyword if a2a_cli grows
+ # `identity` / `card` subcommands in the future.
+ "get_runtime_identity": None,
+ "update_agent_card": None,
# `broadcast_message` is not exposed via the CLI subprocess interface
# today — it's an MCP-first capability. If a2a_cli grows a `broadcast`
# subcommand, map it here and the alignment test will gate the change.
diff --git a/workspace/platform_tools/registry.py b/workspace/platform_tools/registry.py
index 6550c9e7d..c5b1f08e6 100644
--- a/workspace/platform_tools/registry.py
+++ b/workspace/platform_tools/registry.py
@@ -57,12 +57,14 @@ from a2a_tools import (
tool_commit_memory,
tool_delegate_task,
tool_delegate_task_async,
+ tool_get_runtime_identity,
tool_get_workspace_info,
tool_inbox_peek,
tool_inbox_pop,
tool_list_peers,
tool_recall_memory,
tool_send_message_to_user,
+ tool_update_agent_card,
tool_wait_for_message,
)
@@ -289,6 +291,61 @@ _GET_WORKSPACE_INFO = ToolSpec(
section=A2A_SECTION,
)
+_GET_RUNTIME_IDENTITY = ToolSpec(
+ name="get_runtime_identity",
+ short=(
+ "Return this runtime's identity — model, model_provider, tier, "
+ "workspace_id, runtime template. Reads from process env; no HTTP call."
+ ),
+ when_to_use=(
+ "Use this to answer 'what model am I?' truthfully instead of "
+ "guessing from a stale system prompt — the operator may have "
+ "routed you to a different model via persona env between boots. "
+ "Always permitted by RBAC: even read-only agents may know what "
+ "model they are. Distinct from get_workspace_info — that one "
+ "calls the platform for ID/role/tier/parent (workspace metadata); "
+ "this one returns the live process env (MODEL, MODEL_PROVIDER, "
+ "MOLECULE_MODEL, ANTHROPIC_BASE_URL, TIER, WORKSPACE_ID, "
+ "ADAPTER_MODULE)."
+ ),
+ input_schema={"type": "object", "properties": {}},
+ impl=tool_get_runtime_identity,
+ section=A2A_SECTION,
+)
+
+_UPDATE_AGENT_CARD = ToolSpec(
+ name="update_agent_card",
+ short=(
+ "Replace this workspace's agent_card on the platform. The "
+ "platform validates required fields and broadcasts an "
+ "agent_card_updated event so the canvas reflects the change live."
+ ),
+ when_to_use=(
+ "Use when the workspace's capabilities, skills, description, or "
+ "name change and the canvas display needs to follow. The "
+ "platform stores the new card and pushes an "
+ "``agent_card_updated`` event to subscribers. Gated behind the "
+ "``memory.write`` RBAC capability — read-only roles cannot "
+ "rewrite the card. Tier-1+ owners always have this capability."
+ ),
+ input_schema={
+ "type": "object",
+ "properties": {
+ "card": {
+ "type": "object",
+ "description": (
+ "The new agent_card object (name, version, "
+ "description, skills, etc). Server-side validation "
+ "rejects payloads missing required fields."
+ ),
+ },
+ },
+ "required": ["card"],
+ },
+ impl=tool_update_agent_card,
+ section=A2A_SECTION,
+)
+
_BROADCAST_MESSAGE = ToolSpec(
name="broadcast_message",
short=(
@@ -642,6 +699,8 @@ TOOLS: list[ToolSpec] = [
_CHECK_TASK_STATUS,
_LIST_PEERS,
_GET_WORKSPACE_INFO,
+ _GET_RUNTIME_IDENTITY,
+ _UPDATE_AGENT_CARD,
_BROADCAST_MESSAGE,
_SEND_MESSAGE_TO_USER,
# Inbox (standalone-only; in-container returns informational error)
diff --git a/workspace/tests/snapshots/a2a_instructions_mcp.txt b/workspace/tests/snapshots/a2a_instructions_mcp.txt
index 3f0213e1b..92de32fa6 100644
--- a/workspace/tests/snapshots/a2a_instructions_mcp.txt
+++ b/workspace/tests/snapshots/a2a_instructions_mcp.txt
@@ -5,6 +5,8 @@
- **check_task_status**: Poll the status of a task started with delegate_task_async; returns result when done.
- **list_peers**: List the workspaces this agent can communicate with — name, ID, status, role for each.
- **get_workspace_info**: Get this workspace's own info — ID, name, role, tier, parent, status.
+- **get_runtime_identity**: Return this runtime's identity — model, model_provider, tier, workspace_id, runtime template. Reads from process env; no HTTP call.
+- **update_agent_card**: Replace this workspace's agent_card on the platform. The platform validates required fields and broadcasts an agent_card_updated event so the canvas reflects the change live.
- **broadcast_message**: Send a message to ALL agent workspaces in the org simultaneously. Requires broadcast_enabled=true on this workspace (set by user/admin).
- **send_message_to_user**: Send a message directly to the user's canvas chat — pushed instantly via WebSocket. Use this to: (1) acknowledge a task immediately ('Got it, I'll start working on this'), (2) send interim progress updates while doing long work, (3) deliver follow-up results after delegation completes, (4) attach files (zip, pdf, csv, image) for the user to download via the `attachments` field (NEVER paste file URLs in `message`). The message appears in the user's chat as if you're proactively reaching out.
- **wait_for_message**: Block until the next inbound message (canvas user OR peer agent) arrives, or until ``timeout_secs`` elapses.
@@ -27,6 +29,12 @@ Call this first when you need to delegate but don't know the target's ID. Access
### get_workspace_info
Use to introspect your own identity (e.g. before reporting back to the user, or to determine whether you're a tier-0 root that can write GLOBAL memory).
+### get_runtime_identity
+Use this to answer 'what model am I?' truthfully instead of guessing from a stale system prompt — the operator may have routed you to a different model via persona env between boots. Always permitted by RBAC: even read-only agents may know what model they are. Distinct from get_workspace_info — that one calls the platform for ID/role/tier/parent (workspace metadata); this one returns the live process env (MODEL, MODEL_PROVIDER, MOLECULE_MODEL, ANTHROPIC_BASE_URL, TIER, WORKSPACE_ID, ADAPTER_MODULE).
+
+### update_agent_card
+Use when the workspace's capabilities, skills, description, or name change and the canvas display needs to follow. The platform stores the new card and pushes an ``agent_card_updated`` event to subscribers. Gated behind the ``memory.write`` RBAC capability — read-only roles cannot rewrite the card. Tier-1+ owners always have this capability.
+
### broadcast_message
Use for urgent, org-wide signals: critical status changes, emergency stop instructions, coordinated task announcements. Every non-removed workspace receives the message in its activity log (poll-mode agents see it on their next poll; push-mode canvases get a real-time banner). This tool returns an error if broadcast_enabled is false — a user or admin must enable it via the workspace abilities settings first.
diff --git a/workspace/tests/test_a2a_tools_identity.py b/workspace/tests/test_a2a_tools_identity.py
new file mode 100644
index 000000000..ca8b4dc11
--- /dev/null
+++ b/workspace/tests/test_a2a_tools_identity.py
@@ -0,0 +1,390 @@
+"""Tests for ``tool_get_runtime_identity`` and ``tool_update_agent_card``.
+
+These two MCP tools close the T4-tier workspace owner-permission gaps
+reported via the canvas:
+
+ - the agent could not update its own ``agent_card`` (no MCP tool
+ wrapped the existing ``POST /registry/update-card`` endpoint);
+ - the agent could not identify which model it was running (the
+ ``MODEL`` env var is injected by ``provisioner.workspace_provision``
+ but nothing surfaced it back to the agent).
+
+Ported from molecule-ai-workspace-runtime PR#17 (mirror-only repo;
+canonical edit point per ``reference_runtime_repo_is_mirror_only``).
+Adapted to core's conventions:
+
+ * tool functions return ``str`` (JSON-encoded), matching every other
+ tool in ``a2a_tools_*`` modules. Tests ``json.loads`` to inspect.
+ * permission check ``memory.write`` runs inline in
+ ``tool_update_agent_card`` (same pattern as
+ ``a2a_tools_memory.tool_commit_memory``).
+ * ``WORKSPACE_ID`` is read directly from ``os.environ`` — core does
+ not have the runtime's validated-cache layer (``molecule_runtime.
+ builtin_tools.validation``).
+"""
+from __future__ import annotations
+
+import json
+
+import pytest
+
+
+# --- Drift gate: re-export aliases on a2a_tools ------------------------------
+
+class TestBackCompatAliases:
+ """Pin that ``a2a_tools.tool_*`` resolves to the same callable as
+ ``a2a_tools_identity.tool_*``. Refactor wrapping (e.g. a doc-string
+ wrapper that loses the function identity) silently breaks call
+ sites that ``patch("a2a_tools.tool_update_agent_card", ...)`` —
+ this gate makes that drift fail fast."""
+
+ def test_tool_get_runtime_identity_alias(self):
+ import a2a_tools
+ import a2a_tools_identity
+ assert a2a_tools.tool_get_runtime_identity is a2a_tools_identity.tool_get_runtime_identity
+
+ def test_tool_update_agent_card_alias(self):
+ import a2a_tools
+ import a2a_tools_identity
+ assert a2a_tools.tool_update_agent_card is a2a_tools_identity.tool_update_agent_card
+
+
+# --- tool_get_runtime_identity ----------------------------------------------
+
+class TestGetRuntimeIdentity:
+ """The tool returns env-derived runtime identity. No HTTP call."""
+
+ @pytest.mark.asyncio
+ async def test_returns_all_known_env_fields(self, monkeypatch):
+ from a2a_tools_identity import tool_get_runtime_identity
+
+ monkeypatch.setenv("MODEL", "claude-opus-4-7")
+ monkeypatch.setenv("MODEL_PROVIDER", "anthropic")
+ monkeypatch.setenv("TIER", "T4")
+ monkeypatch.setenv("WORKSPACE_ID", "ws-abc")
+ monkeypatch.setenv("ADAPTER_MODULE", "adapter")
+ monkeypatch.setenv("MOLECULE_MODEL", "claude-opus-4-7")
+ monkeypatch.setenv("ANTHROPIC_BASE_URL", "https://api.anthropic.com")
+
+ out = await tool_get_runtime_identity()
+ # MCP tools return JSON-encoded strings (matches the contract
+ # every other tool_* in a2a_tools_* uses).
+ assert isinstance(out, str)
+ parsed = json.loads(out)
+
+ assert parsed["model"] == "claude-opus-4-7"
+ assert parsed["model_provider"] == "anthropic"
+ assert parsed["tier"] == "T4"
+ assert parsed["workspace_id"] == "ws-abc"
+ assert parsed["runtime"] == "adapter"
+ assert parsed["molecule_model"] == "claude-opus-4-7"
+ assert parsed["anthropic_base_url"] == "https://api.anthropic.com"
+
+ @pytest.mark.asyncio
+ async def test_missing_env_returns_empty_strings(self, monkeypatch):
+ """Tool MUST NOT raise when env vars are absent — every key is
+ present but the value is the empty string. The agent then knows
+ the slot exists but is unset."""
+ from a2a_tools_identity import tool_get_runtime_identity
+
+ for var in (
+ "MODEL", "MODEL_PROVIDER", "TIER", "WORKSPACE_ID",
+ "ADAPTER_MODULE", "MOLECULE_MODEL", "ANTHROPIC_BASE_URL",
+ ):
+ monkeypatch.delenv(var, raising=False)
+
+ parsed = json.loads(await tool_get_runtime_identity())
+ assert parsed["model"] == ""
+ assert parsed["model_provider"] == ""
+ assert parsed["tier"] == ""
+ assert parsed["workspace_id"] == ""
+ assert parsed["runtime"] == ""
+ assert parsed["molecule_model"] == ""
+ assert parsed["anthropic_base_url"] == ""
+
+ @pytest.mark.asyncio
+ async def test_no_http_call_made(self, monkeypatch):
+ """``get_runtime_identity`` is env-only — must not open
+ httpx.AsyncClient even if the call would otherwise succeed.
+ Tripwire any client construction."""
+ import httpx
+
+ from a2a_tools_identity import tool_get_runtime_identity
+
+ class _Tripwire:
+ def __init__(self, *_a, **_kw):
+ raise AssertionError(
+ "tool_get_runtime_identity must not open httpx.AsyncClient"
+ )
+
+ monkeypatch.setattr(httpx, "AsyncClient", _Tripwire)
+ # Must not raise.
+ await tool_get_runtime_identity()
+
+ @pytest.mark.asyncio
+ async def test_helper_dict_matches_string_payload(self, monkeypatch):
+ """``_runtime_identity_payload`` is the dict-returning helper
+ used by both the public tool and tests. Verify the public tool
+ json.dumps the same dict — no field is dropped or renamed by
+ the encoding step."""
+ from a2a_tools_identity import (
+ _runtime_identity_payload,
+ tool_get_runtime_identity,
+ )
+
+ monkeypatch.setenv("MODEL", "claude-opus-4-7")
+ monkeypatch.setenv("TIER", "T4")
+ monkeypatch.setenv("WORKSPACE_ID", "ws-helper-check")
+
+ helper = _runtime_identity_payload()
+ tool_str = await tool_get_runtime_identity()
+ assert json.loads(tool_str) == helper
+
+
+# --- tool_update_agent_card -------------------------------------------------
+
+
+class _MockResponse:
+ def __init__(self, status_code: int, payload: dict):
+ self.status_code = status_code
+ self._payload = payload
+ self.text = json.dumps(payload)
+
+ def json(self):
+ return self._payload
+
+
+class _MockClient:
+ """Drop-in for httpx.AsyncClient context manager.
+
+ Records the URL + json body + headers the tool POSTed so the test
+ can assert against them. Returns the canned _MockResponse passed
+ in at construction time.
+ """
+
+ def __init__(self, *, response: _MockResponse, captured: dict):
+ self._response = response
+ self._captured = captured
+
+ async def __aenter__(self):
+ return self
+
+ async def __aexit__(self, *_args):
+ return False
+
+ async def post(self, url, *, json=None, headers=None, **_kw): # noqa: A002
+ self._captured["url"] = url
+ self._captured["json"] = json
+ self._captured["headers"] = headers
+ return self._response
+
+
+@pytest.fixture
+def _grant_memory_write(monkeypatch):
+ """Force the inline RBAC gate inside ``tool_update_agent_card`` to
+ succeed. The gate calls
+ ``a2a_tools_rbac.check_memory_write_permission`` which inspects
+ ``$MOLECULE_ROLES`` / the role table; the patch sidesteps that
+ machinery so tests can focus on the platform-call shape.
+ """
+ import a2a_tools_identity
+ monkeypatch.setattr(
+ a2a_tools_identity, "_check_memory_write_permission", lambda: True
+ )
+
+
+class TestUpdateAgentCard:
+ @pytest.mark.asyncio
+ async def test_posts_to_registry_update_card(
+ self, monkeypatch, _grant_memory_write,
+ ):
+ """Hits POST {PLATFORM_URL}/registry/update-card with the
+ workspace bearer and the {workspace_id, agent_card} body shape
+ the platform handler expects (workspace-server
+ ``internal/handlers/registry.go``)."""
+ import a2a_tools_identity
+
+ monkeypatch.setenv("WORKSPACE_ID", "ws-42")
+ # Ensure PLATFORM_URL re-import sees a deterministic value —
+ # a2a_client imports it at module load so we patch the symbol
+ # on a2a_tools_identity directly (the module's own reference).
+ monkeypatch.setattr(a2a_tools_identity, "PLATFORM_URL", "http://test.invalid")
+
+ captured: dict = {}
+ response = _MockResponse(200, {"status": "updated"})
+
+ def _client_factory(*_a, **_kw):
+ return _MockClient(response=response, captured=captured)
+
+ monkeypatch.setattr(a2a_tools_identity.httpx, "AsyncClient", _client_factory)
+ monkeypatch.setattr(
+ a2a_tools_identity, "_auth_headers_for_heartbeat",
+ lambda: {"Authorization": "Bearer ws-token-xyz"},
+ )
+
+ card = {"name": "agent-foo", "version": "0.1.0", "description": "demo"}
+ result_str = await a2a_tools_identity.tool_update_agent_card(card)
+ result = json.loads(result_str)
+
+ # URL: PLATFORM_URL + /registry/update-card
+ assert captured["url"] == "http://test.invalid/registry/update-card"
+
+ # The platform handler expects {workspace_id, agent_card}; the
+ # agent_card is the raw object the agent submitted.
+ body = captured["json"]
+ assert body["workspace_id"] == "ws-42"
+ assert body["agent_card"] == card
+
+ # Auth header from auth_headers_for_heartbeat is forwarded
+ # verbatim — same path commit_memory uses.
+ assert captured["headers"]["Authorization"] == "Bearer ws-token-xyz"
+
+ assert result["success"] is True
+ assert result["status"] == "updated"
+
+ @pytest.mark.asyncio
+ async def test_propagates_server_error(
+ self, monkeypatch, _grant_memory_write,
+ ):
+ """Non-200 from platform surfaces as a structured error to the
+ agent. The agent sees {success:false, status_code, error} and
+ can decide whether to retry, fall back, or escalate."""
+ import a2a_tools_identity
+
+ monkeypatch.setenv("WORKSPACE_ID", "ws-42")
+ monkeypatch.setattr(a2a_tools_identity, "PLATFORM_URL", "http://test.invalid")
+
+ captured: dict = {}
+ response = _MockResponse(400, {"error": "invalid card"})
+
+ monkeypatch.setattr(
+ a2a_tools_identity.httpx, "AsyncClient",
+ lambda *a, **kw: _MockClient(response=response, captured=captured),
+ )
+ monkeypatch.setattr(
+ a2a_tools_identity, "_auth_headers_for_heartbeat", lambda: {},
+ )
+
+ result = json.loads(
+ await a2a_tools_identity.tool_update_agent_card({"name": "x"})
+ )
+ assert result["success"] is False
+ assert result["status_code"] == 400
+ assert "invalid card" in str(result["error"]).lower()
+
+ @pytest.mark.asyncio
+ async def test_rejects_non_dict_card(self, _grant_memory_write):
+ """The MCP schema constrains transport callers to pass a dict;
+ in-process callers (tests, sibling modules) can still pass any
+ type. Reject non-dict defensively so the platform isn't asked
+ to validate JSON-encoded strings or lists."""
+ from a2a_tools_identity import tool_update_agent_card
+
+ result = json.loads(await tool_update_agent_card("not-a-dict"))
+ assert result["success"] is False
+ assert "dict" in str(result["error"]).lower()
+
+ @pytest.mark.asyncio
+ async def test_workspace_id_missing_returns_error(
+ self, monkeypatch, _grant_memory_write,
+ ):
+ """If WORKSPACE_ID is not set the tool refuses to issue the
+ request — it would otherwise POST with an empty workspace_id
+ and let the platform return a confusing 400."""
+ from a2a_tools_identity import tool_update_agent_card
+
+ monkeypatch.delenv("WORKSPACE_ID", raising=False)
+
+ result = json.loads(await tool_update_agent_card({"name": "x"}))
+ assert result["success"] is False
+ assert "workspace_id" in str(result["error"]).lower()
+
+ @pytest.mark.asyncio
+ async def test_denies_when_memory_write_permission_missing(self, monkeypatch):
+ """The agent's RBAC role must grant ``memory.write`` to update
+ the card. Read-only roles get an RBAC error string back
+ immediately, never touching the platform."""
+ import a2a_tools_identity
+
+ monkeypatch.setenv("WORKSPACE_ID", "ws-42")
+ monkeypatch.setattr(
+ a2a_tools_identity, "_check_memory_write_permission", lambda: False,
+ )
+
+ # Tripwire httpx — must not be called when RBAC denies.
+ import httpx
+
+ class _Tripwire:
+ def __init__(self, *_a, **_kw):
+ raise AssertionError("RBAC denial must short-circuit before httpx call")
+
+ monkeypatch.setattr(httpx, "AsyncClient", _Tripwire)
+
+ result = json.loads(
+ await a2a_tools_identity.tool_update_agent_card({"name": "x"}),
+ )
+ assert result["success"] is False
+ assert "memory.write" in str(result["error"]).lower()
+
+ @pytest.mark.asyncio
+ async def test_network_exception_returns_structured_error(
+ self, monkeypatch, _grant_memory_write,
+ ):
+ """A network exception (DNS failure, connect timeout, etc) is
+ wrapped into a structured error dict instead of bubbling up
+ to the MCP transport layer."""
+ import a2a_tools_identity
+
+ monkeypatch.setenv("WORKSPACE_ID", "ws-42")
+ monkeypatch.setattr(a2a_tools_identity, "PLATFORM_URL", "http://test.invalid")
+
+ class _ExplodingClient:
+ async def __aenter__(self):
+ return self
+
+ async def __aexit__(self, *_a):
+ return False
+
+ async def post(self, *_a, **_kw):
+ raise RuntimeError("simulated DNS failure")
+
+ monkeypatch.setattr(
+ a2a_tools_identity.httpx, "AsyncClient",
+ lambda *a, **kw: _ExplodingClient(),
+ )
+
+ result = json.loads(
+ await a2a_tools_identity.tool_update_agent_card({"name": "x"})
+ )
+ assert result["success"] is False
+ assert "network" in str(result["error"]).lower()
+
+
+# --- Registry contract ------------------------------------------------------
+
+
+class TestRegistryContract:
+ """Pin the new tools' registration in platform_tools.registry. The
+ structural tests in ``test_platform_tools.py`` already check
+ registry↔MCP alignment; these are tighter assertions specific to
+ the two new tools so a future contributor deleting one entry sees
+ a focused failure."""
+
+ def test_get_runtime_identity_in_registry(self):
+ from platform_tools.registry import by_name
+ spec = by_name("get_runtime_identity")
+ assert spec.section == "a2a"
+ # No input parameters — env-only call.
+ assert spec.input_schema == {"type": "object", "properties": {}}
+ # impl points at the actual tool function, not a shim.
+ from a2a_tools_identity import tool_get_runtime_identity
+ assert spec.impl is tool_get_runtime_identity
+
+ def test_update_agent_card_in_registry(self):
+ from platform_tools.registry import by_name
+ spec = by_name("update_agent_card")
+ assert spec.section == "a2a"
+ assert "card" in spec.input_schema["properties"]
+ assert spec.input_schema["required"] == ["card"]
+ from a2a_tools_identity import tool_update_agent_card
+ assert spec.impl is tool_update_agent_card