diff --git a/.github/workflows/e2e-api.yml b/.github/workflows/e2e-api.yml
index bc9e629b..782cbedc 100644
--- a/.github/workflows/e2e-api.yml
+++ b/.github/workflows/e2e-api.yml
@@ -172,6 +172,9 @@ jobs:
- name: Run poll-mode + since_id cursor E2E (#2339)
if: needs.detect-changes.outputs.api == 'true'
run: bash tests/e2e/test_poll_mode_e2e.sh
+ - name: Run poll-mode chat upload E2E (RFC #2891)
+ if: needs.detect-changes.outputs.api == 'true'
+ run: bash tests/e2e/test_poll_mode_chat_upload_e2e.sh
- name: Dump platform log on failure
if: failure() && needs.detect-changes.outputs.api == 'true'
run: cat workspace-server/platform.log || true
diff --git a/canvas/src/app/orgs/page.tsx b/canvas/src/app/orgs/page.tsx
index 3c5576ef..a137ac2e 100644
--- a/canvas/src/app/orgs/page.tsx
+++ b/canvas/src/app/orgs/page.tsx
@@ -18,7 +18,7 @@
// quick bounce between signup and either Checkout or the tenant UI.
import { useEffect, useState } from "react";
-import { fetchSession, redirectToLogin, type Session } from "@/lib/auth";
+import { fetchSession, redirectToLogin, signOut, type Session } from "@/lib/auth";
import { PLATFORM_URL } from "@/lib/api";
import { formatCredits, pillTone, bannerKind } from "@/lib/credits";
import { TermsGate } from "@/components/TermsGate";
@@ -129,7 +129,7 @@ export default function OrgsPage() {
return : null} />;
}
return (
-
+
{justCheckedOut && }
{orgs.map((o) => (
@@ -160,11 +160,21 @@ function CheckoutBanner() {
);
}
-function Shell({ children }: { children: React.ReactNode }) {
+function Shell({
+ children,
+ session,
+}: {
+ children: React.ReactNode;
+ // Optional: when present, the header renders the signed-in email +
+ // a Sign-out button. The empty-state Shell call doesn't have a
+ // session in scope, so accept null and skip the header chrome there.
+ session?: Session | null;
+}) {
return (
+ {session ? : null}
Your organizations
Each org is an isolated Molecule workspace.
@@ -177,6 +187,40 @@ function Shell({ children }: { children: React.ReactNode }) {
);
}
+// AccountBar renders the signed-in email + a Sign-out button at the
+// top of the page. Without this the user has no way to log out — the
+// /cp/auth/signout endpoint exists on the control plane but no UI ever
+// called it. Reported externally on 2026-05-05; this is the fix.
+//
+// Click → calls signOut() which POSTs /cp/auth/signout (clears the
+// WorkOS session cookie + revokes at the provider) then bounces to
+// /cp/auth/login. The signOut helper is best-effort — even on a 5xx
+// or network failure the redirect fires so the user never gets stuck
+// on an authed-looking page after they clicked Sign out.
+function AccountBar({ session }: { session: Session }) {
+ const [signingOut, setSigningOut] = useState(false);
+ return (
+
+ {session.email}
+
+
+ );
+}
+
// DataResidencyNotice surfaces where workspace data lives so EU-based
// signups can make an informed choice (GDPR Art. 13 disclosure
// requirement). Plain text, no icon — the goal is clarity, not
diff --git a/canvas/src/components/EmptyState.tsx b/canvas/src/components/EmptyState.tsx
index 2452ef1a..d54f1709 100644
--- a/canvas/src/components/EmptyState.tsx
+++ b/canvas/src/components/EmptyState.tsx
@@ -48,16 +48,21 @@ export function EmptyState() {
});
// "Create blank" bypasses templates entirely — no preflight, no
- // modal, just POST /workspaces with a default name and tier.
- // Deliberately NOT routed through useTemplateDeploy because it
- // has no `template.id` to deploy against.
+ // modal, just POST /workspaces with a default name. Deliberately
+ // NOT routed through useTemplateDeploy because it has no
+ // `template.id` to deploy against.
+ //
+ // tier is omitted so the backend picks a SaaS-aware default
+ // (T4 on SaaS, T3 on self-hosted — see WorkspaceHandler.DefaultTier).
+ // The previous hardcoded `tier: 2` shipped every fresh-tenant agent
+ // at Standard regardless of host, which surprised SaaS users whose
+ // CreateWorkspaceDialog already defaults to T4.
const createBlank = async () => {
setBlankCreating(true);
setBlankError(null);
try {
const ws = await api.post<{ id: string }>("/workspaces", {
name: "My First Agent",
- tier: 2,
canvas: firstDeployCoords(),
});
handleDeployed(ws.id);
diff --git a/canvas/src/components/tabs/ChatTab.tsx b/canvas/src/components/tabs/ChatTab.tsx
index 7da17b72..2d6ae908 100644
--- a/canvas/src/components/tabs/ChatTab.tsx
+++ b/canvas/src/components/tabs/ChatTab.tsx
@@ -286,6 +286,14 @@ function MyChatPanel({ workspaceId, data }: Props) {
const [error, setError] = useState(null);
const [confirmRestart, setConfirmRestart] = useState(false);
const bottomRef = useRef(null);
+ // First-mount scroll-to-bottom needs `behavior: "instant"` — long
+ // conversations smooth-animate for ~300ms which any concurrent
+ // re-render can interrupt, leaving the user stuck mid-conversation
+ // when the chat tab opens. Subsequent appends (new agent messages)
+ // keep `smooth` for the visual "landing" feel. Flipped the first
+ // time messages.length goes positive, so a workspace switch (which
+ // remounts ChatTab) gets a fresh instant jump too.
+ const hasInitialScrollRef = useRef(false);
// Lazy-load older history on scroll-up.
// - containerRef = the scrollable messages viewport
// - topRef = sentinel above the messages list; IO observes it
@@ -545,6 +553,15 @@ function MyChatPanel({ workspaceId, data }: Props) {
scrollAnchorRef.current = null;
return;
}
+ // Instant on first arrival of messages — smooth-scroll on a long
+ // conversation gets interrupted by concurrent renders and leaves
+ // the user stuck in the middle. After the first jump, subsequent
+ // appends animate as before.
+ if (!hasInitialScrollRef.current && messages.length > 0) {
+ hasInitialScrollRef.current = true;
+ bottomRef.current?.scrollIntoView({ behavior: "instant" as ScrollBehavior });
+ return;
+ }
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
}, [messages]);
diff --git a/canvas/src/components/tabs/chat/AgentCommsPanel.tsx b/canvas/src/components/tabs/chat/AgentCommsPanel.tsx
index fc327ea0..074d96fc 100644
--- a/canvas/src/components/tabs/chat/AgentCommsPanel.tsx
+++ b/canvas/src/components/tabs/chat/AgentCommsPanel.tsx
@@ -1,6 +1,6 @@
"use client";
-import { useState, useEffect, useMemo, useRef } from "react";
+import { useState, useEffect, useLayoutEffect, useMemo, useRef, useCallback } from "react";
import ReactMarkdown from "react-markdown";
import remarkGfm from "remark-gfm";
import { api } from "@/lib/api";
@@ -184,13 +184,23 @@ function unwrapErrorText(raw: string | null): string {
export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
const [messages, setMessages] = useState([]);
const [loading, setLoading] = useState(true);
+ const [loadError, setLoadError] = useState(null);
// Dedup by timestamp+type+peer to handle API load + WebSocket race
const seenKeys = useRef(new Set());
const bottomRef = useRef(null);
+ // Mirrors the my-chat scroll behaviour from ChatTab (PR #2903) —
+ // smooth-scroll on a long history gets interrupted by concurrent
+ // renders and lands the panel mid-conversation. Switch the first
+ // arrival to instant; subsequent appends animate.
+ const hasInitialScrollRef = useRef(false);
- // Load history
- useEffect(() => {
+ // Load history. Extracted so the error-state retry button can
+ // re-invoke without remount. ChatTab uses the same shape
+ // (loadInitial → loadError state → retry button).
+ const loadInitial = useCallback(() => {
setLoading(true);
+ setLoadError(null);
+ seenKeys.current.clear();
api.get(`/workspaces/${workspaceId}/activity?source=agent&limit=50`)
.then((entries) => {
const filtered = (entries ?? [])
@@ -234,10 +244,15 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
// the .then body) — the panel just sat on the empty state
// with zero signal.
console.warn("AgentCommsPanel: load activity failed", err);
+ setLoadError(err instanceof Error ? err.message : String(err));
setLoading(false);
});
}, [workspaceId]);
+ useEffect(() => {
+ loadInitial();
+ }, [loadInitial]);
+
// Live updates routed through the global ReconnectingSocket. The
// previous pattern of `new WebSocket(WS_URL)` per panel had no
// onclose / no reconnect, so any drop (idle timeout, browser
@@ -358,7 +373,18 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
} catch { /* ignore */ }
});
- useEffect(() => {
+ // useLayoutEffect (not useEffect) so the scroll runs BEFORE paint —
+ // otherwise the user sees the panel jump for one frame on every
+ // append. Mirrors ChatTab's MyChatPanel scroll block.
+ useLayoutEffect(() => {
+ if (!hasInitialScrollRef.current && messages.length > 0) {
+ // Instant on first arrival — smooth-scroll on a long history
+ // gets interrupted by concurrent renders and lands the panel
+ // mid-conversation (the chat-opens-in-middle bug class).
+ hasInitialScrollRef.current = true;
+ bottomRef.current?.scrollIntoView({ behavior: "instant" as ScrollBehavior });
+ return;
+ }
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
}, [messages]);
@@ -366,6 +392,27 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
return
Loading agent communications...
;
}
+ if (loadError !== null && messages.length === 0) {
+ // Mirrors ChatTab my-chat error UI — surfaces the load failure
+ // with a retry button instead of silently rendering empty state.
+ return (
+
+
+ Failed to load agent communications: {loadError}
+
+
+
+ );
+ }
+
if (messages.length === 0) {
return (
diff --git a/canvas/src/components/tabs/chat/__tests__/AgentCommsPanel.render.test.tsx b/canvas/src/components/tabs/chat/__tests__/AgentCommsPanel.render.test.tsx
new file mode 100644
index 00000000..80b37982
--- /dev/null
+++ b/canvas/src/components/tabs/chat/__tests__/AgentCommsPanel.render.test.tsx
@@ -0,0 +1,115 @@
+// @vitest-environment jsdom
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { render, screen, fireEvent, waitFor } from "@testing-library/react";
+
+// API mock — tests can override per case via apiGetMock.mockImplementationOnce.
+const apiGetMock = vi.fn<(url: string) => Promise>();
+vi.mock("@/lib/api", () => ({
+ api: {
+ get: (url: string) => apiGetMock(url),
+ },
+}));
+
+// useSocketEvent — no-op for these render tests; live updates aren't
+// what we're verifying here.
+vi.mock("@/hooks/useSocketEvent", () => ({
+ useSocketEvent: () => {},
+}));
+
+// Canvas store — peer name resolution.
+vi.mock("@/store/canvas", () => ({
+ useCanvasStore: {
+ getState: () => ({
+ nodes: [
+ { id: "ws-self", data: { name: "Self" } },
+ { id: "ws-peer", data: { name: "Peer Agent" } },
+ ],
+ }),
+ },
+}));
+
+// Toaster shim — AgentCommsPanel imports showToast.
+vi.mock("../../Toaster", () => ({
+ showToast: vi.fn(),
+}));
+
+import { AgentCommsPanel } from "../AgentCommsPanel";
+
+// jsdom doesn't implement scrollIntoView. Tests that observe the call
+// install a spy here; tests that don't care still need a no-op stub
+// so the component doesn't throw.
+const scrollSpy = vi.fn<(opts?: ScrollIntoViewOptions | boolean) => void>();
+beforeEach(() => {
+ apiGetMock.mockReset();
+ scrollSpy.mockReset();
+ Element.prototype.scrollIntoView = scrollSpy as unknown as Element["scrollIntoView"];
+});
+
+afterEach(() => {
+ vi.clearAllMocks();
+});
+
+describe("AgentCommsPanel — initial-state parity with ChatTab my-chat", () => {
+ it("shows loading text while history fetch is in flight", () => {
+ apiGetMock.mockReturnValueOnce(new Promise(() => { /* never resolves */ }));
+ render();
+ expect(screen.getByText("Loading agent communications...")).toBeDefined();
+ });
+
+ it("renders error UI with a Retry button when the history fetch rejects", async () => {
+ apiGetMock.mockRejectedValueOnce(new Error("network down"));
+ render();
+
+ // Wait for the error state to render — loading→error transition is async.
+ const alert = await waitFor(() => screen.getByRole("alert"));
+ expect(alert.textContent).toMatch(/Failed to load agent communications/);
+ expect(alert.textContent).toMatch(/network down/);
+
+ // Retry button must be present and trigger a refetch.
+ const retry = screen.getByRole("button", { name: "Retry" });
+ apiGetMock.mockResolvedValueOnce([]); // success on retry
+ fireEvent.click(retry);
+
+ // Two calls total: initial load + retry. Pin via mock call count.
+ await waitFor(() => expect(apiGetMock.mock.calls.length).toBe(2));
+ });
+
+ it("falls back to empty-state copy when load succeeds with zero rows", async () => {
+ apiGetMock.mockResolvedValueOnce([]);
+ render();
+ await waitFor(() =>
+ expect(screen.getByText("No agent-to-agent communications yet.")).toBeDefined(),
+ );
+ });
+
+ it("scrollIntoView is called with behavior=instant on the first message arrival", async () => {
+ apiGetMock.mockResolvedValueOnce([
+ {
+ id: "act-1",
+ activity_type: "a2a_send",
+ source_id: "ws-self",
+ target_id: "ws-peer",
+ method: "message/send",
+ summary: "Delegating",
+ request_body: { message: { parts: [{ text: "hi" }] } },
+ response_body: null,
+ status: "ok",
+ created_at: "2026-04-25T18:00:00Z",
+ },
+ ]);
+ render();
+
+ // useLayoutEffect is what makes the first call instant — wait for
+ // the panel to render at least one message.
+ await waitFor(() => expect(scrollSpy.mock.calls.length).toBeGreaterThan(0));
+
+ // The pinned contract: SOME call uses behavior: "instant" — the
+ // first-arrival case. Subsequent appends use "smooth", but those
+ // can't fire here (no live update yet).
+ const sawInstant = scrollSpy.mock.calls.some((args) => {
+ const opts = args[0];
+ return typeof opts === "object" && opts !== null && "behavior" in opts && opts.behavior === "instant";
+ });
+ expect(sawInstant).toBe(true);
+ });
+});
diff --git a/canvas/src/lib/__tests__/auth.test.ts b/canvas/src/lib/__tests__/auth.test.ts
index ee74a521..5f9b76b3 100644
--- a/canvas/src/lib/__tests__/auth.test.ts
+++ b/canvas/src/lib/__tests__/auth.test.ts
@@ -2,7 +2,7 @@
* @vitest-environment jsdom
*/
import { describe, it, expect, vi, afterEach } from "vitest";
-import { fetchSession, redirectToLogin } from "../auth";
+import { fetchSession, redirectToLogin, signOut } from "../auth";
afterEach(() => {
vi.unstubAllGlobals();
@@ -110,3 +110,157 @@ describe("redirectToLogin", () => {
expect((window.location as unknown as { href: string }).href).toBe(signupHref);
});
});
+
+describe("signOut", () => {
+ // Helper — most tests need the same window.location stub.
+ function stubLocation(): void {
+ Object.defineProperty(window, "location", {
+ writable: true,
+ value: {
+ href: "https://acme.moleculesai.app/orgs",
+ pathname: "/orgs",
+ hostname: "acme.moleculesai.app",
+ protocol: "https:",
+ },
+ });
+ }
+
+ it("POSTs to /cp/auth/signout with credentials:include", async () => {
+ stubLocation();
+ const fetchMock = vi.fn().mockResolvedValue({
+ ok: true,
+ status: 200,
+ json: async () => ({ ok: true, logout_url: "" }),
+ });
+ vi.stubGlobal("fetch", fetchMock);
+
+ await signOut();
+
+ expect(fetchMock).toHaveBeenCalledTimes(1);
+ expect(fetchMock).toHaveBeenCalledWith(
+ expect.stringContaining("/cp/auth/signout"),
+ expect.objectContaining({ method: "POST", credentials: "include" }),
+ );
+ });
+
+ it("navigates to provider logout_url when the response includes one", async () => {
+ // The hosted-logout path is what actually breaks the SSO re-auth
+ // loop reported on PR #2913. Without this, AuthKit's browser
+ // cookie keeps the user signed in via SSO and any subsequent
+ // /cp/auth/login silently re-auths.
+ stubLocation();
+ const hostedLogout =
+ "https://api.workos.com/user_management/sessions/logout?session_id=cookie&return_to=https%3A%2F%2Fapp.moleculesai.app%2Forgs";
+ vi.stubGlobal(
+ "fetch",
+ vi.fn().mockResolvedValue({
+ ok: true,
+ status: 200,
+ json: async () => ({ ok: true, logout_url: hostedLogout }),
+ }),
+ );
+
+ await signOut();
+
+ const after = (window.location as unknown as { href: string }).href;
+ expect(after).toBe(hostedLogout);
+ });
+
+ it("falls back to /cp/auth/login when logout_url is empty (DisabledProvider / dev)", async () => {
+ // DisabledProvider returns "" — the local /cp/auth/login redirect
+ // works in dev/test where there's no SSO session to escape.
+ stubLocation();
+ vi.stubGlobal(
+ "fetch",
+ vi.fn().mockResolvedValue({
+ ok: true,
+ status: 200,
+ json: async () => ({ ok: true, logout_url: "" }),
+ }),
+ );
+
+ await signOut();
+
+ const after = (window.location as unknown as { href: string }).href;
+ // Tenant subdomain (acme.moleculesai.app) → auth origin is app.moleculesai.app.
+ expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
+ });
+
+ it("redirects even when the POST fails so the user isn't stuck on an authed page", async () => {
+ // Critical UX invariant: clicking 'Sign out' MUST navigate away from
+ // the authenticated app, even if the network is down or the cookie
+ // is already invalid. Anything else looks like the button is
+ // broken — the precise complaint that triggered this fix.
+ stubLocation();
+ vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("network down")));
+
+ await signOut();
+
+ const after = (window.location as unknown as { href: string }).href;
+ expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
+ });
+
+ it("redirects on 401 (session already invalid) just like 200", async () => {
+ // A user with an already-invalid cookie should still see the
+ // logout flow complete — no error, no stuck-on-app dead end.
+ // Note: 401 means res.ok=false → we don't read .json() at all,
+ // so a missing body is fine.
+ stubLocation();
+ vi.stubGlobal(
+ "fetch",
+ vi.fn().mockResolvedValue({
+ ok: false,
+ status: 401,
+ json: async () => ({}),
+ }),
+ );
+
+ await signOut();
+
+ const after = (window.location as unknown as { href: string }).href;
+ expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
+ });
+
+ it("falls back to /cp/auth/login when the response body is malformed", async () => {
+ // Defensive parsing: a body that isn't valid JSON, or doesn't
+ // have logout_url, or has logout_url as the wrong type — none of
+ // these should strand the user on the authed page. Fallback path
+ // takes over.
+ stubLocation();
+ vi.stubGlobal(
+ "fetch",
+ vi.fn().mockResolvedValue({
+ ok: true,
+ status: 200,
+ json: async () => {
+ throw new Error("not json");
+ },
+ }),
+ );
+
+ await signOut();
+
+ const after = (window.location as unknown as { href: string }).href;
+ expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
+ });
+
+ it("falls back to /cp/auth/login when logout_url is the wrong type", async () => {
+ // Even valid JSON should be type-checked: a non-string logout_url
+ // (e.g. server-side bug, version drift) must not crash or open-
+ // redirect the user.
+ stubLocation();
+ vi.stubGlobal(
+ "fetch",
+ vi.fn().mockResolvedValue({
+ ok: true,
+ status: 200,
+ json: async () => ({ ok: true, logout_url: 42 }),
+ }),
+ );
+
+ await signOut();
+
+ const after = (window.location as unknown as { href: string }).href;
+ expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
+ });
+});
diff --git a/canvas/src/lib/auth.ts b/canvas/src/lib/auth.ts
index fe7c71ab..d091c2cb 100644
--- a/canvas/src/lib/auth.ts
+++ b/canvas/src/lib/auth.ts
@@ -67,3 +67,80 @@ export function redirectToLogin(screenHint: "sign-up" | "sign-in" = "sign-in"):
const dest = `${authOrigin}${AUTH_BASE}/${path}?return_to=${encodeURIComponent(returnTo)}`;
window.location.href = dest;
}
+
+/**
+ * signOut posts to /cp/auth/signout to clear the WorkOS session cookie
+ * + revoke at the provider, then navigates the browser to the
+ * provider-supplied hosted logout URL (so the provider's BROWSER-side
+ * SSO cookie is cleared too — without this, AuthKit silently re-auths
+ * via SSO on the next /cp/auth/login and the user is "still signed
+ * in" after pressing Sign out).
+ *
+ * Two-layer flow:
+ * 1. POST /cp/auth/signout → CP clears OUR session cookie + revokes
+ * session_id at the provider API. Response includes
+ * `logout_url` — the AuthKit hosted URL the BROWSER must navigate
+ * to so the provider's own browser cookie is cleared.
+ * 2. window.location.href = → AuthKit clears its
+ * session, then redirects the browser to the configured
+ * return_to (defaults to APP_URL/orgs).
+ *
+ * Best-effort by design: a 5xx, network failure, missing logout_url
+ * (DisabledProvider, dev), or stale cookie still results in the
+ * browser navigating away — leaving the user on a logged-in-looking
+ * page after they clicked "Sign out" is the worst possible UX. The
+ * fallback path navigates to /cp/auth/login on the auth origin, which
+ * works correctly in environments without a hosted logout flow (dev,
+ * tests, DisabledProvider).
+ *
+ * Throws nothing — callers can disable the button optimistically or
+ * await this and trust it returns. On a redirect-blocked test
+ * environment (jsdom under vitest) we still exit cleanly so unit tests
+ * can spy on the fetch call.
+ */
+export async function signOut(): Promise {
+ let logoutURL: string | undefined;
+ // Fire-and-tolerate the POST. credentials:include is mandatory cross-
+ // origin so the SaaS canvas (acme.moleculesai.app) can hit
+ // app.moleculesai.app/cp/auth/signout with the session cookie.
+ try {
+ const res = await fetch(`${getAuthOrigin()}${AUTH_BASE}/signout`, {
+ method: "POST",
+ credentials: "include",
+ });
+ if (res.ok) {
+ // Body shape: {"ok": true, "logout_url": "..."}. logout_url is
+ // empty for DisabledProvider (dev/local) — we fall back to
+ // /cp/auth/login below. Defensive parsing: a malformed body
+ // shouldn't strand the user on the authed page.
+ const body: unknown = await res.json().catch(() => null);
+ if (
+ body &&
+ typeof body === "object" &&
+ "logout_url" in body &&
+ typeof (body as { logout_url: unknown }).logout_url === "string" &&
+ (body as { logout_url: string }).logout_url
+ ) {
+ logoutURL = (body as { logout_url: string }).logout_url;
+ }
+ }
+ } catch {
+ // Ignore — we still redirect below.
+ }
+ if (typeof window === "undefined") return;
+ if (logoutURL) {
+ // Hosted logout: AuthKit clears its SSO cookie + redirects to
+ // return_to (configured server-side). This is the path that
+ // actually breaks the SSO re-auth loop.
+ window.location.href = logoutURL;
+ return;
+ }
+ // Fallback: no hosted logout (dev, DisabledProvider, network
+ // failure). Land on the login screen rather than the current URL:
+ // returning to a tenant URL after signout would just re-redirect
+ // through /cp/auth/login due to AuthGate. Send the user straight
+ // there with no return_to so they don't loop back into the org they
+ // just left.
+ const authOrigin = getAuthOrigin();
+ window.location.href = `${authOrigin}${AUTH_BASE}/login`;
+}
diff --git a/docs/agent-runtime/team-expansion.md b/docs/agent-runtime/team-expansion.md
deleted file mode 100644
index 5785dd13..00000000
--- a/docs/agent-runtime/team-expansion.md
+++ /dev/null
@@ -1,111 +0,0 @@
-# Team Expansion (Recursive Workspaces)
-
-When a workspace is expanded into a team, it gains sub-workspaces while its own agent remains as the **team lead** (coordinator). This is recursive — sub-workspaces can themselves be expanded into teams, infinitely deep.
-
-## How It Works
-
-When Developer PM is expanded into a team:
-
-```
-Business Core
- |
- +-- Developer PM (agent stays, becomes coordinator)
- |
- +-- Frontend Agent (sub-workspace, private scope)
- +-- Backend Agent (sub-workspace, private scope)
- +-- QA Agent (sub-workspace, private scope)
-```
-
-- Developer PM's agent **still exists** and acts as coordinator
-- Developer PM receives incoming A2A messages from Business Core
-- Developer PM's agent decides how to delegate to sub-workspaces
-- Sub-workspaces talk to Developer PM and to each other (same level)
-- Sub-workspaces **cannot** talk to Business Core or any workspace outside the team
-
-## Communication Rules
-
-| Direction | Allowed? | Example |
-|-----------|----------|---------|
-| Parent level -> team lead | Yes | Business Core -> Developer PM |
-| Team lead -> sub-workspaces | Yes | Developer PM -> Frontend Agent |
-| Sub-workspace -> team lead | Yes | Frontend Agent -> Developer PM |
-| Sub-workspace <-> sibling | Yes | Frontend Agent <-> Backend Agent |
-| Outside -> sub-workspace directly | No (403) | Business Core -> Frontend Agent |
-| Sub-workspace -> outside directly | No | Frontend Agent -> Business Core |
-
-The team lead (Developer PM) is the **only** bridge between the team's internal world and the outside.
-
-## Scoped Registry
-
-Sub-workspaces register in the platform registry but with a **private scope**. The registry knows about them but enforces access control.
-
-```
-Registry:
- Business Core :8001 scope: public
- Developer PM :8002 scope: public
- Frontend Agent :8010 scope: private, parent=Developer PM
- Backend Agent :8011 scope: private, parent=Developer PM
- QA Agent :8012 scope: private, parent=Developer PM
-```
-
-- The platform can always discover any workspace (for provisioning, monitoring)
-- The parent workspace can discover its sub-workspaces
-- Sub-workspaces can discover their siblings (same parent)
-- Outside workspaces get a **403 Forbidden** if they try to discover a private sub-workspace
-
-## How to Expand
-
-Expansion is triggered via `POST /workspaces/:id/expand`. The platform reads the `sub_workspaces` list from the workspace's config and provisions each one. On the canvas, users right-click a workspace node and select "Expand into team."
-
-Collapsing is the inverse: `POST /workspaces/:id/collapse`. Sub-workspaces are stopped and removed.
-
-## What Happens on Expansion
-
-When Developer PM is expanded into a team, the hierarchy changes but the outside view doesn't. Business Core's parent/child relationship to Developer PM is unaffected — Developer PM still responds to the same A2A endpoint.
-
-The events fired:
-- `WORKSPACE_EXPANDED` with the new `sub_workspace_ids` in the payload
-- `WORKSPACE_PROVISIONING` for each new sub-workspace
-- `WORKSPACE_ONLINE` for each sub-workspace as they come up
-
-Communication rules are automatically derived from the new hierarchy — no manual wiring needed.
-
-## Canvas Behavior
-
-- Children render as embedded mini-cards (`TeamMemberChip`) inside the parent node, not as separate canvas nodes
-- Each mini-card shows full status: gradient bar, name, tier badge, skills pills, active tasks, descendant count
-- **Recursive rendering** up to 3 levels deep (`MAX_NESTING_DEPTH = 3`) — sub-cards can contain their own "Team" sections
-- Parent node dynamically resizes: 210-280px (no children), 320-450px (children), 400-560px (grandchildren)
-- Eject button (sky-blue arrow icon) on hover extracts a child from the team
-- "Extract from Team" also available in the right-click context menu
-- Double-click a team node to zoom/fit to the parent area
-- The parent workspace node shows a badge with total descendant count
-
-## Collapsing a Team
-
-The inverse of expansion, triggered via `POST /workspaces/:id/collapse`:
-
-1. Each sub-workspace agent wraps up current work and writes a handoff document to memory
-2. Sub-workspaces are stopped and removed
-3. The team lead's agent goes back to handling everything directly
-4. A `WORKSPACE_COLLAPSED` event fires
-
-Sub-workspace memory is cleaned up based on backend (see [Memory — Cleanup](../architecture/memory.md#cleanup-on-workspace-deletion)).
-
-## Deleting a Team Workspace
-
-When a team workspace is deleted:
-1. Platform shows a warning listing all sub-workspaces that will be deleted
-2. User can **drag sub-workspaces out** of the team before confirming (promotes them to the parent level)
-3. On confirmation, cascade delete removes the parent and all remaining sub-workspaces
-4. `WORKSPACE_REMOVED` events fire for each deleted workspace
-
-## Related Docs
-
-- [Communication Rules](../api-protocol/communication-rules.md) — Full access control model
-- [Core Concepts](../product/core-concepts.md) — Workspace fundamentals
-- [System Prompt Structure](./system-prompt-structure.md) — How peer capabilities are injected
-- [Provisioner](../architecture/provisioner.md) — How sub-workspaces are deployed
-- [Registry & Heartbeat](../api-protocol/registry-and-heartbeat.md) — How registration works
-- [Event Log](../architecture/event-log.md) — Events fired during expansion
-- [Canvas UI](../frontend/canvas.md) — Visual behavior of teams
diff --git a/docs/api-reference.md b/docs/api-reference.md
index e1a75668..12e94a3c 100644
--- a/docs/api-reference.md
+++ b/docs/api-reference.md
@@ -41,8 +41,6 @@ Full contract: `docs/runbooks/admin-auth.md`.
| GET | /admin/workspaces/:id/test-token | admin_test_token.go — mint a fresh bearer token for E2E scripts; returns 404 unless `MOLECULE_ENV != production` or `MOLECULE_ENABLE_TEST_TOKENS=1` |
| GET/POST/DELETE | /admin/secrets[/:key] | secrets.go — legacy aliases for /settings/secrets |
| WS | /workspaces/:id/terminal | terminal.go |
-| POST | /workspaces/:id/expand | team.go |
-| POST | /workspaces/:id/collapse | team.go |
| POST/GET | /workspaces/:id/approvals | approvals.go |
| POST | /workspaces/:id/approvals/:id/decide | approvals.go |
| GET | /approvals/pending | approvals.go |
diff --git a/docs/architecture/molecule-technical-doc.md b/docs/architecture/molecule-technical-doc.md
index 0d9c653c..cd3dc957 100644
--- a/docs/architecture/molecule-technical-doc.md
+++ b/docs/architecture/molecule-technical-doc.md
@@ -336,8 +336,6 @@ This same logic governs: A2A delegation, memory scope enforcement, activity visi
| Method | Endpoint | Purpose |
|--------|----------|---------|
-| `POST` | `/workspaces/:id/expand` | Expand workspace into team (become coordinator) |
-| `POST` | `/workspaces/:id/collapse` | Collapse team back to single workspace |
### Files, Terminal, Templates, Bundles (8 endpoints)
diff --git a/docs/frontend/canvas.md b/docs/frontend/canvas.md
index 8d59c80f..fc103bd6 100644
--- a/docs/frontend/canvas.md
+++ b/docs/frontend/canvas.md
@@ -186,4 +186,3 @@ So the UI now exposes more operational failure state directly instead of silentl
- [Quickstart](../quickstart.md)
- [Platform API](../api-protocol/platform-api.md)
- [Workspace Runtime](../agent-runtime/workspace-runtime.md)
-- [Team Expansion](../agent-runtime/team-expansion.md)
diff --git a/docs/glossary.md b/docs/glossary.md
index f0343a38..b3535ae8 100644
--- a/docs/glossary.md
+++ b/docs/glossary.md
@@ -18,7 +18,7 @@ lands in the watch list with a colliding term, add a row here.
| **plugin** | A directory under `plugins/` packaging one or more skills or an MCP server wrapper, installable per-workspace via `POST /workspaces/:id/plugins`. Governed by `plugin.yaml`. | **Langflow**: a visual UI node / component in a flowchart. **CrewAI**: a Python-importable callable registered as a capability. |
| **agent** | A persistent containerized workspace running continuously — an identity with memory, a role, and a schedule. Not a one-shot invocation. | Most frameworks (AutoGPT, LangChain agents, OpenAI Assistants): a stateless function-call loop. No persistence between invocations unless explicitly checkpointed. |
| **flow** | A task execution within a workspace — a request enters, the agent runs tools, emits a response, logs activity. No explicit graph abstraction. | **Langflow**: a directed graph of nodes you author visually. **LangGraph**: a stateful graph of callable nodes. Our "flow" is an imperative timeline, not a graph. |
-| **team** | A named cluster of workspaces under a PM (org template `expand_team`). Used for role grouping in Canvas. | **CrewAI**: a "crew" is a sequence of agents that pass a task through a declared order. Our "team" is an org-chart abstraction, not an execution order. |
+| **team** | A named cluster of workspaces under a PM . Used for role grouping in Canvas. | **CrewAI**: a "crew" is a sequence of agents that pass a task through a declared order. Our "team" is an org-chart abstraction, not an execution order. |
| **skill** | A directory with `SKILL.md` that an agent invokes via the `Skill` tool. Skills are documentation + optional scripts that teach an agent a recipe. | **Anthropic Skills API**: nearly identical. **CrewAI tool**: closer to our plugin's MCP tool, not our skill. |
| **channel** | An outbound/inbound social integration (Telegram, Slack, …) per-workspace, wired in `workspace_channels`. | Slack's "channel": the container for messages. We use "channel" for the adapter + credentials, not the conversation itself. |
| **runtime** | The execution engine image tag for a workspace: one of `langgraph`, `claude-code`, `openclaw`, `crewai`, `autogen`, `deepagents`, `hermes`. | **LangGraph runtime**: the Python process running the graph. We use "runtime" for the Docker image + adapter pairing, not the inner process. |
diff --git a/docs/guides/mcp-server-setup.md b/docs/guides/mcp-server-setup.md
index aacc554a..5539ba97 100644
--- a/docs/guides/mcp-server-setup.md
+++ b/docs/guides/mcp-server-setup.md
@@ -166,8 +166,6 @@ list_workspaces
| MCP Tool | API Route | Method | Description |
|----------|-----------|--------|-------------|
-| `expand_team` | `/workspaces/:id/expand` | POST | Expand team node |
-| `collapse_team` | `/workspaces/:id/collapse` | POST | Collapse team node |
### Templates & Bundles
diff --git a/scripts/build_runtime_package.py b/scripts/build_runtime_package.py
index 84636c2b..e4ced302 100755
--- a/scripts/build_runtime_package.py
+++ b/scripts/build_runtime_package.py
@@ -73,11 +73,15 @@ TOP_LEVEL_MODULES = {
"executor_helpers",
"heartbeat",
"inbox",
+ "inbox_uploads",
"initial_prompt",
"internal_chat_uploads",
"internal_file_read",
"main",
"mcp_cli",
+ "mcp_heartbeat",
+ "mcp_inbox_pollers",
+ "mcp_workspace_resolver",
"molecule_ai_status",
"not_configured_handler",
"platform_auth",
diff --git a/tests/e2e/test_poll_mode_chat_upload_e2e.sh b/tests/e2e/test_poll_mode_chat_upload_e2e.sh
new file mode 100755
index 00000000..fbed604f
--- /dev/null
+++ b/tests/e2e/test_poll_mode_chat_upload_e2e.sh
@@ -0,0 +1,295 @@
+#!/usr/bin/env bash
+# E2E for poll-mode chat upload (RFC #2891 phases 1-5b).
+#
+# Round-trip: register a workspace as poll-mode (no callback URL) → POST a
+# multi-file chat upload → verify each file becomes (a) one
+# `chat_upload_receive` activity row and (b) one /pending-uploads row → fetch
+# the bytes back via the poll endpoint → ack → verify the row 404s on
+# subsequent fetch. Also pins cross-workspace bleed protection: workspace B
+# cannot read workspace A's pending uploads even with its own valid bearer.
+#
+# Why this exists separately from test_chat_upload_e2e.sh: that script
+# covers the PUSH path (the workspace's own /internal/chat/uploads/ingest).
+# This script covers the POLL path: the same canvas-side request lands on
+# the platform's pendinguploads.Storage instead, and the workspace fetches
+# it later. The two paths share zero handler code on the platform side, so
+# both need their own E2E.
+#
+# Requires: platform running on localhost:8080 with migrations applied.
+# bash workspace-server/scripts/dev-start.sh
+# bash workspace-server/scripts/run-migrations.sh
+#
+# Idempotent: each run uses fresh per-script workspace UUIDs so reruns
+# don't collide. Best-effort cleanup on EXIT — does NOT call
+# e2e_cleanup_all_workspaces (see
+# `feedback_never_run_cluster_cleanup_tests_on_live_platform.md`).
+
+set -euo pipefail
+
+source "$(dirname "$0")/_lib.sh"
+
+PASS=0
+FAIL=0
+TIMEOUT="${A2A_TIMEOUT:-30}"
+
+gen_uuid() {
+ if command -v uuidgen >/dev/null 2>&1; then
+ uuidgen | tr '[:upper:]' '[:lower:]'
+ else
+ python3 -c 'import uuid; print(uuid.uuid4())'
+ fi
+}
+WS_A="$(gen_uuid)"
+WS_B="$(gen_uuid)"
+
+# Per-run scratch dir collected under one trap so every assertion-failure
+# path drops the temp files it made (see test_chat_attachments_e2e.sh).
+TMPDIR_E2E=$(mktemp -d -t poll-chat-upload-e2e-XXXXXX)
+
+cleanup() {
+ local rc=$?
+ curl -s -X DELETE "$BASE/workspaces/$WS_A?confirm=true" >/dev/null 2>&1 || true
+ curl -s -X DELETE "$BASE/workspaces/$WS_B?confirm=true" >/dev/null 2>&1 || true
+ rm -rf "$TMPDIR_E2E"
+ exit $rc
+}
+trap cleanup EXIT INT TERM
+
+check() {
+ local desc="$1" expected="$2" actual="$3"
+ if echo "$actual" | grep -qF -- "$expected"; then
+ echo "PASS: $desc"
+ PASS=$((PASS + 1))
+ else
+ echo "FAIL: $desc"
+ echo " expected to contain: $expected"
+ echo " got: $(echo "$actual" | head -10)"
+ FAIL=$((FAIL + 1))
+ fi
+}
+
+check_eq() {
+ local desc="$1" expected="$2" actual="$3"
+ if [ "$actual" = "$expected" ]; then
+ echo "PASS: $desc"
+ PASS=$((PASS + 1))
+ else
+ echo "FAIL: $desc"
+ echo " expected: $expected"
+ echo " got: $actual"
+ FAIL=$((FAIL + 1))
+ fi
+}
+
+echo "=== Poll-Mode Chat Upload E2E ==="
+echo " base: $BASE"
+echo " workspace A: $WS_A"
+echo " workspace B: $WS_B"
+echo ""
+
+# ---------- Phase 1: register poll-mode workspace ----------
+echo "--- Phase 1: Register poll-mode workspace A ---"
+
+REG_A=$(curl -s -X POST "$BASE/registry/register" \
+ -H "Content-Type: application/json" \
+ -d "{
+ \"id\": \"$WS_A\",
+ \"delivery_mode\": \"poll\",
+ \"agent_card\": {\"name\": \"poll-chat-upload-test-a\"}
+ }")
+check "register accepts poll mode without URL" '"status":"registered"' "$REG_A"
+TOK_A=$(echo "$REG_A" | e2e_extract_token || true)
+[ -n "$TOK_A" ] || { echo "FAIL: no auth_token in register response (ws A)"; FAIL=$((FAIL + 1)); exit 1; }
+
+# ---------- Phase 2: multi-file chat upload ----------
+echo ""
+echo "--- Phase 2: POST /chat/uploads with two files ---"
+
+FILE1="$TMPDIR_E2E/alpha.txt"
+FILE2="$TMPDIR_E2E/beta.txt"
+EXPECTED1="alpha-secret-$(openssl rand -hex 4)"
+EXPECTED2="beta-secret-$(openssl rand -hex 4)"
+printf '%s' "$EXPECTED1" > "$FILE1"
+printf '%s' "$EXPECTED2" > "$FILE2"
+
+UPLOAD=$(curl -s -X POST "$BASE/workspaces/$WS_A/chat/uploads" \
+ -H "Authorization: Bearer $TOK_A" \
+ -F "files=@$FILE1;filename=alpha.txt;type=text/plain" \
+ -F "files=@$FILE2;filename=beta.txt;type=text/plain" \
+ -w "\nHTTP_CODE=%{http_code}\n")
+UPLOAD_CODE=$(echo "$UPLOAD" | grep -oE 'HTTP_CODE=[0-9]+' | cut -d= -f2)
+UPLOAD_BODY=$(echo "$UPLOAD" | sed '/^HTTP_CODE=/,$d')
+
+check_eq "upload returns 200" "200" "$UPLOAD_CODE"
+check "upload response has files array" '"files":' "$UPLOAD_BODY"
+
+# Pull file_ids out of the URI in the response. URI shape is
+# `platform-pending:/` — proves the response came from the
+# poll-mode branch, not the push-mode internal-ingest branch.
+URI1=$(echo "$UPLOAD_BODY" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][0]["uri"])')
+URI2=$(echo "$UPLOAD_BODY" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][1]["uri"])')
+check "URI 1 has platform-pending: scheme" "platform-pending:$WS_A/" "$URI1"
+check "URI 2 has platform-pending: scheme" "platform-pending:$WS_A/" "$URI2"
+
+FID1="${URI1##*/}"
+FID2="${URI2##*/}"
+[ -n "$FID1" ] && [ -n "$FID2" ] || { echo "FAIL: could not extract file IDs"; FAIL=$((FAIL + 1)); exit 1; }
+echo " file_id 1: $FID1"
+echo " file_id 2: $FID2"
+
+# ---------- Phase 3: activity rows visible to the workspace ----------
+echo ""
+echo "--- Phase 3: /activity shows two chat_upload_receive rows ---"
+
+# activity_logs INSERTs run in a goroutine — give them a moment.
+sleep 1
+ACT=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
+ "$BASE/workspaces/$WS_A/activity?type=a2a_receive&limit=20")
+check "activity feed has the alpha file" "$FID1" "$ACT"
+check "activity feed has the beta file" "$FID2" "$ACT"
+check "activity rows tagged chat_upload_receive" '"method":"chat_upload_receive"' "$ACT"
+check "activity rows record alpha mimetype" '"mimeType":"text/plain"' "$ACT"
+
+CHAT_UPLOAD_COUNT=$(echo "$ACT" | python3 -c '
+import json, sys
+rows = json.load(sys.stdin)
+n = sum(1 for r in rows if (r.get("method") or "") == "chat_upload_receive")
+print(n)
+')
+check_eq "exactly two chat_upload_receive rows" "2" "$CHAT_UPLOAD_COUNT"
+
+# ---------- Phase 4: GET /pending-uploads/:file_id/content ----------
+echo ""
+echo "--- Phase 4: Fetch content for each pending upload ---"
+
+GOT1=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
+ "$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
+check_eq "alpha bytes round-trip" "$EXPECTED1" "$GOT1"
+
+GOT2=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
+ "$BASE/workspaces/$WS_A/pending-uploads/$FID2/content")
+check_eq "beta bytes round-trip" "$EXPECTED2" "$GOT2"
+
+# Mimetype + Content-Disposition headers should match what was uploaded.
+HEAD1=$(curl -s -D - -o /dev/null --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
+ "$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
+check "alpha response carries text/plain Content-Type" "Content-Type: text/plain" "$HEAD1"
+check "alpha response carries Content-Disposition with filename" 'filename="alpha.txt"' "$HEAD1"
+
+# ---------- Phase 5: idempotent re-fetch (until ack) ----------
+echo ""
+echo "--- Phase 5: Re-fetch before ack returns the same bytes ---"
+
+RE_GOT1=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
+ "$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
+check_eq "re-fetch returns same alpha bytes" "$EXPECTED1" "$RE_GOT1"
+
+# ---------- Phase 6: ack each row ----------
+echo ""
+echo "--- Phase 6: Ack each pending upload ---"
+
+ACK1=$(curl -s -X POST --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
+ "$BASE/workspaces/$WS_A/pending-uploads/$FID1/ack")
+check "alpha ack returns acked:true" '"acked":true' "$ACK1"
+
+ACK2=$(curl -s -X POST --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
+ "$BASE/workspaces/$WS_A/pending-uploads/$FID2/ack")
+check "beta ack returns acked:true" '"acked":true' "$ACK2"
+
+# Re-ack should still 200 (idempotent — the row's gone but the workspace's
+# at-least-once intent was already honored, and the second ack hits the
+# raced path which also returns 200).
+RE_ACK1=$(curl -s -w '\n%{http_code}' -X POST --max-time "$TIMEOUT" \
+ -H "Authorization: Bearer $TOK_A" \
+ "$BASE/workspaces/$WS_A/pending-uploads/$FID1/ack")
+RE_ACK1_CODE=$(printf '%s' "$RE_ACK1" | tail -n1)
+# Acked rows return 404 on Get-before-Ack (the row's still in the table
+# but Get filters acked_at IS NULL); workspace would not normally re-ack
+# since it already saw the success. Accept both 200 and 404 here so the
+# test pins the contract without being brittle on the inner ordering.
+case "$RE_ACK1_CODE" in
+ 200|404)
+ echo "PASS: re-ack returns 200 or 404 ($RE_ACK1_CODE)"
+ PASS=$((PASS + 1))
+ ;;
+ *)
+ echo "FAIL: re-ack returned unexpected $RE_ACK1_CODE"
+ FAIL=$((FAIL + 1))
+ ;;
+esac
+
+# ---------- Phase 7: GET content after ack returns 404 ----------
+echo ""
+echo "--- Phase 7: Acked file 404s on subsequent fetch ---"
+
+POST_ACK=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
+ "$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
+POST_ACK_CODE=$(printf '%s' "$POST_ACK" | tail -n1)
+check_eq "acked alpha returns HTTP 404" "404" "$POST_ACK_CODE"
+
+# ---------- Phase 8: cross-workspace bleed protection ----------
+echo ""
+echo "--- Phase 8: Workspace B cannot read workspace A's pending uploads ---"
+
+# Stage a fresh upload on workspace A so we have an UN-acked row to probe.
+PROBE_FILE="$TMPDIR_E2E/probe.txt"
+printf '%s' "probe-bytes-$(openssl rand -hex 4)" > "$PROBE_FILE"
+PROBE_UP=$(curl -s -X POST "$BASE/workspaces/$WS_A/chat/uploads" \
+ -H "Authorization: Bearer $TOK_A" \
+ -F "files=@$PROBE_FILE;filename=probe.txt;type=text/plain")
+PROBE_FID=$(echo "$PROBE_UP" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][0]["uri"].split("/")[-1])')
+[ -n "$PROBE_FID" ] || { echo "FAIL: probe upload returned no file_id"; FAIL=$((FAIL + 1)); exit 1; }
+
+# Register a SECOND poll-mode workspace and capture its bearer.
+REG_B=$(curl -s -X POST "$BASE/registry/register" \
+ -H "Content-Type: application/json" \
+ -d "{
+ \"id\": \"$WS_B\",
+ \"delivery_mode\": \"poll\",
+ \"agent_card\": {\"name\": \"poll-chat-upload-test-b\"}
+ }")
+check "second workspace registers" '"status":"registered"' "$REG_B"
+TOK_B=$(echo "$REG_B" | e2e_extract_token || true)
+[ -n "$TOK_B" ] || { echo "FAIL: no auth_token (ws B)"; FAIL=$((FAIL + 1)); exit 1; }
+
+# B's bearer hitting B's URL with A's file_id → 404 (handler checks the row's
+# workspace_id matches the URL :id, not the bearer's workspace).
+CROSS_RESP=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
+ -H "Authorization: Bearer $TOK_B" \
+ "$BASE/workspaces/$WS_B/pending-uploads/$PROBE_FID/content")
+CROSS_CODE=$(printf '%s' "$CROSS_RESP" | tail -n1)
+check_eq "B's URL with A's file_id returns 404" "404" "$CROSS_CODE"
+
+# B's bearer hitting A's URL → 401 (wsAuth pins bearer to :id). This is the
+# strictest cross-workspace check: a presented-but-wrong bearer is rejected
+# in EVERY platform posture (dev-mode fail-open only triggers when no bearer
+# is presented at all — invalid tokens always 401).
+WRONG_BEARER=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
+ -H "Authorization: Bearer $TOK_B" \
+ "$BASE/workspaces/$WS_A/pending-uploads/$PROBE_FID/content")
+WRONG_CODE=$(printf '%s' "$WRONG_BEARER" | tail -n1)
+check_eq "B's bearer on A's URL returns 401" "401" "$WRONG_CODE"
+
+# NB: a fully bearerless request to /pending-uploads/:fid/content returns
+# 401 ONLY when the platform has MOLECULE_ENV != development (production /
+# staging). On local-dev with MOLECULE_ENV=development the wsauth middleware
+# fail-opens for bearerless requests so the canvas at :3000 can talk to the
+# platform at :8080 without per-call token plumbing — see middleware/
+# devmode.go. The strict bearerless-401 contract is covered by the wsauth
+# unit + middleware tests; we don't reassert it here because the result
+# depends on platform posture, not the poll-mode upload contract.
+
+# ---------- Phase 9: invalid file_id rejected at the URL parser ----------
+echo ""
+echo "--- Phase 9: Invalid file_id returns 400 ---"
+
+BAD_FID=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
+ -H "Authorization: Bearer $TOK_A" \
+ "$BASE/workspaces/$WS_A/pending-uploads/not-a-uuid/content")
+BAD_FID_CODE=$(printf '%s' "$BAD_FID" | tail -n1)
+check_eq "invalid file_id UUID returns 400" "400" "$BAD_FID_CODE"
+
+# ---------- Results ----------
+echo ""
+echo "=== Results: $PASS passed, $FAIL failed ==="
+[ "$FAIL" -eq 0 ]
diff --git a/tests/harness/compose.yml b/tests/harness/compose.yml
index debbb675..e209287d 100644
--- a/tests/harness/compose.yml
+++ b/tests/harness/compose.yml
@@ -94,6 +94,13 @@ services:
CP_UPSTREAM_URL: "http://cp-stub:9090"
RATE_LIMIT: "1000"
CANVAS_PROXY_URL: "http://localhost:3000"
+ # Memory v2 sidecar (PR #2906) bundles the plugin into the
+ # tenant image and starts it before the main server. The plugin
+ # runs `CREATE EXTENSION vector` on first boot, which fails on
+ # the harness's plain postgres:15-alpine (no pgvector). The
+ # harness doesn't exercise memory features, so disable the
+ # sidecar via the entrypoint's documented escape hatch.
+ MEMORY_PLUGIN_DISABLE: "1"
networks: [harness-net]
healthcheck:
test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"]
@@ -142,6 +149,13 @@ services:
CP_UPSTREAM_URL: "http://cp-stub:9090"
RATE_LIMIT: "1000"
CANVAS_PROXY_URL: "http://localhost:3000"
+ # Memory v2 sidecar (PR #2906) bundles the plugin into the
+ # tenant image and starts it before the main server. The plugin
+ # runs `CREATE EXTENSION vector` on first boot, which fails on
+ # the harness's plain postgres:15-alpine (no pgvector). The
+ # harness doesn't exercise memory features, so disable the
+ # sidecar via the entrypoint's documented escape hatch.
+ MEMORY_PLUGIN_DISABLE: "1"
networks: [harness-net]
healthcheck:
test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"]
diff --git a/workspace-server/Dockerfile b/workspace-server/Dockerfile
index 7065e405..d6754312 100644
--- a/workspace-server/Dockerfile
+++ b/workspace-server/Dockerfile
@@ -21,6 +21,14 @@ ARG GIT_SHA=dev
RUN CGO_ENABLED=0 GOOS=linux go build \
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
-o /platform ./cmd/server
+# Bundle the built-in memory-plugin-postgres binary so an operator can
+# activate Memory v2 by setting MEMORY_V2_CUTOVER=true + (default)
+# MEMORY_PLUGIN_URL=http://localhost:9100. The entrypoint starts this
+# binary in the background; main /platform talks to it over loopback.
+# Stays inert until the operator flips the cutover env var.
+RUN CGO_ENABLED=0 GOOS=linux go build \
+ -ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
+ -o /memory-plugin ./cmd/memory-plugin-postgres
# Clone templates + plugins at build time from manifest.json
FROM alpine:3.20 AS templates
@@ -30,8 +38,9 @@ COPY scripts/clone-manifest.sh /scripts/clone-manifest.sh
RUN chmod +x /scripts/clone-manifest.sh && /scripts/clone-manifest.sh /manifest.json /workspace-configs-templates /org-templates /plugins
FROM alpine:3.20
-RUN apk add --no-cache ca-certificates git tzdata
+RUN apk add --no-cache ca-certificates git tzdata wget
COPY --from=builder /platform /platform
+COPY --from=builder /memory-plugin /memory-plugin
COPY workspace-server/migrations /migrations
COPY --from=templates /workspace-configs-templates /workspace-configs-templates
COPY --from=templates /org-templates /org-templates
@@ -41,6 +50,7 @@ RUN addgroup -g 1000 platform && adduser -u 1000 -G platform -s /bin/sh -D platf
EXPOSE 8080
COPY <<'ENTRY' /entrypoint.sh
#!/bin/sh
+# Set up docker-socket group (unchanged from pre-sidecar entrypoint).
if [ -S /var/run/docker.sock ]; then
SOCK_GID=$(stat -c '%g' /var/run/docker.sock 2>/dev/null || stat -f '%g' /var/run/docker.sock 2>/dev/null)
if [ -n "$SOCK_GID" ] && [ "$SOCK_GID" != "0" ]; then
@@ -50,6 +60,61 @@ if [ -S /var/run/docker.sock ]; then
addgroup platform root 2>/dev/null || true
fi
fi
+
+# Memory v2 sidecar (built-in postgres plugin). Co-located with the
+# main server so operators flipping MEMORY_V2_CUTOVER=true don't need
+# to provision a separate service.
+#
+# Spawn-gating: only start the sidecar when the operator has indicated
+# they want it — either MEMORY_V2_CUTOVER=true OR MEMORY_PLUGIN_URL set.
+# Without that signal, the sidecar adds zero value (the platform's
+# wiring.go skips building the client too) but pays a real cost: the
+# plugin's first migration runs `CREATE EXTENSION vector`, which fails
+# on tenant Postgres without pgvector preinstalled and aborts container
+# boot via the 30s health gate. Caught on staging redeploy 2026-05-05.
+#
+# Env defaults (when sidecar IS spawned):
+# MEMORY_PLUGIN_DATABASE_URL = $DATABASE_URL (share existing Postgres;
+# plugin's `memory_namespaces` / `memory_records` tables coexist
+# with `agent_memories` and the rest of the platform schema —
+# no conflicts. Operator can override with a separate URL.)
+# MEMORY_PLUGIN_LISTEN_ADDR = 127.0.0.1:9100
+#
+# Set MEMORY_PLUGIN_DISABLE=1 to force-skip the sidecar even with
+# cutover env set (e.g. running the plugin externally on a separate host).
+memory_plugin_wanted=""
+if [ "$MEMORY_V2_CUTOVER" = "true" ] || [ -n "$MEMORY_PLUGIN_URL" ]; then
+ memory_plugin_wanted=1
+fi
+if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$memory_plugin_wanted" ] && [ -n "$DATABASE_URL" ]; then
+ : "${MEMORY_PLUGIN_DATABASE_URL:=$DATABASE_URL}"
+ : "${MEMORY_PLUGIN_LISTEN_ADDR:=:9100}"
+ export MEMORY_PLUGIN_DATABASE_URL MEMORY_PLUGIN_LISTEN_ADDR
+ echo "memory-plugin: starting sidecar on $MEMORY_PLUGIN_LISTEN_ADDR" >&2
+ # Drop privs to the platform user — the plugin doesn't need root and
+ # runs unprivileged elsewhere (tenant image already starts as canvas).
+ su-exec platform /memory-plugin &
+ MEMORY_PLUGIN_PID=$!
+ # Wait up to 30s for the plugin's /v1/health to return 200. Boot
+ # failure here is fatal — better to crash-loop than to silently
+ # serve cutover traffic against a dead plugin.
+ health_port=${MEMORY_PLUGIN_LISTEN_ADDR#:}
+ ready=0
+ for _ in $(seq 1 30); do
+ if wget -qO- --timeout=2 "http://localhost:${health_port}/v1/health" >/dev/null 2>&1; then
+ ready=1
+ break
+ fi
+ sleep 1
+ done
+ if [ "$ready" != "1" ]; then
+ echo "memory-plugin: ❌ /v1/health never returned 200 after 30s — aborting boot. Check that DATABASE_URL is reachable, has the pgvector extension, and the plugin's migrations applied." >&2
+ kill "$MEMORY_PLUGIN_PID" 2>/dev/null || true
+ exit 1
+ fi
+ echo "memory-plugin: ✅ sidecar healthy on :$health_port" >&2
+fi
+
exec su-exec platform /platform "$@"
ENTRY
RUN chmod +x /entrypoint.sh && apk add --no-cache su-exec
diff --git a/workspace-server/Dockerfile.tenant b/workspace-server/Dockerfile.tenant
index 23140a67..6ccc737e 100644
--- a/workspace-server/Dockerfile.tenant
+++ b/workspace-server/Dockerfile.tenant
@@ -34,6 +34,13 @@ ARG GIT_SHA=dev
RUN CGO_ENABLED=0 GOOS=linux go build \
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
-o /platform ./cmd/server
+# Memory v2 sidecar binary (Memory v2 #2728). Bundled so an operator
+# can activate cutover by flipping MEMORY_V2_CUTOVER=true without
+# provisioning a separate service. See entrypoint-tenant.sh for the
+# launch logic.
+RUN CGO_ENABLED=0 GOOS=linux go build \
+ -ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
+ -o /memory-plugin ./cmd/memory-plugin-postgres
# ── Stage 2: Canvas Next.js standalone ────────────────────────────────
FROM node:20-alpine AS canvas-builder
@@ -74,8 +81,9 @@ RUN deluser --remove-home node 2>/dev/null || true; \
delgroup node 2>/dev/null || true; \
addgroup -g 1000 canvas && adduser -u 1000 -G canvas -s /bin/sh -D canvas
-# Go platform binary
+# Go platform binary + Memory v2 sidecar
COPY --from=go-builder /platform /platform
+COPY --from=go-builder /memory-plugin /memory-plugin
COPY workspace-server/migrations /migrations
# Templates + plugins (cloned from GitHub in stage 3)
@@ -91,7 +99,7 @@ COPY --from=canvas-builder /canvas/public ./public
COPY workspace-server/entrypoint-tenant.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh && \
- chown -R canvas:canvas /canvas /platform /migrations
+ chown -R canvas:canvas /canvas /platform /memory-plugin /migrations
EXPOSE 8080
# entrypoint.sh starts as root to fix volume perms, then drops to
diff --git a/workspace-server/cmd/memory-plugin-postgres/config_test.go b/workspace-server/cmd/memory-plugin-postgres/config_test.go
new file mode 100644
index 00000000..252f0d1b
--- /dev/null
+++ b/workspace-server/cmd/memory-plugin-postgres/config_test.go
@@ -0,0 +1,50 @@
+package main
+
+import (
+ "strings"
+ "testing"
+)
+
+// TestLoadConfig_DefaultListenAddrIsLoopback pins the default-bind contract.
+//
+// Why this matters: with the prior `:9100` default, the plugin listened on
+// every interface. Inside the container it didn't matter (no host port
+// mapping today), but a future change that publishes 9100 OR a cross-host
+// sidecar deploy would have exposed an unauth'd memory store. Loopback by
+// default is the least-privilege baseline; operators with a multi-host
+// topology override via MEMORY_PLUGIN_LISTEN_ADDR.
+func TestLoadConfig_DefaultListenAddrIsLoopback(t *testing.T) {
+ t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "postgres://stub")
+ t.Setenv("MEMORY_PLUGIN_LISTEN_ADDR", "")
+
+ cfg, err := loadConfig()
+ if err != nil {
+ t.Fatalf("loadConfig: %v", err)
+ }
+ if !strings.HasPrefix(cfg.ListenAddr, "127.0.0.1:") {
+ t.Errorf("default ListenAddr must bind loopback-only, got %q "+
+ "(security regression — would expose plugin on every interface)",
+ cfg.ListenAddr)
+ }
+}
+
+func TestLoadConfig_ListenAddrEnvOverride(t *testing.T) {
+ t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "postgres://stub")
+ t.Setenv("MEMORY_PLUGIN_LISTEN_ADDR", ":9100")
+
+ cfg, err := loadConfig()
+ if err != nil {
+ t.Fatalf("loadConfig: %v", err)
+ }
+ if cfg.ListenAddr != ":9100" {
+ t.Errorf("env override ignored: want :9100, got %q", cfg.ListenAddr)
+ }
+}
+
+func TestLoadConfig_MissingDatabaseURL(t *testing.T) {
+ t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "")
+
+ if _, err := loadConfig(); err == nil {
+ t.Fatal("loadConfig must error when MEMORY_PLUGIN_DATABASE_URL is empty")
+ }
+}
diff --git a/workspace-server/cmd/memory-plugin-postgres/main.go b/workspace-server/cmd/memory-plugin-postgres/main.go
index 84e01351..2a1b2dee 100644
--- a/workspace-server/cmd/memory-plugin-postgres/main.go
+++ b/workspace-server/cmd/memory-plugin-postgres/main.go
@@ -10,6 +10,7 @@ package main
import (
"context"
"database/sql"
+ "embed"
"errors"
"fmt"
"log"
@@ -17,6 +18,7 @@ import (
"net/http"
"os"
"os/signal"
+ "sort"
"strings"
"syscall"
"time"
@@ -26,12 +28,28 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/pgplugin"
)
+// migrationsFS bundles the .up.sql files into the binary at build time
+// so the prebuilt image doesn't need the source tree at runtime. The
+// prior `os.ReadDir("cmd/memory-plugin-postgres/migrations")` path
+// only resolved during `go test` from the repo root — in the published
+// image the path didn't exist and boot failed after the 30s health gate
+// (caught on staging redeploy 2026-05-05 after PR #2906).
+//
+//go:embed migrations/*.up.sql
+var migrationsFS embed.FS
+
const (
envDatabaseURL = "MEMORY_PLUGIN_DATABASE_URL"
envListenAddr = "MEMORY_PLUGIN_LISTEN_ADDR"
envSkipMigrate = "MEMORY_PLUGIN_SKIP_MIGRATE"
- defaultListenAddr = ":9100"
+ // Loopback-only by default (defense in depth). The platform talks to
+ // the plugin over `http://localhost:9100` from the same container, so
+ // binding to all interfaces would only widen the reachable surface
+ // without enabling any in-design caller. Operators running the plugin
+ // on a separate host override via MEMORY_PLUGIN_LISTEN_ADDR=:9100 (or
+ // some other interface).
+ defaultListenAddr = "127.0.0.1:9100"
)
func main() {
@@ -143,32 +161,71 @@ func openDB(databaseURL string) (*sql.DB, error) {
return db, nil
}
-// runMigrations applies the schema migrations bundled at
-// cmd/memory-plugin-postgres/migrations/. Idempotent on repeat boot.
+// runMigrations applies the schema migrations bundled into the binary
+// via go:embed (see migrationsFS at the top of this file). Idempotent
+// on repeat boot — every migration file uses CREATE … IF NOT EXISTS.
//
-// Implementation note: rather than embedding the full migrate engine,
-// we read the migration files at boot from a known relative path. The
-// down migrations are deliberately NOT applied here — that's a manual
-// operator action. This keeps the binary tiny and avoids dragging in
-// golang-migrate's drivers.
+// The down migrations are deliberately NOT applied here — that's a
+// manual operator action. This keeps the binary tiny and avoids
+// dragging in golang-migrate's drivers.
+//
+// MEMORY_PLUGIN_MIGRATIONS_DIR (filesystem path) is honored as an
+// override for operators who need to ship custom migrations alongside
+// the binary without rebuilding. When unset (the common case) we read
+// from the embedded FS.
func runMigrations(db *sql.DB) error {
- // Find the migrations directory. In `go run` mode it's relative
- // to the cmd dir; in the prebuilt binary case it's expected next
- // to the binary OR via env var override.
- dir := os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR")
- if dir == "" {
- // Best-effort: try the cwd-relative path that works for `go test`.
- dir = "cmd/memory-plugin-postgres/migrations"
+ if dir := strings.TrimSpace(os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR")); dir != "" {
+ return runMigrationsFromDisk(db, dir)
}
- entries, err := os.ReadDir(dir)
+ return runMigrationsFromEmbed(db)
+}
+
+// runMigrationsFromEmbed applies the *.up.sql files bundled into the
+// binary at build time. Order is alphabetical (matches the on-disk
+// behavior of os.ReadDir on Linux for the same set of names).
+func runMigrationsFromEmbed(db *sql.DB) error {
+ entries, err := migrationsFS.ReadDir("migrations")
if err != nil {
- return fmt.Errorf("read migrations dir %q: %w", dir, err)
+ return fmt.Errorf("read embedded migrations: %w", err)
}
+ names := make([]string, 0, len(entries))
for _, e := range entries {
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
continue
}
- path := dir + "/" + e.Name()
+ names = append(names, e.Name())
+ }
+ sort.Strings(names)
+ for _, name := range names {
+ data, err := migrationsFS.ReadFile("migrations/" + name)
+ if err != nil {
+ return fmt.Errorf("read embedded %q: %w", name, err)
+ }
+ if _, err := db.Exec(string(data)); err != nil {
+ return fmt.Errorf("apply %q: %w", name, err)
+ }
+ log.Printf("applied embedded migration %s", name)
+ }
+ return nil
+}
+
+// runMigrationsFromDisk preserves the legacy filesystem-path mode for
+// operator-supplied custom migrations.
+func runMigrationsFromDisk(db *sql.DB, dir string) error {
+ entries, err := os.ReadDir(dir)
+ if err != nil {
+ return fmt.Errorf("read migrations dir %q: %w", dir, err)
+ }
+ names := make([]string, 0, len(entries))
+ for _, e := range entries {
+ if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
+ continue
+ }
+ names = append(names, e.Name())
+ }
+ sort.Strings(names)
+ for _, name := range names {
+ path := dir + "/" + name
data, err := os.ReadFile(path)
if err != nil {
return fmt.Errorf("read %q: %w", path, err)
@@ -176,7 +233,7 @@ func runMigrations(db *sql.DB) error {
if _, err := db.Exec(string(data)); err != nil {
return fmt.Errorf("apply %q: %w", path, err)
}
- log.Printf("applied migration %s", e.Name())
+ log.Printf("applied disk migration %s (from %s)", name, dir)
}
return nil
}
diff --git a/workspace-server/cmd/memory-plugin-postgres/migrations_embed_test.go b/workspace-server/cmd/memory-plugin-postgres/migrations_embed_test.go
new file mode 100644
index 00000000..f2f0b785
--- /dev/null
+++ b/workspace-server/cmd/memory-plugin-postgres/migrations_embed_test.go
@@ -0,0 +1,72 @@
+package main
+
+import (
+ "strings"
+ "testing"
+)
+
+// TestMigrationsEmbedded_ContainsCreateTable pins that the migrations
+// are bundled into the binary at build time, NOT loaded from a
+// filesystem path that doesn't exist at runtime in the published image.
+//
+// Pre-fix: PR #2906 shipped the binary without the migrations dir;
+// `os.ReadDir("cmd/memory-plugin-postgres/migrations")` errored on every
+// tenant boot, the 30s health gate aborted the container, and the
+// staging redeploy fleet job marked all tenants as failed. Embedding
+// the migrations into the binary removes the runtime path entirely.
+func TestMigrationsEmbedded_ContainsCreateTable(t *testing.T) {
+ entries, err := migrationsFS.ReadDir("migrations")
+ if err != nil {
+ t.Fatalf("embedded migrations dir unreadable: %v", err)
+ }
+ if len(entries) == 0 {
+ t.Fatal("embedded migrations dir is empty — go:embed pattern matched no files")
+ }
+
+ var seenUp bool
+ for _, e := range entries {
+ if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
+ continue
+ }
+ seenUp = true
+ data, err := migrationsFS.ReadFile("migrations/" + e.Name())
+ if err != nil {
+ t.Errorf("read embedded %q: %v", e.Name(), err)
+ continue
+ }
+ if !strings.Contains(string(data), "CREATE TABLE") {
+ t.Errorf("embedded %q has no CREATE TABLE — wrong file embedded?", e.Name())
+ }
+ }
+ if !seenUp {
+ t.Fatal("no *.up.sql in embedded migrations — runtime would have no schema to apply")
+ }
+}
+
+// TestRunMigrationsFromEmbed_OrderingIsAlphabetic pins that we apply
+// migrations in deterministic alphabetical order, not in whatever
+// arbitrary order migrationsFS.ReadDir happens to return. With one
+// migration today this is moot, but a future second migration ('002_…')
+// MUST run after '001_…' or the schema is broken.
+//
+// We can't easily exercise db.Exec here (no test DB); instead pin the
+// sort step on the directory listing itself.
+func TestRunMigrationsFromEmbed_OrderingIsAlphabetic(t *testing.T) {
+ entries, err := migrationsFS.ReadDir("migrations")
+ if err != nil {
+ t.Fatalf("embedded migrations dir unreadable: %v", err)
+ }
+ var names []string
+ for _, e := range entries {
+ if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
+ continue
+ }
+ names = append(names, e.Name())
+ }
+ for i := 1; i < len(names); i++ {
+ if names[i-1] > names[i] {
+ t.Errorf("ReadDir returned non-sorted names; runMigrationsFromEmbed must sort. "+
+ "Got %q before %q", names[i-1], names[i])
+ }
+ }
+}
diff --git a/workspace-server/cmd/server/main.go b/workspace-server/cmd/server/main.go
index 3961a842..45597367 100644
--- a/workspace-server/cmd/server/main.go
+++ b/workspace-server/cmd/server/main.go
@@ -19,6 +19,7 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/imagewatch"
memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring"
+ "github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/router"
@@ -265,6 +266,14 @@ func main() {
})
}
+ // Pending-uploads GC sweep — deletes acked rows past their retention
+ // window plus unacked rows past expires_at. Without this the
+ // pending_uploads table grows unbounded; even with the 24h hard TTL,
+ // nothing actually deletes a row, just makes it un-fetchable.
+ go supervised.RunWithRecover(ctx, "pending-uploads-sweeper", func(c context.Context) {
+ pendinguploads.StartSweeper(c, pendinguploads.NewPostgres(db.DB), 0)
+ })
+
// Provision-timeout sweep — flips workspaces that have been stuck in
// status='provisioning' past the timeout window to 'failed' and emits
// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
diff --git a/workspace-server/entrypoint-tenant.sh b/workspace-server/entrypoint-tenant.sh
index 9cfc1437..0f2d6dde 100644
--- a/workspace-server/entrypoint-tenant.sh
+++ b/workspace-server/entrypoint-tenant.sh
@@ -20,6 +20,51 @@ cd /canvas
PORT=3000 HOSTNAME=0.0.0.0 node server.js &
CANVAS_PID=$!
+# Memory v2 sidecar (built-in postgres plugin). See Dockerfile entrypoint
+# comment for rationale.
+#
+# Spawn-gating: only start the sidecar when the operator has indicated
+# they want it (MEMORY_V2_CUTOVER=true OR MEMORY_PLUGIN_URL set).
+# Without that signal, the sidecar adds zero value and risks aborting
+# tenant boot via the 30s health gate when the tenant Postgres lacks
+# pgvector. Caught on staging redeploy 2026-05-05:
+# pq: extension "vector" is not available
+#
+# Defaults (when sidecar IS spawned): MEMORY_PLUGIN_DATABASE_URL
+# falls back to the tenant's DATABASE_URL.
+MEMORY_PLUGIN_PID=""
+memory_plugin_wanted=""
+if [ "$MEMORY_V2_CUTOVER" = "true" ] || [ -n "$MEMORY_PLUGIN_URL" ]; then
+ memory_plugin_wanted=1
+fi
+if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$memory_plugin_wanted" ] && [ -n "$DATABASE_URL" ]; then
+ : "${MEMORY_PLUGIN_DATABASE_URL:=$DATABASE_URL}"
+ : "${MEMORY_PLUGIN_LISTEN_ADDR:=:9100}"
+ export MEMORY_PLUGIN_DATABASE_URL MEMORY_PLUGIN_LISTEN_ADDR
+ echo "memory-plugin: starting sidecar on $MEMORY_PLUGIN_LISTEN_ADDR" >&2
+ /memory-plugin &
+ MEMORY_PLUGIN_PID=$!
+ # Wait up to 30s for /v1/health. Boot failure is fatal so a misconfigured
+ # tenant crash-loops instead of silently serving cutover traffic against
+ # a dead plugin.
+ health_port=${MEMORY_PLUGIN_LISTEN_ADDR#:}
+ ready=0
+ for _ in $(seq 1 30); do
+ if wget -qO- --timeout=2 "http://localhost:${health_port}/v1/health" >/dev/null 2>&1; then
+ ready=1
+ break
+ fi
+ sleep 1
+ done
+ if [ "$ready" != "1" ]; then
+ echo "memory-plugin: ❌ /v1/health never returned 200 after 30s — aborting boot. Check DATABASE_URL reachability + pgvector extension + migrations." >&2
+ kill "$MEMORY_PLUGIN_PID" 2>/dev/null || true
+ kill "$CANVAS_PID" 2>/dev/null || true
+ exit 1
+ fi
+ echo "memory-plugin: ✅ sidecar healthy on :$health_port" >&2
+fi
+
# Start Go platform in foreground-ish (we trap signals)
# CANVAS_PROXY_URL tells the platform to proxy unmatched routes to Canvas.
# CONTAINER_BACKEND: empty = Docker (default for self-hosted/local).
@@ -29,15 +74,20 @@ cd /
/platform &
PLATFORM_PID=$!
-# If either process exits, kill the other
+# If any process exits, kill the others
cleanup() {
kill $CANVAS_PID 2>/dev/null || true
kill $PLATFORM_PID 2>/dev/null || true
+ [ -n "$MEMORY_PLUGIN_PID" ] && kill $MEMORY_PLUGIN_PID 2>/dev/null || true
}
trap cleanup EXIT SIGTERM SIGINT
-# Wait for either to exit — whichever exits first triggers cleanup
-wait -n $CANVAS_PID $PLATFORM_PID
+# Wait for any to exit — whichever exits first triggers cleanup
+if [ -n "$MEMORY_PLUGIN_PID" ]; then
+ wait -n $CANVAS_PID $PLATFORM_PID $MEMORY_PLUGIN_PID
+else
+ wait -n $CANVAS_PID $PLATFORM_PID
+fi
EXIT_CODE=$?
cleanup
exit $EXIT_CODE
diff --git a/workspace-server/internal/handlers/chat_files.go b/workspace-server/internal/handlers/chat_files.go
index ccfa0d4c..f5e980bf 100644
--- a/workspace-server/internal/handlers/chat_files.go
+++ b/workspace-server/internal/handlers/chat_files.go
@@ -600,14 +600,21 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
return
}
- out := make([]uploadedFile, 0, len(headers))
+ // Phase 1: pre-validate + read every part BEFORE any DB write.
+ // A multi-file upload must commit all-or-nothing; a per-file
+ // failure halfway through used to leave rows 1..K-1 in the table
+ // while the client got a 500 and retried the whole batch — duplicate
+ // rows, orphan activity rows. Validating up-front + atomic PutBatch
+ // closes that gap.
+ type prepped struct {
+ Sanitized string
+ Mimetype string
+ Content []byte
+ Original string // original (unsanitized) filename for error messages
+ }
+ prepReady := make([]prepped, 0, len(headers))
+ items := make([]pendinguploads.PutItem, 0, len(headers))
for _, fh := range headers {
- // Read full content. Per-file cap enforced post-read so an
- // oversized file fails with a clean 413 rather than a torn
- // stream. The +1 byte ReadAll trick that the Python side
- // uses isn't easy through multipart.FileHeader; instead we
- // rely on the multipart layer's ContentLength header and
- // short-circuit before opening the part.
if fh.Size > pendinguploads.MaxFileBytes {
log.Printf("chat_files uploadPollMode: per-file cap exceeded for %s: %s (%d bytes)",
workspaceID, fh.Filename, fh.Size)
@@ -621,45 +628,67 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
}
content, err := readMultipartFile(fh)
if err != nil {
- log.Printf("chat_files uploadPollMode: read part failed for %s/%s: %v", workspaceID, fh.Filename, err)
+ log.Printf("chat_files uploadPollMode: read part failed for %s/%s: %v",
+ workspaceID, fh.Filename, err)
c.JSON(http.StatusBadRequest, gin.H{"error": "could not read file part"})
return
}
-
- sanitized := SanitizeFilename(fh.Filename)
- mimetype := fh.Header.Get("Content-Type")
-
- fileID, err := h.pendingUploads.Put(ctx, wsUUID, content, sanitized, mimetype)
- if err != nil {
- if errors.Is(err, pendinguploads.ErrTooLarge) {
- // Belt + suspenders: the size check above already
- // caught this, but Storage.Put re-validates so a
- // malformed FileHeader can't slip through. 413 with
- // the same shape so the client sees one error class.
- c.JSON(http.StatusRequestEntityTooLarge, gin.H{
- "error": "file exceeds per-file cap",
- "filename": fh.Filename,
- "size": len(content),
- "max": pendinguploads.MaxFileBytes,
- })
- return
- }
- log.Printf("chat_files uploadPollMode: storage.Put failed for %s/%s: %v",
- workspaceID, sanitized, err)
- c.JSON(http.StatusInternalServerError, gin.H{"error": "could not stage file"})
+ // Belt-and-braces post-read cap (multipart.FileHeader.Size can lie
+ // on some clients that don't set Content-Length per part).
+ if len(content) > pendinguploads.MaxFileBytes {
+ log.Printf("chat_files uploadPollMode: per-file cap exceeded post-read for %s: %s (%d bytes)",
+ workspaceID, fh.Filename, len(content))
+ c.JSON(http.StatusRequestEntityTooLarge, gin.H{
+ "error": "file exceeds per-file cap",
+ "filename": fh.Filename,
+ "size": len(content),
+ "max": pendinguploads.MaxFileBytes,
+ })
return
}
+ sanitized := SanitizeFilename(fh.Filename)
+ mimetype := safeMimetype(fh.Header.Get("Content-Type"))
+ prepReady = append(prepReady, prepped{
+ Sanitized: sanitized, Mimetype: mimetype, Content: content, Original: fh.Filename,
+ })
+ items = append(items, pendinguploads.PutItem{
+ Content: content, Filename: sanitized, Mimetype: mimetype,
+ })
+ }
- // Activity row so the workspace's inbox poller picks this up
- // on its next cycle. activity_type=a2a_receive (NOT a new
- // type) so the existing poll filter
- // `?type=a2a_receive` catches it without poll-side changes;
- // method=chat_upload_receive is the discriminator the
- // workspace's adapter (Phase 2) uses to route to the upload
- // fetcher instead of the agent's message handler. Same
- // shape as A2A's tasks/send vs message/send method split.
+ // Phase 2: atomic batch insert. On failure no rows commit.
+ fileIDs, err := h.pendingUploads.PutBatch(ctx, wsUUID, items)
+ if err != nil {
+ if errors.Is(err, pendinguploads.ErrTooLarge) {
+ // Belt + suspenders: pre-validation above already caught
+ // this; surface a clean 413 if a malformed FileHeader
+ // somehow slipped through.
+ c.JSON(http.StatusRequestEntityTooLarge, gin.H{
+ "error": "one or more files exceed per-file cap",
+ "max": pendinguploads.MaxFileBytes,
+ })
+ return
+ }
+ log.Printf("chat_files uploadPollMode: storage.PutBatch failed for %s: %v",
+ workspaceID, err)
+ c.JSON(http.StatusInternalServerError, gin.H{"error": "could not stage files"})
+ return
+ }
+
+ // Phase 3: write per-file activity rows and build the response. Activity
+ // rows are written individually (not part of the same Tx as PutBatch)
+ // because LogActivity is shared across many handlers and threading the
+ // Tx through would be a bigger refactor. The trade-off: if an activity
+ // write fails after the PutBatch commits, the pending_uploads rows
+ // orphan until the 24h TTL — significantly better than the previous
+ // "every multi-file upload could orphan" behavior, and the workspace's
+ // fetcher handles soft-404 cleanly when activity rows reference a row
+ // the platform later expired.
+ out := make([]uploadedFile, 0, len(prepReady))
+ for i, p := range prepReady {
+ fileID := fileIDs[i]
uri := fmt.Sprintf("platform-pending:%s/%s", workspaceID, fileID)
- summary := "chat_upload_receive: " + sanitized
+ summary := "chat_upload_receive: " + p.Sanitized
method := "chat_upload_receive"
LogActivity(ctx, h.broadcaster, ActivityParams{
WorkspaceID: workspaceID,
@@ -669,28 +698,65 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
Summary: &summary,
RequestBody: map[string]interface{}{
"file_id": fileID.String(),
- "name": sanitized,
- "mimeType": mimetype,
- "size": len(content),
+ "name": p.Sanitized,
+ "mimeType": p.Mimetype,
+ "size": len(p.Content),
"uri": uri,
},
Status: "ok",
})
log.Printf("chat_files uploadPollMode: staged %s/%s (file_id=%s size=%d mimetype=%q)",
- workspaceID, sanitized, fileID, len(content), mimetype)
+ workspaceID, p.Sanitized, fileID, len(p.Content), p.Mimetype)
out = append(out, uploadedFile{
URI: uri,
- Name: sanitized,
- Mimetype: mimetype,
- Size: int64(len(content)),
+ Name: p.Sanitized,
+ Mimetype: p.Mimetype,
+ Size: int64(len(p.Content)),
})
}
c.JSON(http.StatusOK, gin.H{"files": out})
}
+// safeMimetype validates a multipart-supplied Content-Type header and
+// returns a sanitized value safe to store + serve back unmodified.
+//
+// The platform's GET /content handler reflects the stored mimetype as
+// the response Content-Type. An attacker-controlled header that
+// embedded CR/LF could split the response (header injection); a value
+// containing semicolons could carry an unexpected charset parameter
+// that confuses a downstream renderer. Strip CR/LF/control chars +
+// keep only the type/subtype prefix; reject anything that doesn't
+// match a basic `type/subtype` regex by falling back to the safe
+// default (application/octet-stream — the workspace-side handler does
+// the same fallback).
+func safeMimetype(raw string) string {
+ const fallback = "application/octet-stream"
+ // Trim parameters (`text/html; charset=utf-8` → `text/html`).
+ if i := strings.IndexByte(raw, ';'); i >= 0 {
+ raw = raw[:i]
+ }
+ raw = strings.TrimSpace(raw)
+ if raw == "" {
+ return ""
+ }
+ // Reject if any control char or whitespace is present (header
+ // injection defense). RFC 7231 mimetype grammar forbids whitespace.
+ for _, r := range raw {
+ if r < 0x21 || r > 0x7e {
+ return fallback
+ }
+ }
+ // Require exactly one slash separating type and subtype.
+ parts := strings.Split(raw, "/")
+ if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
+ return fallback
+ }
+ return raw
+}
+
// readMultipartFile reads a multipart part fully into memory. Wraps
// the open + io.ReadAll + close idiom so the call site stays clean,
// and so a future change (chunked reads / hashing) has one place to
diff --git a/workspace-server/internal/handlers/chat_files_poll_test.go b/workspace-server/internal/handlers/chat_files_poll_test.go
index c064bd6a..eb23acf1 100644
--- a/workspace-server/internal/handlers/chat_files_poll_test.go
+++ b/workspace-server/internal/handlers/chat_files_poll_test.go
@@ -67,12 +67,59 @@ func (s *inMemStorage) Put(_ context.Context, ws uuid.UUID, content []byte, file
return id, nil
}
+// PutBatch mirrors the production atomic-batch contract: any per-item
+// failure leaves the in-memory state unchanged, simulating Tx rollback.
+// Pre-validation matches PostgresStorage.PutBatch; oversized items
+// return ErrTooLarge before any row is added.
+func (s *inMemStorage) PutBatch(_ context.Context, ws uuid.UUID, items []pendinguploads.PutItem) ([]uuid.UUID, error) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if s.putErr != nil {
+ return nil, s.putErr
+ }
+ // Pre-validate so an oversized item rejects the whole batch before
+ // any state mutation — matches the Tx-rollback semantics.
+ for _, it := range items {
+ if len(it.Content) > pendinguploads.MaxFileBytes {
+ return nil, pendinguploads.ErrTooLarge
+ }
+ }
+ ids := make([]uuid.UUID, 0, len(items))
+ stagedRows := make(map[uuid.UUID]pendinguploads.Record, len(items))
+ stagedPuts := make([]putCall, 0, len(items))
+ for _, it := range items {
+ id := uuid.New()
+ stagedRows[id] = pendinguploads.Record{
+ FileID: id, WorkspaceID: ws, Content: it.Content,
+ Filename: it.Filename, Mimetype: it.Mimetype,
+ SizeBytes: int64(len(it.Content)), CreatedAt: time.Now(),
+ ExpiresAt: time.Now().Add(24 * time.Hour),
+ }
+ stagedPuts = append(stagedPuts, putCall{
+ WorkspaceID: ws, Filename: it.Filename, Mimetype: it.Mimetype, Size: len(it.Content),
+ })
+ ids = append(ids, id)
+ }
+ for id, r := range stagedRows {
+ s.rows[id] = r
+ }
+ s.puts = append(s.puts, stagedPuts...)
+ return ids, nil
+}
+
func (s *inMemStorage) Get(context.Context, uuid.UUID) (pendinguploads.Record, error) {
return pendinguploads.Record{}, pendinguploads.ErrNotFound
}
func (s *inMemStorage) MarkFetched(context.Context, uuid.UUID) error { return nil }
func (s *inMemStorage) Ack(context.Context, uuid.UUID) error { return nil }
+// Sweep is required by the Storage interface (Phase 3 GC). Not
+// exercised by upload-branch tests — the dedicated sweeper_test.go +
+// storage_sweep_test.go cover it.
+func (s *inMemStorage) Sweep(context.Context, time.Duration) (pendinguploads.SweepResult, error) {
+ return pendinguploads.SweepResult{}, nil
+}
+
// expectPollDeliveryMode stubs the SELECT delivery_mode lookup that
// uploadPollMode does (separate from the one resolveWorkspaceForwardCreds
// does — this is the new helper introduced for the poll branch).
@@ -154,7 +201,7 @@ func TestPollUpload_HappyPath_OneFile_StagesAndLogs(t *testing.T) {
expectActivityInsert(mock)
store := newInMemStorage()
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"report.pdf": []byte("PDF-bytes")})
@@ -212,7 +259,7 @@ func TestPollUpload_MultipleFiles_AllStagedAndLogged(t *testing.T) {
expectActivityInsert(mock)
store := newInMemStorage()
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{
@@ -250,7 +297,7 @@ func TestPollUpload_PushModeFallsThroughToForward(t *testing.T) {
// URL empty + mode=push → 503 (no inbound secret check needed).
store := newInMemStorage()
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("data")})
@@ -274,7 +321,7 @@ func TestPollUpload_NotConfigured_FallsThrough(t *testing.T) {
wsID := "33333333-2222-3333-4444-555555555555"
expectURLAndMode(mock, wsID, "", "poll") // resolveWorkspaceForwardCreds emits 422
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
// No WithPendingUploads — pendingUploads is nil.
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("data")})
@@ -295,7 +342,7 @@ func TestPollUpload_WorkspaceMissing_404(t *testing.T) {
wsID := "44444444-2222-3333-4444-555555555555"
expectPollDeliveryModeMissing(mock, wsID)
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(newInMemStorage(), nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("d")})
@@ -315,7 +362,7 @@ func TestPollUpload_DeliveryModeLookupDBError_500(t *testing.T) {
mock.ExpectQuery(`SELECT delivery_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).WillReturnError(errors.New("connection lost"))
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(newInMemStorage(), nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("d")})
@@ -335,7 +382,7 @@ func TestPollUpload_NoFilesField_400(t *testing.T) {
expectPollDeliveryMode(mock, wsID, "poll")
store := newInMemStorage()
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
// Multipart with a non-files field — no actual files.
@@ -360,7 +407,7 @@ func TestPollUpload_MalformedMultipart_400(t *testing.T) {
expectPollDeliveryMode(mock, wsID, "poll")
store := newInMemStorage()
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
// Body that doesn't match the boundary in Content-Type.
@@ -381,7 +428,7 @@ func TestPollUpload_StorageError_500(t *testing.T) {
store := newInMemStorage()
store.putErr = errors.New("disk full")
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")})
@@ -402,7 +449,7 @@ func TestPollUpload_StorageTooLarge_413(t *testing.T) {
store := newInMemStorage()
store.putErr = pendinguploads.ErrTooLarge
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")})
@@ -422,7 +469,7 @@ func TestPollUpload_TooManyFiles_400(t *testing.T) {
expectPollDeliveryMode(mock, wsID, "poll")
store := newInMemStorage()
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
// 65 files — over the per-batch cap.
@@ -457,7 +504,7 @@ func TestPollUpload_NullDeliveryMode_TreatedAsPush(t *testing.T) {
expectURLAndMode(mock, wsID, "", "")
store := newInMemStorage()
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")})
@@ -490,7 +537,7 @@ func TestPollUpload_PerFileCapPreStorage_413(t *testing.T) {
expectPollDeliveryMode(mock, wsID, "poll")
store := newInMemStorage()
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
// 25 MB + 1 byte. Single file, large enough to trip the early
@@ -525,7 +572,7 @@ func TestPollUpload_SanitizesFilenameInResponse(t *testing.T) {
expectActivityInsert(mock)
store := newInMemStorage()
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"hello world!.pdf": []byte("data")})
@@ -550,6 +597,120 @@ func TestPollUpload_SanitizesFilenameInResponse(t *testing.T) {
}
}
+// TestPollUpload_AtomicRollbackOnSecondFileTooLarge pins the
+// transactional contract introduced in phase 5: when one file in a
+// multi-file batch fails pre-validation (oversize), NONE of the files
+// in the batch land in storage. Previously a per-file Put loop would
+// stage rows 1..K-1 before failing on row K, leaving orphan
+// pending_uploads + activity rows the client would re-create on retry.
+//
+// Pinned via inMemStorage's PutBatch (which mirrors PostgresStorage's
+// Tx-rollback behavior on a per-item validation failure) — but the
+// real atomicity guarantee is the integration test in
+// pending_uploads_integration_test.go.
+func TestPollUpload_AtomicRollbackOnSecondFileTooLarge(t *testing.T) {
+ mock := setupTestDB(t)
+ setupTestRedis(t)
+
+ wsID := "aaaaaaaa-3333-3333-4444-555555555555"
+ expectPollDeliveryMode(mock, wsID, "poll")
+
+ store := newInMemStorage()
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
+ WithPendingUploads(store, nil)
+
+ // Two files: first OK, second over the per-file cap. Pre-validation
+ // in uploadPollMode catches it BEFORE any Put — store.puts must
+ // stay empty. (If the test ever sees len=1, the regression is
+ // "first file slipped through into storage on a partial-failure
+ // batch.")
+ tooBig := bytes.Repeat([]byte{0x42}, pendinguploads.MaxFileBytes+1)
+ body, ct := pollUploadFixture(t, map[string][]byte{
+ "ok.txt": []byte("small"),
+ "huge.bin": tooBig,
+ })
+ c, w := makeUploadRequest(t, wsID, body, ct)
+ h.Upload(c)
+
+ if w.Code != http.StatusRequestEntityTooLarge {
+ t.Errorf("status=%d body=%s, want 413", w.Code, w.Body.String())
+ }
+ if len(store.puts) != 0 {
+ t.Errorf("expected zero Puts on rollback, got %d: %+v", len(store.puts), store.puts)
+ }
+}
+
+// TestPollUpload_AtomicRollbackOnPutBatchError validates that an in-
+// flight PutBatch failure (e.g. simulated DB error) leaves zero rows
+// — same guarantee as the pre-validation path, but exercises the
+// "Tx-Rollback after BEGIN" branch via the fake.
+func TestPollUpload_AtomicRollbackOnPutBatchError(t *testing.T) {
+ mock := setupTestDB(t)
+ setupTestRedis(t)
+
+ wsID := "bbbbbbbb-3333-3333-4444-555555555555"
+ expectPollDeliveryMode(mock, wsID, "poll")
+
+ store := newInMemStorage()
+ store.putErr = errors.New("db down mid-batch")
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
+ WithPendingUploads(store, nil)
+
+ body, ct := pollUploadFixture(t, map[string][]byte{
+ "a.txt": []byte("aaa"),
+ "b.txt": []byte("bbb"),
+ "c.txt": []byte("ccc"),
+ })
+ c, w := makeUploadRequest(t, wsID, body, ct)
+ h.Upload(c)
+
+ if w.Code != http.StatusInternalServerError {
+ t.Errorf("status=%d, want 500", w.Code)
+ }
+ if len(store.puts) != 0 {
+ t.Errorf("expected zero Puts after PutBatch error, got %d", len(store.puts))
+ }
+}
+
+// TestPollUpload_MimetypeWithCRLFInjectionStripped pins the safeMimetype
+// hardening: a multipart-supplied Content-Type header with CR/LF is
+// rewritten to application/octet-stream so the eventual /content
+// response can't be header-split on the wire.
+func TestPollUpload_MimetypeWithCRLFInjectionStripped(t *testing.T) {
+ got := safeMimetype("text/html\r\nX-Injected: pwn")
+ if got != "application/octet-stream" {
+ t.Errorf("CRLF mimetype not stripped, got %q", got)
+ }
+ got = safeMimetype("image/png\x00")
+ if got != "application/octet-stream" {
+ t.Errorf("NUL byte mimetype not stripped, got %q", got)
+ }
+ got = safeMimetype("text/plain; charset=utf-8")
+ if got != "text/plain" {
+ t.Errorf("parameter not stripped, got %q", got)
+ }
+ got = safeMimetype("application/pdf")
+ if got != "application/pdf" {
+ t.Errorf("clean mime modified, got %q", got)
+ }
+ got = safeMimetype("")
+ if got != "" {
+ t.Errorf("empty input should pass through, got %q", got)
+ }
+ got = safeMimetype("notamime")
+ if got != "application/octet-stream" {
+ t.Errorf("non-type/subtype not coerced, got %q", got)
+ }
+ got = safeMimetype("/empty-type")
+ if got != "application/octet-stream" {
+ t.Errorf("missing type half not coerced, got %q", got)
+ }
+ got = safeMimetype("type/")
+ if got != "application/octet-stream" {
+ t.Errorf("missing subtype half not coerced, got %q", got)
+ }
+}
+
// TestPollUpload_ActivityRowDiscriminator pins the
// activity_type / method shape that the workspace inbox poller depends
// on. The poller filters `GET /workspaces/:id/activity?type=a2a_receive`
@@ -573,7 +734,7 @@ func TestPollUpload_ActivityRowDiscriminator(t *testing.T) {
expectActivityInsertWithTypeAndMethod(mock, wsID, "a2a_receive", "chat_upload_receive")
store := newInMemStorage()
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x.pdf": []byte("xx")})
diff --git a/workspace-server/internal/handlers/chat_files_test.go b/workspace-server/internal/handlers/chat_files_test.go
index e7829f45..6012d3a7 100644
--- a/workspace-server/internal/handlers/chat_files_test.go
+++ b/workspace-server/internal/handlers/chat_files_test.go
@@ -105,7 +105,7 @@ func TestChatUpload_InvalidWorkspaceID(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
c, w := makeUploadRequest(t, "not-a-uuid", &bytes.Buffer{}, "")
h.Upload(c)
@@ -122,7 +122,7 @@ func TestChatUpload_WorkspaceNotInDB(t *testing.T) {
wsID := "00000000-0000-0000-0000-000000000099"
expectURLMissing(mock, wsID)
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@@ -166,7 +166,7 @@ func TestChatUpload_NoInboundSecret_LazyHeal(t *testing.T) {
WithArgs(sqlmock.AnyArg(), wsID).
WillReturnResult(sqlmock.NewResult(0, 1))
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@@ -203,7 +203,7 @@ func TestChatUpload_NoInboundSecret_LazyHealFailure(t *testing.T) {
WithArgs(sqlmock.AnyArg(), wsID).
WillReturnError(sql.ErrConnDone) // mint fails
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@@ -231,7 +231,7 @@ func TestChatUpload_NoURL(t *testing.T) {
wsID := "00000000-0000-0000-0000-000000000042"
expectURLAndMode(mock, wsID, "", "push")
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@@ -256,7 +256,7 @@ func TestChatUpload_PollModeEmptyURL(t *testing.T) {
wsID := "00000000-0000-0000-0000-000000000099"
expectURLAndMode(mock, wsID, "", "poll")
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@@ -286,7 +286,7 @@ func TestChatUpload_NullModeEmptyURL(t *testing.T) {
wsID := "30ba7f0b-b303-4a20-aefe-3a4a675b8aa4" // user's "mac laptop"
expectURLNullMode(mock, wsID, "")
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@@ -338,7 +338,7 @@ func TestChatUpload_ForwardsToWorkspace_HappyPath(t *testing.T) {
expectURL(mock, wsID, srv.URL)
expectInboundSecret(mock, wsID, "super-secret-123")
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@@ -380,7 +380,7 @@ func TestChatUpload_ForwardsErrorStatusUnchanged(t *testing.T) {
expectURL(mock, wsID, srv.URL)
expectInboundSecret(mock, wsID, "tok")
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@@ -402,7 +402,7 @@ func TestChatUpload_WorkspaceUnreachable(t *testing.T) {
expectURL(mock, wsID, "http://127.0.0.1:1")
expectInboundSecret(mock, wsID, "tok")
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@@ -418,7 +418,7 @@ func TestChatDownload_InvalidPath(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
cases := []struct {
name, path, wantSubstr string
@@ -507,7 +507,7 @@ func TestChatDownload_WorkspaceNotInDB(t *testing.T) {
WithArgs(wsID).
WillReturnError(sql.ErrNoRows)
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
h.Download(c)
@@ -533,7 +533,7 @@ func TestChatDownload_NoInboundSecret_LazyHeal(t *testing.T) {
WithArgs(sqlmock.AnyArg(), wsID).
WillReturnResult(sqlmock.NewResult(0, 1))
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
h.Download(c)
@@ -559,7 +559,7 @@ func TestChatDownload_NoInboundSecret_LazyHealFailure(t *testing.T) {
WithArgs(sqlmock.AnyArg(), wsID).
WillReturnError(sql.ErrConnDone)
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
h.Download(c)
@@ -592,7 +592,7 @@ func TestChatDownload_ForwardsToWorkspace_HappyPath(t *testing.T) {
expectURL(mock, wsID, srv.URL)
expectInboundSecret(mock, wsID, "the-secret")
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
c, w := makeDownloadRequest(t, wsID, "/workspace/report.txt")
h.Download(c)
@@ -634,7 +634,7 @@ func TestChatDownload_404FromWorkspacePropagated(t *testing.T) {
expectURL(mock, wsID, srv.URL)
expectInboundSecret(mock, wsID, "tok")
- h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
+ h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
c, w := makeDownloadRequest(t, wsID, "/workspace/missing.txt")
h.Download(c)
diff --git a/workspace-server/internal/handlers/class1_ast_gate_test.go b/workspace-server/internal/handlers/class1_ast_gate_test.go
new file mode 100644
index 00000000..bb362364
--- /dev/null
+++ b/workspace-server/internal/handlers/class1_ast_gate_test.go
@@ -0,0 +1,468 @@
+package handlers
+
+// class1_ast_gate_test.go — generic Class 1 leak gate per #2867 PR-A.
+//
+// What this gate prevents:
+// The tenant-hongming leak class — a handler iterates a YAML-derived
+// slice (ws.Children, sub_workspaces, etc.) and calls
+// `INSERT INTO workspaces` inside the loop body without first
+// checking whether a workspace with the same (parent_id, name) is
+// already there. Each call to such a handler doubles the tree.
+//
+// Why this is broader than TestCreateWorkspaceTree_CallsLookupBeforeInsert:
+// The existing gate is hard-coded to org_import.go's createWorkspaceTree.
+// That catches the specific function that triggered the original
+// incident — but a future handler written from scratch in a different
+// file would not be covered. This gate walks every production handler
+// .go file and applies a structural rule that does not depend on
+// function or file names.
+//
+// The rule (verbatim from #2867 PR-A):
+//
+// "No handler in handlers/ may iterate a slice (any RangeStmt) AND
+// call INSERT INTO workspaces inside the loop body without a
+// preceding SELECT id FROM workspaces WHERE name=$1 AND parent_id IS
+// NOT DISTINCT FROM $2 in the same function (== a lookupExistingChild
+// call, OR an ON CONFLICT clause baked into the same INSERT, OR an
+// explicit allowlist annotation)."
+//
+// Allowlist mechanism: a function whose body contains the exact comment
+// string `// class1-gate: idempotent-by-design` is treated as safe.
+// Use this only after writing a unit test that pins WHY the function
+// is safe. The annotation is intentionally awkward to type — it should
+// be rare.
+
+import (
+ "go/ast"
+ "go/parser"
+ "go/token"
+ "os"
+ "path/filepath"
+ "regexp"
+ "sort"
+ "strings"
+ "testing"
+)
+
+// reINSERTWorkspaces matches the exact statement shape we care about.
+// Tightened (vs bytes.Index "INSERT INTO workspaces") so the audit
+// table `workspaces_audit` literal — or any other lookalike — does not
+// false-positive trigger this gate. The same regex is used in the
+// existing createWorkspaceTree gate (workspaces_insert_allowlist_test.go)
+// — keep them in sync if either changes.
+var reINSERTWorkspaces = regexp.MustCompile(`(?m)^\s*INSERT INTO workspaces\s*\(`)
+
+// reONCONFLICT matches ON CONFLICT clauses anywhere in the same SQL
+// literal. An UPSERT (INSERT ... ON CONFLICT ... DO UPDATE) is
+// idempotent by definition, so the gate exempts it.
+var reONCONFLICT = regexp.MustCompile(`(?i)\bON CONFLICT\b`)
+
+// gateAllowlistComment is the magic comment a function author writes
+// to opt out of this gate. Forces an explicit decision.
+const gateAllowlistComment = "// class1-gate: idempotent-by-design"
+
+// preflightCallNames are function names whose presence in a function
+// body counts as "did a SELECT-by-(parent_id, name) preflight". Add
+// new names here as new preflight helpers are introduced. Keep the
+// list TIGHT — any sloppy addition weakens the gate.
+var preflightCallNames = map[string]bool{
+ "lookupExistingChild": true,
+}
+
+// TestClass1_NoUnpreflightedInsertInsideRange walks every production
+// .go file in this package, parses the AST, and fails the test if any
+// FuncDecl violates the rule above.
+//
+// Failure message must include: file path, function name, line of
+// the offending INSERT, line of the enclosing range, and a hint at
+// the three escape hatches (preflight call, ON CONFLICT, allowlist
+// comment).
+func TestClass1_NoUnpreflightedInsertInsideRange(t *testing.T) {
+ wd, err := os.Getwd()
+ if err != nil {
+ t.Fatalf("getwd: %v", err)
+ }
+
+ entries, err := os.ReadDir(wd)
+ if err != nil {
+ t.Fatalf("readdir %s: %v", wd, err)
+ }
+
+ type violation struct {
+ file string
+ fn string
+ insertLine int
+ rangeLine int
+ }
+ var violations []violation
+ scanned := 0
+
+ for _, e := range entries {
+ name := e.Name()
+ if e.IsDir() || !strings.HasSuffix(name, ".go") {
+ continue
+ }
+ if strings.HasSuffix(name, "_test.go") {
+ continue
+ }
+ path := filepath.Join(wd, name)
+ src, err := os.ReadFile(path)
+ if err != nil {
+ t.Fatalf("read %s: %v", path, err)
+ }
+ fset := token.NewFileSet()
+ file, err := parser.ParseFile(fset, name, src, parser.ParseComments)
+ if err != nil {
+ t.Fatalf("parse %s: %v", path, err)
+ }
+ scanned++
+
+ // Walk every function declaration and apply the rule.
+ for _, decl := range file.Decls {
+ fd, ok := decl.(*ast.FuncDecl)
+ if !ok || fd.Body == nil {
+ continue
+ }
+
+ // Allowlist: skip if the function body contains the magic
+ // comment. We check via the source range of the function
+ // — comments inside the body are in file.Comments and
+ // must overlap the function's Pos/End range.
+ if functionHasAllowlistComment(file, fd) {
+ continue
+ }
+
+ // First pass: locate every INSERT INTO workspaces literal
+ // in this function. We treat each such literal as a
+ // candidate violation and try to clear it via the rules.
+ candidates := findInsertWorkspacesLiterals(fd, src, fset)
+ if len(candidates) == 0 {
+ continue
+ }
+
+ // Has the function called a preflight helper? Single
+ // pass — if any preflight name appears, every INSERT in
+ // the function is considered preflighted. This is more
+ // permissive than position-aware (preflight could be
+ // AFTER the INSERT and still satisfy the gate), but the
+ // existing org_import.go gate already pins the position
+ // invariant for createWorkspaceTree, and a function that
+ // preflights AFTER inserting would fail the position
+ // gate in a separate test.
+ hasPreflight := functionCallsAny(fd, preflightCallNames)
+
+ for _, c := range candidates {
+ if c.hasONCONFLICT {
+ continue
+ }
+ if hasPreflight {
+ continue
+ }
+ if c.enclosingRangeLine == 0 {
+ // INSERT not inside any RangeStmt — single-shot,
+ // not the bug pattern.
+ continue
+ }
+ violations = append(violations, violation{
+ file: name,
+ fn: fd.Name.Name,
+ insertLine: c.insertLine,
+ rangeLine: c.enclosingRangeLine,
+ })
+ }
+ }
+ }
+
+ if scanned == 0 {
+ t.Fatal("scanned 0 .go files — wrong working directory? gate would always pass")
+ }
+
+ if len(violations) > 0 {
+ // Stable sort so the failure message is deterministic across
+ // reruns.
+ sort.Slice(violations, func(i, j int) bool {
+ if violations[i].file != violations[j].file {
+ return violations[i].file < violations[j].file
+ }
+ return violations[i].insertLine < violations[j].insertLine
+ })
+ var b strings.Builder
+ b.WriteString("Class 1 leak gate (#2867 PR-A) — these handler functions iterate a slice and INSERT INTO workspaces inside the loop body without a (parent_id, name) preflight.\n\n")
+ b.WriteString("This is the bug shape that triggered the tenant-hongming leak (TeamHandler.Expand re-inserting the entire sub_workspaces tree on every call). To fix any reported violation, choose ONE of:\n")
+ b.WriteString(" 1. Call h.lookupExistingChild(ctx, name, parentID) before the INSERT and skip the INSERT when it returns existing=true. (preferred)\n")
+ b.WriteString(" 2. Use INSERT ... ON CONFLICT ... DO ... (idempotent UPSERT, like registry.go).\n")
+ b.WriteString(" 3. Annotate the function with a `// class1-gate: idempotent-by-design` comment AND a unit test that pins why the function is structurally idempotent. (rare; require code review)\n\n")
+ b.WriteString("Violations:\n")
+ for _, v := range violations {
+ b.WriteString(" - ")
+ b.WriteString(v.file)
+ b.WriteString(":")
+ b.WriteString(itoa(v.insertLine))
+ b.WriteString(" — function ")
+ b.WriteString(v.fn)
+ b.WriteString("() INSERTs inside RangeStmt at line ")
+ b.WriteString(itoa(v.rangeLine))
+ b.WriteString("\n")
+ }
+ t.Fatal(b.String())
+ }
+}
+
+func itoa(n int) string {
+ // Avoid strconv import for one call site — keeps the test focused.
+ if n == 0 {
+ return "0"
+ }
+ neg := n < 0
+ if neg {
+ n = -n
+ }
+ var buf [20]byte
+ i := len(buf)
+ for n > 0 {
+ i--
+ buf[i] = byte('0' + n%10)
+ n /= 10
+ }
+ if neg {
+ i--
+ buf[i] = '-'
+ }
+ return string(buf[i:])
+}
+
+// candidateInsert holds the per-INSERT facts needed to decide whether
+// the gate fires.
+type candidateInsert struct {
+ insertLine int
+ hasONCONFLICT bool
+ enclosingRangeLine int // 0 means not inside any range
+}
+
+// findInsertWorkspacesLiterals walks fd's body and returns one
+// candidateInsert per INSERT INTO workspaces string literal.
+//
+// Position-based detection: collect every RangeStmt's body span first,
+// then for each INSERT literal check if its position is inside any
+// span. ast.Inspect's nil-call ordering does NOT give per-node pop
+// semantics, so a stack-based approach against ast.Inspect would
+// silently miscount. Position spans are deterministic and easy to
+// reason about.
+func findInsertWorkspacesLiterals(fd *ast.FuncDecl, src []byte, fset *token.FileSet) []candidateInsert {
+ var out []candidateInsert
+
+ type span struct{ start, end token.Pos }
+ var ranges []span
+ ast.Inspect(fd.Body, func(n ast.Node) bool {
+ rs, ok := n.(*ast.RangeStmt)
+ if !ok || rs.Body == nil {
+ return true
+ }
+ ranges = append(ranges, span{rs.Body.Lbrace, rs.Body.Rbrace})
+ return true
+ })
+
+ enclosingRangeLineFor := func(p token.Pos) int {
+ // Pick the innermost enclosing range — i.e., the one with the
+ // largest start that still covers p. Innermost is the one
+ // whose body actually contains the INSERT, which is the line
+ // most useful in a violation message.
+ bestStart := token.NoPos
+ bestLine := 0
+ for _, s := range ranges {
+ if p > s.start && p < s.end && s.start > bestStart {
+ bestStart = s.start
+ bestLine = fset.Position(s.start).Line
+ }
+ }
+ return bestLine
+ }
+
+ ast.Inspect(fd.Body, func(n ast.Node) bool {
+ bl, ok := n.(*ast.BasicLit)
+ if !ok || bl.Kind != token.STRING {
+ return true
+ }
+ // Strip surrounding backticks/quotes — value includes them.
+ lit := bl.Value
+ if len(lit) >= 2 {
+ lit = lit[1 : len(lit)-1]
+ }
+ if !reINSERTWorkspaces.MatchString(lit) {
+ return true
+ }
+ out = append(out, candidateInsert{
+ insertLine: fset.Position(bl.Pos()).Line,
+ hasONCONFLICT: reONCONFLICT.MatchString(lit),
+ enclosingRangeLine: enclosingRangeLineFor(bl.Pos()),
+ })
+ return true
+ })
+ return out
+}
+
+// functionCallsAny returns true if any CallExpr in fd's body has a
+// function name (either a SelectorExpr Sel.Name or an Ident name)
+// matching a key in names.
+func functionCallsAny(fd *ast.FuncDecl, names map[string]bool) bool {
+ found := false
+ ast.Inspect(fd.Body, func(n ast.Node) bool {
+ if found {
+ return false
+ }
+ ce, ok := n.(*ast.CallExpr)
+ if !ok {
+ return true
+ }
+ switch fun := ce.Fun.(type) {
+ case *ast.Ident:
+ if names[fun.Name] {
+ found = true
+ return false
+ }
+ case *ast.SelectorExpr:
+ if names[fun.Sel.Name] {
+ found = true
+ return false
+ }
+ }
+ return true
+ })
+ return found
+}
+
+// functionHasAllowlistComment returns true if the function body
+// (between fd.Body.Lbrace and fd.Body.Rbrace) contains a comment
+// equal to gateAllowlistComment.
+func functionHasAllowlistComment(file *ast.File, fd *ast.FuncDecl) bool {
+ if fd.Body == nil {
+ return false
+ }
+ start := fd.Body.Lbrace
+ end := fd.Body.Rbrace
+ for _, cg := range file.Comments {
+ for _, c := range cg.List {
+ if c.Pos() < start || c.Pos() > end {
+ continue
+ }
+ if strings.TrimSpace(c.Text) == gateAllowlistComment {
+ return true
+ }
+ }
+ }
+ return false
+}
+
+// TestClass1_GateFiresOnSyntheticBuggySource — proves the gate actually
+// catches the bug shape it's named after. Without this, a regression
+// to "always pass" would not be noticed until the leak shipped again.
+// Per memory feedback_assert_exact_not_substring.md: tighten the test
+// + verify it FAILS on old-shape source before merging.
+func TestClass1_GateFiresOnSyntheticBuggySource(t *testing.T) {
+ const buggySrc = `package handlers
+
+import "context"
+
+type fakeDB struct{}
+func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
+
+func buggyExpand(db fakeDB, ctx context.Context, children []string) {
+ for _, child := range children {
+ // Bug shape: INSERT inside the range body, no preflight.
+ db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", child)
+ }
+}
+`
+ fset := token.NewFileSet()
+ file, err := parser.ParseFile(fset, "buggy.go", buggySrc, parser.ParseComments)
+ if err != nil {
+ t.Fatalf("parse synthetic source: %v", err)
+ }
+ for _, decl := range file.Decls {
+ fd, ok := decl.(*ast.FuncDecl)
+ if !ok || fd.Name.Name != "buggyExpand" {
+ continue
+ }
+ candidates := findInsertWorkspacesLiterals(fd, []byte(buggySrc), fset)
+ if len(candidates) != 1 {
+ t.Fatalf("expected 1 INSERT literal, got %d", len(candidates))
+ }
+ c := candidates[0]
+ if c.enclosingRangeLine == 0 {
+ t.Errorf("synthetic INSERT inside `for _, child := range` should be detected as enclosed by range, got enclosingRangeLine=0 — gate would miss the bug shape")
+ }
+ if c.hasONCONFLICT {
+ t.Errorf("synthetic INSERT has no ON CONFLICT, gate falsely treated it as idempotent")
+ }
+ if functionCallsAny(fd, preflightCallNames) {
+ t.Errorf("synthetic function does not call lookupExistingChild — gate falsely treated it as preflighted")
+ }
+ // All three guards say the gate WOULD fire. Pass.
+ return
+ }
+ t.Fatal("buggyExpand FuncDecl not found in synthetic source")
+}
+
+// TestClass1_GateAllowsONCONFLICT — pins that an INSERT with ON
+// CONFLICT inside a range body is NOT flagged. registry.go's
+// upsert pattern is the prod example.
+func TestClass1_GateAllowsONCONFLICT(t *testing.T) {
+ const safeSrc = `package handlers
+
+import "context"
+
+type fakeDB struct{}
+func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
+
+func upsertLoop(db fakeDB, ctx context.Context, children []string) {
+ for _, child := range children {
+ db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2) ON CONFLICT (id) DO UPDATE SET name = $2`" + `, "x", child)
+ }
+}
+`
+ fset := token.NewFileSet()
+ file, _ := parser.ParseFile(fset, "safe.go", safeSrc, parser.ParseComments)
+ for _, decl := range file.Decls {
+ fd, ok := decl.(*ast.FuncDecl)
+ if !ok || fd.Name.Name != "upsertLoop" {
+ continue
+ }
+ candidates := findInsertWorkspacesLiterals(fd, []byte(safeSrc), fset)
+ if len(candidates) != 1 {
+ t.Fatalf("expected 1 candidate, got %d", len(candidates))
+ }
+ if !candidates[0].hasONCONFLICT {
+ t.Errorf("ON CONFLICT clause should be detected, was missed — gate would falsely flag idempotent UPSERTs")
+ }
+ }
+}
+
+// TestClass1_GateAllowsAllowlistAnnotation — pins the escape hatch
+// works. Annotated functions are skipped at the FuncDecl level.
+func TestClass1_GateAllowsAllowlistAnnotation(t *testing.T) {
+ const annotatedSrc = `package handlers
+
+import "context"
+
+type fakeDB struct{}
+func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
+
+func intentionallyUnpreflighted(db fakeDB, ctx context.Context, children []string) {
+ // class1-gate: idempotent-by-design
+ for _, child := range children {
+ db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", child)
+ }
+}
+`
+ fset := token.NewFileSet()
+ file, _ := parser.ParseFile(fset, "annotated.go", annotatedSrc, parser.ParseComments)
+ for _, decl := range file.Decls {
+ fd, ok := decl.(*ast.FuncDecl)
+ if !ok || fd.Name.Name != "intentionallyUnpreflighted" {
+ continue
+ }
+ if !functionHasAllowlistComment(file, fd) {
+ t.Error("allowlist comment should be detected for the intentionallyUnpreflighted function — escape hatch not working")
+ }
+ }
+}
diff --git a/workspace-server/internal/handlers/org_import.go b/workspace-server/internal/handlers/org_import.go
index 70151e09..3dfe2fbd 100644
--- a/workspace-server/internal/handlers/org_import.go
+++ b/workspace-server/internal/handlers/org_import.go
@@ -7,6 +7,7 @@ import (
"context"
"database/sql"
"encoding/json"
+ "errors"
"fmt"
"log"
"os"
@@ -21,6 +22,7 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
+ "github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/scheduler"
"github.com/google/uuid"
)
@@ -61,10 +63,33 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
tier = defaults.Tier
}
if tier == 0 {
- tier = 2
+ // Resolved via the same DefaultTier helper Create + Templates
+ // use (#2910 PR-E). SaaS → T4 (one container per sibling EC2,
+ // no neighbour to protect from), self-hosted → T3. Pre-#2910
+ // this path returned T2 on self-hosted, asymmetric with
+ // workspace.go's T3 — undocumented drift. Lifting to
+ // DefaultTier collapses both call sites onto one source of
+ // truth so a future tier-default change sweeps every entry
+ // point at once. Templates that want a different floor still
+ // declare `tier:` in config.yaml or `defaults.tier` in
+ // org.yaml.
+ if h.workspace != nil {
+ tier = h.workspace.DefaultTier()
+ } else {
+ tier = 3
+ }
}
- ctxLookup := context.Background()
+ // 5s timeout bounds the lookup independently of any HTTP request
+ // context. createWorkspaceTree runs in goroutines spawned from the
+ // /org/import handler, so plumbing the request context here would
+ // cascade-cancel into provisionWorkspaceAuto and abort in-flight
+ // EC2 provisioning if the client disconnected mid-import — that's
+ // the wrong behaviour. A short bounded timeout protects the
+ // per-row SELECT against a wedged DB without taking the
+ // drop-everything-on-disconnect tradeoff.
+ ctxLookup, cancelLookup := context.WithTimeout(context.Background(), 5*time.Second)
+ defer cancelLookup()
// Idempotency: if a workspace with the same (parent_id, name) already
// exists, skip the INSERT + canvas_layouts + broadcast + provisioning.
// This is what makes /org/import safe to call multiple times — the
@@ -76,12 +101,31 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
// (parent exists, some children missing) backfill the missing children
// instead of either no-op'ing the whole subtree or duplicating the
// existing children.
+ //
+ // /org/import is ADDITIVE-ONLY, never destructive. Children present
+ // in the existing tree but absent from the new template are
+ // preserved (no DELETE on diff). Skip-path also does NOT propagate
+ // updates to existing nodes — a re-import that adds an
+ // initial_memory or schedule to an existing workspace is silently
+ // dropped (the function bypasses seedInitialMemories, schedule SQL,
+ // channel config for skipped rows). To force-update an existing
+ // tree, delete and re-import or use a future /org/sync route.
existingID, existing, lookupErr := h.lookupExistingChild(ctxLookup, ws.Name, parentID)
if lookupErr != nil {
return fmt.Errorf("idempotency check for %s: %w", ws.Name, lookupErr)
}
if existing {
log.Printf("Org import: %q already exists (id=%s) — skipping create+provision, recursing into children for partial-match", ws.Name, existingID)
+ parentRef := ""
+ if parentID != nil {
+ parentRef = *parentID
+ }
+ provlog.Event("provision.skip_existing", map[string]any{
+ "name": ws.Name,
+ "existing_id": existingID,
+ "parent_id": parentRef,
+ "tier": tier,
+ })
*results = append(*results, map[string]interface{}{
"id": existingID,
"name": ws.Name,
@@ -580,6 +624,12 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
//
// On sql.ErrNoRows: returns ("", false, nil) — caller should INSERT.
// On a real DB error: returns ("", false, err) — caller propagates.
+//
+// errors.Is is wrap-safe — a future caller wrapping the error
+// (database/sql can wrap driver errors with %w in some setups) would
+// silently break a `err == sql.ErrNoRows` equality check, causing the
+// no-rows path to fall through to the "real DB error" branch and
+// abort the import. errors.Is unwraps.
func (h *OrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) {
var existingID string
err := db.DB.QueryRowContext(ctx, `
@@ -589,7 +639,7 @@ func (h *OrgHandler) lookupExistingChild(ctx context.Context, name string, paren
AND status != 'removed'
LIMIT 1
`, name, parentID).Scan(&existingID)
- if err == sql.ErrNoRows {
+ if errors.Is(err, sql.ErrNoRows) {
return "", false, nil
}
if err != nil {
diff --git a/workspace-server/internal/handlers/org_import_idempotency_test.go b/workspace-server/internal/handlers/org_import_idempotency_test.go
index 0d7498fb..1f2955cb 100644
--- a/workspace-server/internal/handlers/org_import_idempotency_test.go
+++ b/workspace-server/internal/handlers/org_import_idempotency_test.go
@@ -1,11 +1,17 @@
package handlers
import (
- "bytes"
"context"
+ "database/sql"
"errors"
+ "fmt"
+ "go/ast"
+ "go/parser"
+ "go/token"
"os"
"path/filepath"
+ "regexp"
+ "strconv"
"strings"
"testing"
@@ -119,6 +125,90 @@ func TestLookupExistingChild_DBError_Propagates(t *testing.T) {
}
}
+// TestLookupExistingChild_WrappedNoRows_TreatedAsNotFound — pins the
+// wrap-safety of the errors.Is(err, sql.ErrNoRows) check. The previous
+// `err == sql.ErrNoRows` equality would fall through to the
+// "real DB error" branch on a wrapped no-rows error, aborting the
+// import for what is in fact the no-rows happy path. driver/sql
+// wrapping is currently a non-issue but a future driver change or a
+// caller that wraps the result via fmt.Errorf("…: %w", err) would
+// silently break the equality check. errors.Is unwraps.
+func TestLookupExistingChild_WrappedNoRows_TreatedAsNotFound(t *testing.T) {
+ mock := setupTestDB(t)
+ parent := "parent-1"
+ wrapped := fmt.Errorf("driver-wrapped: %w", sql.ErrNoRows)
+ mock.ExpectQuery(`SELECT id FROM workspaces`).
+ WithArgs("Alpha", &parent).
+ WillReturnError(wrapped)
+
+ h := &OrgHandler{}
+ id, found, err := h.lookupExistingChild(context.Background(), "Alpha", &parent)
+
+ if err != nil {
+ t.Fatalf("expected wrapped no-rows to be treated as not-found (err=nil), got: %v", err)
+ }
+ if found {
+ t.Errorf("expected found=false on wrapped no-rows, got found=true")
+ }
+ if id != "" {
+ t.Errorf("expected empty id on wrapped no-rows, got %q", id)
+ }
+}
+
+// workspacesInsertRE matches a SQL literal that begins (after optional
+// leading whitespace) with `INSERT INTO workspaces` followed by `(` —
+// requiring the open-paren rules out lookalikes like
+// `INSERT INTO workspaces_audit`, `INSERT INTO workspace_secrets`,
+// `INSERT INTO workspace_channels`, `INSERT INTO canvas_layouts`. The
+// previous bytes.Index gate accepted `workspaces_audit` as a prefix
+// match — see RFC #2872 Important-1 for the silent-false-pass shape.
+var workspacesInsertRE = regexp.MustCompile(`(?s)^\s*INSERT\s+INTO\s+workspaces\s*\(`)
+
+// findLookupAndWorkspacesInsertPos walks the AST of `src` and returns
+// the source positions of (a) the first call to `lookupExistingChild`
+// and (b) the first CallExpr whose argument list contains a STRING
+// BasicLit matching workspacesInsertRE. Either may be token.NoPos if
+// not found.
+//
+// Extracted as a helper so the gate logic can be exercised against
+// synthetic source — TestGate_FailsWhenLookupAfterInsert below proves
+// the gate actually catches the bug shape, not just the happy path.
+func findLookupAndWorkspacesInsertPos(t *testing.T, fname string, src []byte) (lookupPos, insertPos token.Pos, fset *token.FileSet) {
+ t.Helper()
+ fset = token.NewFileSet()
+ file, err := parser.ParseFile(fset, fname, src, parser.ParseComments)
+ if err != nil {
+ t.Fatalf("parse %s: %v", fname, err)
+ }
+ lookupPos, insertPos = token.NoPos, token.NoPos
+ ast.Inspect(file, func(n ast.Node) bool {
+ call, ok := n.(*ast.CallExpr)
+ if !ok {
+ return true
+ }
+ if sel, ok := call.Fun.(*ast.SelectorExpr); ok {
+ if sel.Sel.Name == "lookupExistingChild" && lookupPos == token.NoPos {
+ lookupPos = call.Pos()
+ }
+ }
+ for _, arg := range call.Args {
+ lit, ok := arg.(*ast.BasicLit)
+ if !ok || lit.Kind != token.STRING {
+ continue
+ }
+ raw := lit.Value
+ if unq, err := strconv.Unquote(raw); err == nil {
+ raw = unq
+ }
+ if workspacesInsertRE.MatchString(raw) && insertPos == token.NoPos {
+ insertPos = call.Pos()
+ }
+ }
+ return true
+ })
+ return
+}
+
// Source-level guard — pins that org_import.go calls
// h.lookupExistingChild BEFORE its INSERT INTO workspaces.
//
@@ -126,6 +216,11 @@ func TestLookupExistingChild_DBError_Propagates(t *testing.T) {
// (idempotency check before INSERT), not just function names. If a
// future refactor reintroduces the un-checked INSERT (the original
// bug shape that leaked 72 workspaces in 4 days), this test fails.
+//
+// AST-walk implementation closes the silent-false-pass mode that the
+// previous bytes.Index gate had — see workspacesInsertRE comment for
+// the failure mode (workspaces_audit / workspace_secrets / etc.
+// shadowing the real target via prefix match).
func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) {
wd, err := os.Getwd()
if err != nil {
@@ -135,17 +230,189 @@ func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) {
if err != nil {
t.Fatalf("read org_import.go: %v", err)
}
+ lookupPos, insertPos, fset := findLookupAndWorkspacesInsertPos(t, "org_import.go", src)
- lookupAt := bytes.Index(src, []byte("h.lookupExistingChild("))
- insertAt := bytes.Index(src, []byte("INSERT INTO workspaces"))
-
- if lookupAt < 0 {
- t.Fatalf("org_import.go missing call to h.lookupExistingChild — idempotency check removed?")
+ if lookupPos == token.NoPos {
+ t.Fatalf("AST: no call to lookupExistingChild in org_import.go — idempotency check removed?")
}
- if insertAt < 0 {
- t.Fatalf("org_import.go missing INSERT INTO workspaces — schema change?")
+ if insertPos == token.NoPos {
+ t.Fatalf("AST: no SQL literal matching `^\\s*INSERT INTO workspaces\\s*\\(` in any CallExpr in org_import.go — schema change or rename?")
}
- if lookupAt > insertAt {
- t.Errorf("h.lookupExistingChild must come BEFORE INSERT INTO workspaces in org_import.go (lookup@%d, insert@%d) — non-idempotent ordering would re-leak under repeat /org/import calls", lookupAt, insertAt)
+ if lookupPos > insertPos {
+ t.Errorf("lookupExistingChild call at %s must come BEFORE INSERT INTO workspaces at %s — non-idempotent ordering would re-leak under repeat /org/import calls",
+ fset.Position(lookupPos), fset.Position(insertPos))
+ }
+}
+
+// TestGate_FailsWhenLookupAfterInsert proves the gate actually catches
+// the bug it's named after — running it against synthetic Go source
+// where the lookup call is positioned AFTER the workspaces INSERT must
+// produce lookupPos > insertPos, which the production gate flags as
+// an ERROR. Without this test the gate could regress to "always pass"
+// and we wouldn't notice until the bug shipped again.
+//
+// Per memory feedback_assert_exact_not_substring.md: verify a
+// tightened test FAILS on old code before merging.
+func TestGate_FailsWhenLookupAfterInsert(t *testing.T) {
+ const buggySrc = `package handlers
+
+import "context"
+
+type fakeDB struct{}
+
+func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
+
+type fakeOrgHandler struct{}
+
+func (h *fakeOrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) {
+ return "", false, nil
+}
+
+func buggyCreate(h *fakeOrgHandler, db fakeDB, ctx context.Context, name string, parentID *string) {
+ // Bug shape: INSERT runs FIRST, lookup runs AFTER. This is the
+ // non-idempotent ordering the gate exists to forbid.
+ db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", name)
+ h.lookupExistingChild(ctx, name, parentID)
+}
+`
+ lookupPos, insertPos, _ := findLookupAndWorkspacesInsertPos(t, "buggy.go", []byte(buggySrc))
+ if lookupPos == token.NoPos || insertPos == token.NoPos {
+ t.Fatalf("synthetic buggy source missing expected nodes (lookupPos=%v insertPos=%v) — helper logic regression", lookupPos, insertPos)
+ }
+ if lookupPos < insertPos {
+ t.Fatalf("synthetic bug shape (lookup AFTER insert) returned lookupPos=%d < insertPos=%d — gate would NOT fire on actual bug, regression!", lookupPos, insertPos)
+ }
+ // Implicit: lookupPos > insertPos here, which the production gate
+ // flags via t.Errorf. This proves the gate is live, not vestigial.
+}
+
+// TestGate_IgnoresAuditTableShadow proves the regex tightening
+// actually ignores `INSERT INTO workspaces_audit` literals — the
+// specific shape #2872 cited as the silent-false-pass failure mode
+// for the previous bytes.Index gate.
+func TestGate_IgnoresAuditTableShadow(t *testing.T) {
+ // Synthetic source with audit-table INSERT at line 1 (would be
+ // position 0 under prefix-match) and lookup + real INSERT at later
+ // positions. With the tightened regex, the audit literal is
+ // ignored: insertPos points at the REAL INSERT, lookup precedes it,
+ // gate passes correctly.
+ const src = `package handlers
+
+import "context"
+
+type fakeDB struct{}
+
+func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
+
+type fakeOrgHandler struct{}
+
+func (h *fakeOrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) {
+ return "", false, nil
+}
+
+func okCreateWithAudit(h *fakeOrgHandler, db fakeDB, ctx context.Context, name string, parentID *string) {
+ // Audit-table INSERT — should be IGNORED by the tightened regex.
+ db.ExecContext(ctx, ` + "`INSERT INTO workspaces_audit (id, action) VALUES ($1, $2)`" + `, "x", "create_attempt")
+ // Lookup BEFORE real INSERT — correct order.
+ h.lookupExistingChild(ctx, name, parentID)
+ // Real INSERT.
+ db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", name)
+}
+`
+ lookupPos, insertPos, fset := findLookupAndWorkspacesInsertPos(t, "shadow.go", []byte(src))
+ if lookupPos == token.NoPos || insertPos == token.NoPos {
+ t.Fatalf("expected to find lookup + real INSERT, got lookupPos=%v insertPos=%v", lookupPos, insertPos)
+ }
+ // The audit-table INSERT is at line ~16 (column ~20-ish), the
+ // lookup is at line 19, the real INSERT is at line 21. If the
+ // regex regressed to prefix-match, insertPos would point at the
+ // audit literal at line 16, and the gate would falsely fail
+ // (lookup at 19 > "insert" at 16). With the tightened regex,
+ // insertPos correctly points at line 21, and the gate passes.
+ insertLine := fset.Position(insertPos).Line
+ lookupLine := fset.Position(lookupPos).Line
+ if insertLine < lookupLine {
+ t.Errorf("regex regressed: audit shadow at line %d swallowed real INSERT (lookup at line %d). insertPos should point at the real INSERT (line ~21), not the audit literal.",
+ insertLine, lookupLine)
+ }
+ if lookupPos > insertPos {
+ t.Errorf("synthetic source has lookup at line %d before real INSERT at line %d, gate should pass (lookupPos < insertPos), got lookupPos=%d > insertPos=%d",
+ lookupLine, insertLine, lookupPos, insertPos)
+ }
+}
+
+// TestWorkspacesInsertRE_RejectsLookalikes pins the regex that
+// discriminates the real workspaces INSERT from prefix-matching
+// lookalikes. If this regex regresses to a substring match, the
+// AST gate above silently false-passes when a future refactor
+// shadows the real INSERT with a workspaces_audit / workspace_secrets
+// / canvas_layouts literal placed earlier in source.
+func TestWorkspacesInsertRE_RejectsLookalikes(t *testing.T) {
+ cases := []struct {
+ sql string
+ want bool
+ comment string
+ }{
+ {"INSERT INTO workspaces (id, name) VALUES ($1, $2)", true, "real target"},
+ {"\n\t\tINSERT INTO workspaces (id, name)\n\t\tVALUES ($1, $2)", true, "real target with leading whitespace + newlines (raw string literal shape)"},
+ {"INSERT INTO workspaces_audit (id) VALUES ($1)", false, "underscore-suffix lookalike (the #2872 specific failure mode)"},
+ {"INSERT INTO workspace_secrets (key, value) VALUES ($1, $2)", false, "prefix without trailing 's' (workspace_*)"},
+ {"INSERT INTO workspace_channels (id) VALUES ($1)", false, "another workspace_* prefix"},
+ {"INSERT INTO canvas_layouts (workspace_id, x, y) VALUES ($1, $2, $3)", false, "unrelated table that contains 'workspace' in a column ref"},
+ {"UPDATE workspaces SET status='running' WHERE id=$1", false, "UPDATE shouldn't match"},
+ {"SELECT * FROM workspaces WHERE id=$1", false, "SELECT shouldn't match"},
+ {"-- comment about INSERT INTO workspaces (\nSELECT 1", false, "comment shouldn't match"},
+ }
+ for _, c := range cases {
+ got := workspacesInsertRE.MatchString(c.sql)
+ if got != c.want {
+ t.Errorf("workspacesInsertRE.MatchString(%q) = %v, want %v (%s)", c.sql, got, c.want, c.comment)
+ }
+ }
+}
+
+// Confirm the regex actually matches the literal currently in
+// org_import.go. Pins the shape so `gofmt` reflows or trivial edits
+// to the SQL string don't silently disable the gate above.
+func TestWorkspacesInsertRE_MatchesActualSourceLiteral(t *testing.T) {
+ wd, err := os.Getwd()
+ if err != nil {
+ t.Fatalf("getwd: %v", err)
+ }
+ src, err := os.ReadFile(filepath.Join(wd, "org_import.go"))
+ if err != nil {
+ t.Fatalf("read org_import.go: %v", err)
+ }
+ // Strip backtick strings, find any whose content matches.
+ // Walk the source via parser.ParseFile to avoid string-search
+ // drift if the literal is reflowed.
+ fset := token.NewFileSet()
+ file, err := parser.ParseFile(fset, filepath.Join(wd, "org_import.go"), src, parser.ParseComments)
+ if err != nil {
+ t.Fatalf("parse org_import.go: %v", err)
+ }
+ var matched bool
+ ast.Inspect(file, func(n ast.Node) bool {
+ lit, ok := n.(*ast.BasicLit)
+ if !ok || lit.Kind != token.STRING {
+ return true
+ }
+ raw := lit.Value
+ if unq, err := strconv.Unquote(raw); err == nil {
+ raw = unq
+ }
+ if workspacesInsertRE.MatchString(raw) {
+ matched = true
+ }
+ return true
+ })
+ if !matched {
+ t.Fatalf("no SQL literal in org_import.go matches workspacesInsertRE — gate is dead. Either the INSERT was renamed (update the regex) or the file was restructured (review the gate logic).")
+ }
+ // strings.Contains keeps the test informative: if the regex
+ // stopped matching but the literal source still contains the
+ // magic phrase, that's a regex-side failure (test the fix above).
+ if !strings.Contains(string(src), "INSERT INTO workspaces") {
+ t.Fatalf("org_import.go has no `INSERT INTO workspaces` substring at all — schema change?")
}
}
diff --git a/workspace-server/internal/handlers/pending_uploads_integration_test.go b/workspace-server/internal/handlers/pending_uploads_integration_test.go
new file mode 100644
index 00000000..61c64f86
--- /dev/null
+++ b/workspace-server/internal/handlers/pending_uploads_integration_test.go
@@ -0,0 +1,476 @@
+//go:build integration
+// +build integration
+
+// pending_uploads_integration_test.go — REAL Postgres integration
+// tests for the poll-mode chat upload flow (RFC: phases 1–3).
+//
+// Run with:
+//
+// docker run --rm -d --name pg-integration \
+// -e POSTGRES_PASSWORD=test -e POSTGRES_DB=molecule \
+// -p 55432:5432 postgres:15-alpine
+// sleep 4
+// psql ... < workspace-server/migrations/20260505100000_pending_uploads.up.sql
+// cd workspace-server
+// INTEGRATION_DB_URL="postgres://postgres:test@localhost:55432/molecule?sslmode=disable" \
+// go test -tags=integration ./internal/handlers/ -run Integration_PendingUploads
+//
+// CI (.github/workflows/handlers-postgres-integration.yml) runs this on
+// every PR that touches workspace-server/internal/handlers/** OR
+// workspace-server/migrations/**.
+//
+// Why these are NOT plain unit tests
+// ----------------------------------
+// The strict-sqlmock unit tests in storage_test.go pin which SQL
+// statements fire — they are fast and let us iterate without a DB. But
+// sqlmock CANNOT detect bugs that depend on the actual row state after
+// the SQL runs. In particular:
+//
+// - the WITH … DELETE … RETURNING CTE used by Sweep depends on
+// Postgres' `make_interval` function and the table's CHECK
+// constraints. sqlmock would happily accept a hand-written SQL
+// literal that Postgres rejects at runtime.
+// - the partial index `idx_pending_uploads_unacked` (created by the
+// Phase 1 migration) only catches a wrong WHERE predicate at real-
+// query-plan time.
+//
+// These tests close those gaps by booting a real Postgres, running the
+// production helpers, and SELECTing the row to verify the observable
+// state matches the expected outcome.
+
+package handlers
+
+import (
+ "context"
+ "database/sql"
+ "os"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/google/uuid"
+ _ "github.com/lib/pq"
+
+ "github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
+)
+
+// integrationDB_PendingUploads opens a connection from $INTEGRATION_DB_URL
+// (skipping the test if unset), wipes the pending_uploads table for
+// isolation, and registers a Cleanup that closes the connection.
+//
+// NOT SAFE FOR `t.Parallel()` — each test gets the table to itself.
+// Mirrors the integrationDB helper in delegation_ledger_integration_test.go
+// but kept separate so each table's wipe step is local to its tests.
+func integrationDB_PendingUploads(t *testing.T) *sql.DB {
+ t.Helper()
+ url := os.Getenv("INTEGRATION_DB_URL")
+ if url == "" {
+ t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)")
+ }
+ conn, err := sql.Open("postgres", url)
+ if err != nil {
+ t.Fatalf("open: %v", err)
+ }
+ if err := conn.Ping(); err != nil {
+ t.Fatalf("ping: %v", err)
+ }
+ if _, err := conn.ExecContext(context.Background(), `DELETE FROM pending_uploads`); err != nil {
+ t.Fatalf("cleanup: %v", err)
+ }
+ t.Cleanup(func() { conn.Close() })
+ return conn
+}
+
+func TestIntegration_PendingUploads_PutGetAckRoundTrip(t *testing.T) {
+ conn := integrationDB_PendingUploads(t)
+ store := pendinguploads.NewPostgres(conn)
+ ctx := context.Background()
+
+ wsID := uuid.New()
+ fileID, err := store.Put(ctx, wsID, []byte("hello PDF"), "report.pdf", "application/pdf")
+ if err != nil {
+ t.Fatalf("Put: %v", err)
+ }
+
+ // Get reads back the row.
+ rec, err := store.Get(ctx, fileID)
+ if err != nil {
+ t.Fatalf("Get: %v", err)
+ }
+ if rec.WorkspaceID != wsID {
+ t.Errorf("workspace_id = %s, want %s", rec.WorkspaceID, wsID)
+ }
+ if string(rec.Content) != "hello PDF" {
+ t.Errorf("content = %q, want %q", rec.Content, "hello PDF")
+ }
+ if rec.Filename != "report.pdf" {
+ t.Errorf("filename = %q, want %q", rec.Filename, "report.pdf")
+ }
+ if rec.AckedAt != nil {
+ t.Errorf("AckedAt should be nil before Ack, got %v", rec.AckedAt)
+ }
+
+ // MarkFetched stamps fetched_at.
+ if err := store.MarkFetched(ctx, fileID); err != nil {
+ t.Fatalf("MarkFetched: %v", err)
+ }
+
+ // Re-read to confirm.
+ rec2, err := store.Get(ctx, fileID)
+ if err != nil {
+ t.Fatalf("Get after MarkFetched: %v", err)
+ }
+ if rec2.FetchedAt == nil {
+ t.Errorf("FetchedAt should be set after MarkFetched")
+ }
+
+ // Ack flips acked_at; subsequent Gets return ErrNotFound (acked rows
+ // are filtered out at the SELECT predicate).
+ if err := store.Ack(ctx, fileID); err != nil {
+ t.Fatalf("Ack: %v", err)
+ }
+ if _, err := store.Get(ctx, fileID); err != pendinguploads.ErrNotFound {
+ t.Errorf("Get after Ack: got %v, want ErrNotFound", err)
+ }
+
+ // Idempotent re-ack succeeds.
+ if err := store.Ack(ctx, fileID); err != nil {
+ t.Errorf("re-Ack should be idempotent, got %v", err)
+ }
+}
+
+func TestIntegration_PendingUploads_Sweep_DeletesAckedAfterRetention(t *testing.T) {
+ conn := integrationDB_PendingUploads(t)
+ store := pendinguploads.NewPostgres(conn)
+ ctx := context.Background()
+
+ wsID := uuid.New()
+ fid, err := store.Put(ctx, wsID, []byte("data"), "x.txt", "text/plain")
+ if err != nil {
+ t.Fatalf("Put: %v", err)
+ }
+ if err := store.Ack(ctx, fid); err != nil {
+ t.Fatalf("Ack: %v", err)
+ }
+
+ // retention=1h, row was acked just now → not yet eligible.
+ res, err := store.Sweep(ctx, time.Hour)
+ if err != nil {
+ t.Fatalf("Sweep(1h): %v", err)
+ }
+ if res.Total() != 0 {
+ t.Errorf("expected 0 deletions yet, got %+v", res)
+ }
+
+ // retention=0 → row IS eligible immediately.
+ res, err = store.Sweep(ctx, 0)
+ if err != nil {
+ t.Fatalf("Sweep(0): %v", err)
+ }
+ if res.Acked != 1 || res.Expired != 0 {
+ t.Errorf("expected acked=1 expired=0, got %+v", res)
+ }
+
+ // Verify row is actually gone — not just un-fetchable.
+ var n int
+ if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE file_id = $1`, fid).Scan(&n); err != nil {
+ t.Fatalf("count: %v", err)
+ }
+ if n != 0 {
+ t.Errorf("row should be DELETEd, found %d rows", n)
+ }
+}
+
+func TestIntegration_PendingUploads_Sweep_DeletesExpiredUnacked(t *testing.T) {
+ conn := integrationDB_PendingUploads(t)
+ store := pendinguploads.NewPostgres(conn)
+ ctx := context.Background()
+
+ wsID := uuid.New()
+ fid, err := store.Put(ctx, wsID, []byte("data"), "x.txt", "text/plain")
+ if err != nil {
+ t.Fatalf("Put: %v", err)
+ }
+
+ // Manually backdate expires_at so the row IS expired. We don't ack,
+ // so this exercises the unacked-and-expired branch of the WHERE
+ // clause specifically.
+ if _, err := conn.ExecContext(ctx,
+ `UPDATE pending_uploads SET expires_at = now() - interval '1 minute' WHERE file_id = $1`,
+ fid,
+ ); err != nil {
+ t.Fatalf("backdate: %v", err)
+ }
+
+ res, err := store.Sweep(ctx, time.Hour)
+ if err != nil {
+ t.Fatalf("Sweep: %v", err)
+ }
+ if res.Acked != 0 || res.Expired != 1 {
+ t.Errorf("expected acked=0 expired=1, got %+v", res)
+ }
+}
+
+func TestIntegration_PendingUploads_Sweep_DeletesBothCategoriesInOneCycle(t *testing.T) {
+ conn := integrationDB_PendingUploads(t)
+ store := pendinguploads.NewPostgres(conn)
+ ctx := context.Background()
+
+ wsID := uuid.New()
+
+ // Three rows: one acked (eligible at retention=0), one expired
+ // unacked, one fresh unacked (must NOT be deleted).
+ ackedFID, err := store.Put(ctx, wsID, []byte("acked"), "a.txt", "text/plain")
+ if err != nil {
+ t.Fatalf("Put acked: %v", err)
+ }
+ if err := store.Ack(ctx, ackedFID); err != nil {
+ t.Fatalf("Ack: %v", err)
+ }
+
+ expiredFID, err := store.Put(ctx, wsID, []byte("expired"), "e.txt", "text/plain")
+ if err != nil {
+ t.Fatalf("Put expired: %v", err)
+ }
+ if _, err := conn.ExecContext(ctx,
+ `UPDATE pending_uploads SET expires_at = now() - interval '1 minute' WHERE file_id = $1`,
+ expiredFID,
+ ); err != nil {
+ t.Fatalf("backdate: %v", err)
+ }
+
+ freshFID, err := store.Put(ctx, wsID, []byte("fresh"), "f.txt", "text/plain")
+ if err != nil {
+ t.Fatalf("Put fresh: %v", err)
+ }
+
+ res, err := store.Sweep(ctx, 0) // retention=0 makes the acked row eligible
+ if err != nil {
+ t.Fatalf("Sweep: %v", err)
+ }
+ if res.Acked != 1 || res.Expired != 1 {
+ t.Errorf("expected acked=1 expired=1, got %+v", res)
+ }
+
+ // Fresh row survives.
+ rec, err := store.Get(ctx, freshFID)
+ if err != nil {
+ t.Errorf("fresh row should still be Get-able, got err=%v", err)
+ }
+ if rec.FileID != freshFID {
+ t.Errorf("fresh row file_id = %s, want %s", rec.FileID, freshFID)
+ }
+}
+
+func TestIntegration_PendingUploads_PutEnforcesSizeCap(t *testing.T) {
+ conn := integrationDB_PendingUploads(t)
+ store := pendinguploads.NewPostgres(conn)
+ ctx := context.Background()
+
+ wsID := uuid.New()
+ tooBig := make([]byte, pendinguploads.MaxFileBytes+1)
+ if _, err := store.Put(ctx, wsID, tooBig, "big.bin", "application/octet-stream"); err != pendinguploads.ErrTooLarge {
+ t.Errorf("expected ErrTooLarge, got %v", err)
+ }
+}
+
+// TestIntegration_PendingUploads_PutBatch_HappyPath_AllRowsCommit pins the
+// "all rows commit" leg of the PutBatch atomicity contract against a real
+// Postgres. sqlmock can't catch a regression where the Go-side Tx machinery
+// silently no-ops the inserts (e.g., wrong driver options on BeginTx); only
+// COUNT(*) on the real table can.
+func TestIntegration_PendingUploads_PutBatch_HappyPath_AllRowsCommit(t *testing.T) {
+ conn := integrationDB_PendingUploads(t)
+ store := pendinguploads.NewPostgres(conn)
+ ctx := context.Background()
+
+ wsID := uuid.New()
+
+ // Pre-existing row so the COUNT(*) baseline is non-zero — proves
+ // PutBatch adds rows incrementally rather than overwriting.
+ if _, err := store.Put(ctx, wsID, []byte("seed"), "seed.txt", "text/plain"); err != nil {
+ t.Fatalf("seed Put: %v", err)
+ }
+
+ items := []pendinguploads.PutItem{
+ {Content: []byte("alpha"), Filename: "alpha.txt", Mimetype: "text/plain"},
+ {Content: []byte("beta"), Filename: "beta.bin", Mimetype: "application/octet-stream"},
+ {Content: []byte("gamma"), Filename: "gamma.pdf", Mimetype: "application/pdf"},
+ }
+ ids, err := store.PutBatch(ctx, wsID, items)
+ if err != nil {
+ t.Fatalf("PutBatch: %v", err)
+ }
+ if len(ids) != len(items) {
+ t.Fatalf("ids length %d, want %d", len(ids), len(items))
+ }
+
+ // Each returned id round-trips through Get with the right content.
+ for i, id := range ids {
+ rec, err := store.Get(ctx, id)
+ if err != nil {
+ t.Fatalf("Get item %d (%s): %v", i, id, err)
+ }
+ if string(rec.Content) != string(items[i].Content) {
+ t.Errorf("item %d content = %q, want %q", i, rec.Content, items[i].Content)
+ }
+ if rec.Filename != items[i].Filename {
+ t.Errorf("item %d filename = %q, want %q", i, rec.Filename, items[i].Filename)
+ }
+ }
+
+ var n int
+ if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&n); err != nil {
+ t.Fatalf("count: %v", err)
+ }
+ if n != 4 {
+ t.Errorf("workspace row count = %d, want 4 (1 seed + 3 batch)", n)
+ }
+}
+
+// TestIntegration_PendingUploads_PutBatch_AtomicRollback_NoLeakOnFailure
+// proves the all-or-nothing contract end-to-end against real Postgres MVCC.
+//
+// Strategy: build a 3-item batch where item index 1 carries a filename with
+// an embedded NUL byte. lib/pq rejects NULs in TEXT columns at the protocol
+// layer (`pq: invalid byte sequence for encoding "UTF8": 0x00`), which
+// triggers the per-row INSERT error path in PutBatch. The first item's
+// INSERT…RETURNING already wrote a row to the Tx's snapshot, so a buggy
+// rollback would leave that row visible after PutBatch returns.
+//
+// Postgrest semantics: ROLLBACK is the only way a real DB can guarantee the
+// "no leak" contract; a unit test with sqlmock can prove the Go function
+// CALLED Rollback, but only this integration test proves Postgres actually
+// HONORED it.
+func TestIntegration_PendingUploads_PutBatch_AtomicRollback_NoLeakOnFailure(t *testing.T) {
+ conn := integrationDB_PendingUploads(t)
+ store := pendinguploads.NewPostgres(conn)
+ ctx := context.Background()
+
+ wsID := uuid.New()
+
+ // Baseline COUNT(*) for this workspace — must remain 0 after a failed batch.
+ var before int
+ if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&before); err != nil {
+ t.Fatalf("baseline count: %v", err)
+ }
+ if before != 0 {
+ t.Fatalf("workspace not isolated: baseline = %d, want 0", before)
+ }
+
+ // Item 1 has a NUL byte in the filename — Go-side pre-validation
+ // (which only checks empty/length) lets it through, so the INSERT
+ // reaches lib/pq, which rejects it at the protocol level. That's the
+ // canonical "DB-side error mid-batch" we want to exercise.
+ items := []pendinguploads.PutItem{
+ {Content: []byte("ok"), Filename: "ok.txt", Mimetype: "text/plain"},
+ {Content: []byte("bad"), Filename: "bad\x00name.txt", Mimetype: "text/plain"},
+ {Content: []byte("never"), Filename: "never.txt", Mimetype: "text/plain"},
+ }
+ _, err := store.PutBatch(ctx, wsID, items)
+ if err == nil {
+ t.Fatalf("expected error from NUL-byte filename, got nil")
+ }
+
+ // THE assertion this whole test exists for: even though item 0's
+ // INSERT…RETURNING succeeded inside the Tx, the rollback unwound
+ // it — zero rows for this workspace, not one (let alone three).
+ var after int
+ if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&after); err != nil {
+ t.Fatalf("post-failure count: %v", err)
+ }
+ if after != 0 {
+ t.Errorf("Tx rollback leaked rows: workspace count = %d, want 0", after)
+ }
+}
+
+// TestIntegration_PendingUploads_PutBatch_Oversize_NoTxOpened verifies the
+// pre-validation short-circuit: an oversized item rejects with ErrTooLarge
+// BEFORE any Tx opens, so the table is untouched. The unit test (sqlmock
+// with zero expectations) catches the Go-side path; this test sanity-checks
+// no real DB I/O happens by confirming COUNT(*) doesn't move.
+func TestIntegration_PendingUploads_PutBatch_Oversize_NoTxOpened(t *testing.T) {
+ conn := integrationDB_PendingUploads(t)
+ store := pendinguploads.NewPostgres(conn)
+ ctx := context.Background()
+
+ wsID := uuid.New()
+ tooBig := make([]byte, pendinguploads.MaxFileBytes+1)
+ _, err := store.PutBatch(ctx, wsID, []pendinguploads.PutItem{
+ {Content: []byte("ok"), Filename: "ok.txt"},
+ {Content: tooBig, Filename: "too-big.bin"},
+ })
+ if err != pendinguploads.ErrTooLarge {
+ t.Fatalf("expected ErrTooLarge, got %v", err)
+ }
+ var n int
+ if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&n); err != nil {
+ t.Fatalf("count: %v", err)
+ }
+ if n != 0 {
+ t.Errorf("pre-validation did NOT short-circuit: count = %d, want 0", n)
+ }
+}
+
+// TestIntegration_PendingUploads_AckedIndexExists verifies the Phase 5a
+// migration (20260505200000_pending_uploads_acked_index.up.sql) actually
+// created idx_pending_uploads_acked with the right partial-index predicate.
+//
+// Why pg_indexes and not EXPLAIN: the planner prefers Seq Scan on tiny
+// tables regardless of available indexes — a plan-shape check would be
+// flaky under real test loads. The contract we care about is "the index
+// exists with the predicate we wrote in the migration"; pg_indexes is
+// the canonical source for that, robust to row count and planner version.
+func TestIntegration_PendingUploads_AckedIndexExists(t *testing.T) {
+ conn := integrationDB_PendingUploads(t)
+ ctx := context.Background()
+
+ var indexdef string
+ err := conn.QueryRowContext(ctx, `
+ SELECT indexdef FROM pg_indexes
+ WHERE schemaname = 'public'
+ AND tablename = 'pending_uploads'
+ AND indexname = 'idx_pending_uploads_acked'
+ `).Scan(&indexdef)
+ if err == sql.ErrNoRows {
+ t.Fatal("idx_pending_uploads_acked is missing — migration 20260505200000 not applied")
+ }
+ if err != nil {
+ t.Fatalf("pg_indexes query: %v", err)
+ }
+
+ // Pin the partial-index predicate. Without "WHERE acked_at IS NOT NULL"
+ // we'd be indexing the entire table (defeats the point — most rows are
+ // unacked), and the existing idx_pending_uploads_unacked already covers
+ // the inverse predicate.
+ if !strings.Contains(indexdef, "(acked_at)") {
+ t.Errorf("index missing acked_at column: %s", indexdef)
+ }
+ if !strings.Contains(indexdef, "WHERE (acked_at IS NOT NULL)") {
+ t.Errorf("index missing partial predicate: %s", indexdef)
+ }
+}
+
+func TestIntegration_PendingUploads_GetIgnoresExpiredAndAcked(t *testing.T) {
+ conn := integrationDB_PendingUploads(t)
+ store := pendinguploads.NewPostgres(conn)
+ ctx := context.Background()
+
+ wsID := uuid.New()
+ fid, err := store.Put(ctx, wsID, []byte("data"), "x.txt", "text/plain")
+ if err != nil {
+ t.Fatalf("Put: %v", err)
+ }
+
+ // Backdate expires_at — Get must return ErrNotFound, even though the
+ // row physically exists in the table (Sweep hasn't run).
+ if _, err := conn.ExecContext(ctx,
+ `UPDATE pending_uploads SET expires_at = now() - interval '1 minute' WHERE file_id = $1`,
+ fid,
+ ); err != nil {
+ t.Fatalf("backdate: %v", err)
+ }
+ if _, err := store.Get(ctx, fid); err != pendinguploads.ErrNotFound {
+ t.Errorf("Get after expiry: got %v, want ErrNotFound", err)
+ }
+}
diff --git a/workspace-server/internal/handlers/pending_uploads_test.go b/workspace-server/internal/handlers/pending_uploads_test.go
index 17da24af..778e8170 100644
--- a/workspace-server/internal/handlers/pending_uploads_test.go
+++ b/workspace-server/internal/handlers/pending_uploads_test.go
@@ -71,6 +71,20 @@ func (f *fakeStorage) Ack(_ context.Context, fileID uuid.UUID) error {
return nil
}
+// Sweep is required by the Storage interface (Phase 3 GC). Not exercised
+// by these handler tests — the dedicated sweeper_test.go covers it.
+func (f *fakeStorage) Sweep(_ context.Context, _ time.Duration) (pendinguploads.SweepResult, error) {
+ return pendinguploads.SweepResult{}, nil
+}
+
+// PutBatch is required by the Storage interface; the upload handler
+// tests live in chat_files_poll_test.go and use a separate fake
+// (inMemStorage). Stubbed here because the Get/Ack tests don't drive
+// PutBatch, but the interface must be satisfied.
+func (f *fakeStorage) PutBatch(_ context.Context, _ uuid.UUID, _ []pendinguploads.PutItem) ([]uuid.UUID, error) {
+ return nil, nil
+}
+
func newRouter(handler *handlers.PendingUploadsHandler) *gin.Engine {
gin.SetMode(gin.TestMode)
r := gin.New()
diff --git a/workspace-server/internal/handlers/provlog_emit_test.go b/workspace-server/internal/handlers/provlog_emit_test.go
new file mode 100644
index 00000000..6681c203
--- /dev/null
+++ b/workspace-server/internal/handlers/provlog_emit_test.go
@@ -0,0 +1,112 @@
+package handlers
+
+// provlog_emit_test.go — pins that the structured-logging emit sites
+// added for #2867 PR-D actually fire when their boundary is crossed.
+//
+// These are call-site contract tests, not provlog package tests (those
+// live next to the helper). The assertion is "this dispatcher path
+// emits this event name" — if a refactor moves the call out of the
+// boundary helper, the gate fails. Fields are NOT pinned here on
+// purpose; the field set is convenience for ops, not contract for the
+// emit point. Pinning fields would block additive evolution of the
+// payload (see also feedback_behavior_based_ast_gates.md).
+
+import (
+ "bytes"
+ "context"
+ "log"
+ "strings"
+ "sync"
+ "testing"
+
+ "github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
+)
+
+// captureProvLog redirects the global logger to a buffer for the test
+// duration. provlog.Event uses log.Printf, so this is the only seam.
+// Returned mutex protects against concurrent reads from the goroutine
+// fired by provisionWorkspaceAuto (the goroutine never returns in
+// these tests because Start() is stubbed, but the buffer can still be
+// touched by it racing the assertion).
+func captureProvLog(t *testing.T) (read func() string) {
+ t.Helper()
+ var buf bytes.Buffer
+ var mu sync.Mutex
+ prevWriter := log.Writer()
+ prevFlags := log.Flags()
+ log.SetFlags(0)
+ log.SetOutput(&safeWriter{buf: &buf, mu: &mu})
+ t.Cleanup(func() {
+ log.SetOutput(prevWriter)
+ log.SetFlags(prevFlags)
+ })
+ return func() string {
+ mu.Lock()
+ defer mu.Unlock()
+ return buf.String()
+ }
+}
+
+// TestProvisionWorkspaceAutoSync_EmitsProvisionStart — sync variant is
+// chosen for the assertion path because it returns once the (stubbed)
+// Start() has been called, so we know the emit has flushed. The async
+// variant would race a goroutine.
+func TestProvisionWorkspaceAutoSync_EmitsProvisionStart(t *testing.T) {
+ read := captureProvLog(t)
+ h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
+ // Best-effort: the body will hit DB code under provisionWorkspaceCP
+ // — we only need the emit at the entry, which fires unconditionally
+ // before the dispatch. Recovering from any later panic keeps the
+ // test focused.
+ defer func() { _ = recover() }()
+ h.provisionWorkspaceAutoSync("ws-test-1", "tmpl", nil, models.CreateWorkspacePayload{
+ Name: "n", Tier: 4, Runtime: "claude-code",
+ })
+ got := read()
+ if !strings.Contains(got, "evt: provision.start ") {
+ t.Fatalf("expected provision.start emit, got log:\n%s", got)
+ }
+ if !strings.Contains(got, `"workspace_id":"ws-test-1"`) {
+ t.Errorf("workspace_id not in payload: %s", got)
+ }
+ if !strings.Contains(got, `"sync":true`) {
+ t.Errorf("sync flag not pinned for sync dispatcher: %s", got)
+ }
+}
+
+// TestStopForRestart_EmitsRestartPreStop — emit fires before the actual
+// Stop call, so the trackingCPProv stub doesn't need to be wired for
+// real Stop semantics. Backend label "cp" pinned because that's the
+// SaaS path; we don't pin "docker" or "none" branches here (separate
+// tests would only re-test the trivial branch label switch).
+func TestStopForRestart_EmitsRestartPreStop(t *testing.T) {
+ read := captureProvLog(t)
+ h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
+ defer func() { _ = recover() }()
+ h.stopForRestart(context.Background(), "ws-restart-1")
+ got := read()
+ if !strings.Contains(got, "evt: restart.pre_stop ") {
+ t.Fatalf("expected restart.pre_stop emit, got log:\n%s", got)
+ }
+ if !strings.Contains(got, `"workspace_id":"ws-restart-1"`) {
+ t.Errorf("workspace_id not in payload: %s", got)
+ }
+ if !strings.Contains(got, `"backend":"cp"`) {
+ t.Errorf("backend label missing or wrong: %s", got)
+ }
+}
+
+// TestStopForRestart_EmitsBackendNoneWhenUnwired — pin the no-backend
+// branch so a future refactor that drops the label switch is caught.
+// This is the silent-Stop case (workspace_dispatchers.go:StopWorkspaceAuto
+// returns nil for unwired backends); the emit ensures the operator can
+// still see the boundary in the log.
+func TestStopForRestart_EmitsBackendNoneWhenUnwired(t *testing.T) {
+ read := captureProvLog(t)
+ h := &WorkspaceHandler{} // both nil
+ h.stopForRestart(context.Background(), "ws-restart-2")
+ got := read()
+ if !strings.Contains(got, `"backend":"none"`) {
+ t.Fatalf("expected backend=none for unwired handler: %s", got)
+ }
+}
diff --git a/workspace-server/internal/handlers/saas_default_tier_test.go b/workspace-server/internal/handlers/saas_default_tier_test.go
new file mode 100644
index 00000000..c4d32a94
--- /dev/null
+++ b/workspace-server/internal/handlers/saas_default_tier_test.go
@@ -0,0 +1,99 @@
+package handlers
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
+)
+
+// Tests for the SaaS-aware default-tier resolution introduced in #2901
+// and hardened in #2910 (multi-model review of #2901 found the original
+// claim of "all green" was passing because no SaaS-mode test existed).
+//
+// These tests pin three invariants:
+//
+// 1. WorkspaceHandler.IsSaaS() returns true when cpProv is wired,
+// false otherwise.
+// 2. WorkspaceHandler.DefaultTier() returns 4 on SaaS, 3 self-hosted.
+// 3. generateDefaultConfig (TemplatesHandler.Import path) writes the
+// passed-in tier into the generated config.yaml — pre-#2910 it
+// was hardcoded to 3 and silently disagreed with the create-
+// handler default on SaaS.
+
+// stubCPProv is a minimal stand-in for the CP provisioner — only
+// exercises the IsSaaS / HasProvisioner contract, never invoked in
+// these tests.
+type stubCPProv struct{}
+
+func (stubCPProv) Start(_ interface{}, _ provisioner.WorkspaceConfig) (string, error) {
+ return "", nil
+}
+func (stubCPProv) Stop(_ interface{}, _ string) error { return nil }
+func (stubCPProv) Restart(_ interface{}, _ provisioner.WorkspaceConfig) (string, error) {
+ return "", nil
+}
+
+func TestIsSaaS_TrueWhenCPProvWired(t *testing.T) {
+ h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
+ if !h.IsSaaS() {
+ t.Errorf("IsSaaS()=false with cpProv wired; expected true")
+ }
+}
+
+func TestIsSaaS_FalseWhenOnlyDocker(t *testing.T) {
+ // provisioner field set, cpProv nil — the self-hosted path.
+ // Use a non-nil sentinel so the check actually has something to
+ // disagree with. trackingCPProv lives in workspace_provision_auto_test.go
+ // and is the established stub for these handler-level tests.
+ h := &WorkspaceHandler{provisioner: nil, cpProv: nil}
+ if h.IsSaaS() {
+ t.Errorf("IsSaaS()=true with both backends nil; expected false")
+ }
+}
+
+func TestDefaultTier_SaaS_IsT4(t *testing.T) {
+ h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
+ if got := h.DefaultTier(); got != 4 {
+ t.Errorf("SaaS DefaultTier()=%d; expected 4", got)
+ }
+}
+
+func TestDefaultTier_SelfHosted_IsT3(t *testing.T) {
+ h := &WorkspaceHandler{}
+ if got := h.DefaultTier(); got != 3 {
+ t.Errorf("self-hosted DefaultTier()=%d; expected 3", got)
+ }
+}
+
+// generateDefaultConfig — pin that the tier param flows into the
+// emitted config.yaml verbatim. Pre-#2910 this was hardcoded "tier: 3"
+// regardless of caller intent.
+func TestGenerateDefaultConfig_RespectsTierParam(t *testing.T) {
+ cfg := generateDefaultConfig("Test Agent", map[string]string{"system-prompt.md": ""}, 4)
+ if !strings.Contains(cfg, "tier: 4\n") {
+ t.Errorf("expected `tier: 4` in generated config, got:\n%s", cfg)
+ }
+ // The pre-#2910 hardcoded `tier: 3` line must NOT appear.
+ if strings.Contains(cfg, "tier: 3\n") {
+ t.Errorf("config should not contain `tier: 3` when caller passed 4, got:\n%s", cfg)
+ }
+}
+
+func TestGenerateDefaultConfig_SelfHostedTierT3(t *testing.T) {
+ cfg := generateDefaultConfig("Test Agent", map[string]string{"system-prompt.md": ""}, 3)
+ if !strings.Contains(cfg, "tier: 3\n") {
+ t.Errorf("expected `tier: 3` in generated config, got:\n%s", cfg)
+ }
+}
+
+// Bounds check — caller passes 0 or out-of-range, helper falls back
+// to T3 (the safer-of-the-two when deployment mode can't be resolved).
+func TestGenerateDefaultConfig_OutOfRangeFallsBackToT3(t *testing.T) {
+ for _, tier := range []int{0, -1, 99} {
+ cfg := generateDefaultConfig("X", map[string]string{}, tier)
+ if !strings.Contains(cfg, "tier: 3\n") {
+ t.Errorf("invalid tier %d should fall back to T3, got:\n%s", tier, cfg)
+ }
+ }
+}
diff --git a/workspace-server/internal/handlers/security_regression_685_686_687_688_test.go b/workspace-server/internal/handlers/security_regression_685_686_687_688_test.go
index f8d4fcb9..aa35a517 100644
--- a/workspace-server/internal/handlers/security_regression_685_686_687_688_test.go
+++ b/workspace-server/internal/handlers/security_regression_685_686_687_688_test.go
@@ -71,7 +71,7 @@ func TestSecurity_GetTemplates_NoAuth_Returns401(t *testing.T) {
authDB, authMock := newEnrolledAuthDB(t)
tmpDir := t.TempDir()
- tmplh := NewTemplatesHandler(tmpDir, nil)
+ tmplh := NewTemplatesHandler(tmpDir, nil, nil)
r := gin.New()
r.GET("/templates", middleware.AdminAuth(authDB), tmplh.List)
@@ -98,7 +98,7 @@ func TestSecurity_GetTemplates_FreshInstall_FailsOpen(t *testing.T) {
authDB, authMock := newFreshInstallAuthDB(t)
tmpDir := t.TempDir()
- tmplh := NewTemplatesHandler(tmpDir, nil)
+ tmplh := NewTemplatesHandler(tmpDir, nil, nil)
r := gin.New()
r.GET("/templates", middleware.AdminAuth(authDB), tmplh.List)
diff --git a/workspace-server/internal/handlers/team.go b/workspace-server/internal/handlers/team.go
deleted file mode 100644
index 0c536020..00000000
--- a/workspace-server/internal/handlers/team.go
+++ /dev/null
@@ -1,132 +0,0 @@
-package handlers
-
-import (
- "encoding/json"
- "log"
- "net/http"
- "os"
- "path/filepath"
-
- "github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
- "github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
- "github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
- "github.com/gin-gonic/gin"
- "gopkg.in/yaml.v3"
-)
-
-// TeamHandler now hosts only Collapse — the visual "expand" action is
-// canvas-side and creating children goes through the regular
-// WorkspaceHandler.Create path with parent_id set, like any other
-// workspace. Every workspace can have children; "team" is just the
-// state of having children. The old Expand handler bulk-created
-// children by reading sub_workspaces from a parent's config and was
-// non-idempotent — calling it N times leaked N×children EC2s, which
-// is how tenant-hongming accumulated 72 stale workspaces.
-type TeamHandler struct {
- wh *WorkspaceHandler
- b *events.Broadcaster
-}
-
-// NewTeamHandler constructs a TeamHandler. wh is used by Collapse to
-// route StopWorkspaceAuto through the backend dispatcher.
-func NewTeamHandler(b *events.Broadcaster, wh *WorkspaceHandler, platformURL, configsDir string) *TeamHandler {
- return &TeamHandler{wh: wh, b: b}
-}
-
-// Collapse handles POST /workspaces/:id/collapse
-// Stops and removes all child workspaces.
-func (h *TeamHandler) Collapse(c *gin.Context) {
- parentID := c.Param("id")
- ctx := c.Request.Context()
-
- // Find children
- rows, err := db.DB.QueryContext(ctx,
- `SELECT id, name FROM workspaces WHERE parent_id = $1 AND status != 'removed'`, parentID)
- if err != nil {
- c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to query children"})
- return
- }
- defer rows.Close()
-
- removed := make([]string, 0)
- for rows.Next() {
- var childID, childName string
- if rows.Scan(&childID, &childName) != nil {
- continue
- }
-
- // Stop the workload via the backend dispatcher (CP for SaaS,
- // Docker for self-hosted). Pre-2026-05-05 this was
- // `if h.provisioner != nil { h.provisioner.Stop(...) }`, which
- // silently skipped on every SaaS tenant — child EC2s kept running
- // after team-collapse until the orphan sweeper caught them
- // (issue #2813).
- if err := h.wh.StopWorkspaceAuto(ctx, childID); err != nil {
- log.Printf("Team collapse: stop %s failed: %v — orphan sweeper will reconcile", childID, err)
- }
-
- // Mark as removed
- if _, err := db.DB.ExecContext(ctx,
- `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2`, models.StatusRemoved, childID); err != nil {
- log.Printf("Team collapse: failed to remove workspace %s: %v", childID, err)
- }
- if _, err := db.DB.ExecContext(ctx,
- `DELETE FROM canvas_layouts WHERE workspace_id = $1`, childID); err != nil {
- log.Printf("Team collapse: failed to delete layout for %s: %v", childID, err)
- }
-
- h.b.RecordAndBroadcast(ctx, "WORKSPACE_REMOVED", childID, map[string]interface{}{})
-
- removed = append(removed, childName)
- }
-
- h.b.RecordAndBroadcast(ctx, "WORKSPACE_COLLAPSED", parentID, map[string]interface{}{
- "removed_children": removed,
- })
-
- c.JSON(http.StatusOK, gin.H{
- "status": "collapsed",
- "removed": removed,
- })
-}
-
-// findTemplateDirByName resolves a workspace name to its template
-// directory. Kept here because callers outside this package may use
-// it, even though the in-package consumer (Expand) is gone.
-//
-// TODO: relocate alongside the templates handler if no other callers
-// surface, or delete entirely after a deprecation cycle.
-func findTemplateDirByName(configsDir, name string) string {
- normalized := normalizeName(name)
-
- candidate := filepath.Join(configsDir, normalized)
- if _, err := os.Stat(filepath.Join(candidate, "config.yaml")); err == nil {
- return candidate
- }
-
- // Fall back to scanning all dirs
- entries, err := os.ReadDir(configsDir)
- if err != nil {
- return ""
- }
- for _, e := range entries {
- if !e.IsDir() {
- continue
- }
- cfgPath := filepath.Join(configsDir, e.Name(), "config.yaml")
- data, err := os.ReadFile(cfgPath)
- if err != nil {
- continue
- }
- var cfg struct {
- Name string `yaml:"name"`
- }
- if json.Unmarshal(data, &cfg) == nil && cfg.Name == name {
- return filepath.Join(configsDir, e.Name())
- }
- if yaml.Unmarshal(data, &cfg) == nil && cfg.Name == name {
- return filepath.Join(configsDir, e.Name())
- }
- }
- return ""
-}
diff --git a/workspace-server/internal/handlers/team_test.go b/workspace-server/internal/handlers/team_test.go
deleted file mode 100644
index e87a92ae..00000000
--- a/workspace-server/internal/handlers/team_test.go
+++ /dev/null
@@ -1,130 +0,0 @@
-package handlers
-
-import (
- "encoding/json"
- "net/http"
- "net/http/httptest"
- "os"
- "path/filepath"
- "testing"
-
- "github.com/DATA-DOG/go-sqlmock"
- "github.com/gin-gonic/gin"
-)
-
-// ---------- TeamHandler: Collapse ----------
-
-func TestTeamCollapse_NoChildren(t *testing.T) {
- mock := setupTestDB(t)
- setupTestRedis(t)
- broadcaster := newTestBroadcaster()
- handler := NewTeamHandler(broadcaster, NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", "/tmp/configs")
-
- // No children
- mock.ExpectQuery("SELECT id, name FROM workspaces WHERE parent_id").
- WithArgs("ws-parent").
- WillReturnRows(sqlmock.NewRows([]string{"id", "name"}))
-
- // WORKSPACE_COLLAPSED broadcast
- mock.ExpectExec("INSERT INTO structure_events").
- WillReturnResult(sqlmock.NewResult(0, 1))
-
- w := httptest.NewRecorder()
- c, _ := gin.CreateTestContext(w)
- c.Params = gin.Params{{Key: "id", Value: "ws-parent"}}
- c.Request = httptest.NewRequest("POST", "/", nil)
-
- handler.Collapse(c)
-
- if w.Code != http.StatusOK {
- t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
- }
- var resp map[string]interface{}
- json.Unmarshal(w.Body.Bytes(), &resp)
- if resp["status"] != "collapsed" {
- t.Errorf("expected status 'collapsed', got %v", resp["status"])
- }
-}
-
-func TestTeamCollapse_WithChildren(t *testing.T) {
- mock := setupTestDB(t)
- setupTestRedis(t)
- broadcaster := newTestBroadcaster()
- handler := NewTeamHandler(broadcaster, NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", "/tmp/configs")
-
- // Two children
- mock.ExpectQuery("SELECT id, name FROM workspaces WHERE parent_id").
- WithArgs("ws-parent").
- WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
- AddRow("child-1", "Worker A").
- AddRow("child-2", "Worker B"))
-
- // UPDATE + DELETE + broadcast for child-1
- mock.ExpectExec("UPDATE workspaces SET status =").
- WithArgs("child-1").
- WillReturnResult(sqlmock.NewResult(0, 1))
- mock.ExpectExec("DELETE FROM canvas_layouts").
- WithArgs("child-1").
- WillReturnResult(sqlmock.NewResult(0, 1))
- mock.ExpectExec("INSERT INTO structure_events").
- WillReturnResult(sqlmock.NewResult(0, 1))
-
- // UPDATE + DELETE + broadcast for child-2
- mock.ExpectExec("UPDATE workspaces SET status =").
- WithArgs("child-2").
- WillReturnResult(sqlmock.NewResult(0, 1))
- mock.ExpectExec("DELETE FROM canvas_layouts").
- WithArgs("child-2").
- WillReturnResult(sqlmock.NewResult(0, 1))
- mock.ExpectExec("INSERT INTO structure_events").
- WillReturnResult(sqlmock.NewResult(0, 1))
-
- // WORKSPACE_COLLAPSED broadcast for parent
- mock.ExpectExec("INSERT INTO structure_events").
- WillReturnResult(sqlmock.NewResult(0, 1))
-
- w := httptest.NewRecorder()
- c, _ := gin.CreateTestContext(w)
- c.Params = gin.Params{{Key: "id", Value: "ws-parent"}}
- c.Request = httptest.NewRequest("POST", "/", nil)
-
- handler.Collapse(c)
-
- if w.Code != http.StatusOK {
- t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
- }
- var resp map[string]interface{}
- json.Unmarshal(w.Body.Bytes(), &resp)
- removed, ok := resp["removed"].([]interface{})
- if !ok || len(removed) != 2 {
- t.Errorf("expected 2 removed children, got %v", resp["removed"])
- }
-}
-// ---------- findTemplateDirByName helper ----------
-
-func TestFindTemplateDirByName_DirectMatch(t *testing.T) {
- dir := t.TempDir()
- subDir := filepath.Join(dir, "mybot")
- os.MkdirAll(subDir, 0755)
- os.WriteFile(filepath.Join(subDir, "config.yaml"), []byte("name: MyBot"), 0644)
-
- result := findTemplateDirByName(dir, "mybot")
- if result != subDir {
- t.Errorf("expected %s, got %s", subDir, result)
- }
-}
-
-func TestFindTemplateDirByName_NotFound(t *testing.T) {
- dir := t.TempDir()
- result := findTemplateDirByName(dir, "nonexistent")
- if result != "" {
- t.Errorf("expected empty string, got %s", result)
- }
-}
-
-func TestFindTemplateDirByName_InvalidConfigsDir(t *testing.T) {
- result := findTemplateDirByName("/nonexistent/path", "anything")
- if result != "" {
- t.Errorf("expected empty string for invalid dir, got %s", result)
- }
-}
diff --git a/workspace-server/internal/handlers/template_import.go b/workspace-server/internal/handlers/template_import.go
index 7d4ab4d1..95b5854f 100644
--- a/workspace-server/internal/handlers/template_import.go
+++ b/workspace-server/internal/handlers/template_import.go
@@ -36,8 +36,14 @@ func normalizeName(name string) string {
return result
}
-// generateDefaultConfig creates a config.yaml from detected prompt files and skills.
-func generateDefaultConfig(name string, files map[string]string) string {
+// generateDefaultConfig creates a config.yaml from detected prompt files
+// and skills. tier is the deployment-aware default (caller passes
+// h.wh.DefaultTier() — T4 on SaaS, T3 on self-hosted) so the generated
+// file matches what POST /workspaces would default to. Pre-#2910 this
+// was hardcoded to 3, which split-brained with the create-handler
+// default on SaaS (T4) and pinned newly-imported templates at T3 even
+// when downstream Create paths picked T4.
+func generateDefaultConfig(name string, files map[string]string, tier int) string {
promptFiles := []string{}
skillSet := map[string]bool{}
@@ -74,9 +80,15 @@ func generateDefaultConfig(name string, files map[string]string) string {
var cfg strings.Builder
cfg.WriteString(`name: "` + escaped + `"` + "\n")
cfg.WriteString("description: Imported agent\n")
- // Default to tier 3 ("Privileged") — matches the workspace.go
- // create handler default. See its comment for rationale.
- cfg.WriteString("version: 1.0.0\ntier: 3\n")
+ // Tier is SaaS-aware via the caller's DefaultTier (#2910 PR-B).
+ // Bounds-checked: invalid input falls back to T3 (the historical
+ // default + the safer-of-the-two when the deployment mode can't
+ // be resolved).
+ if tier < 1 || tier > 4 {
+ tier = 3
+ }
+ cfg.WriteString("version: 1.0.0\n")
+ cfg.WriteString(fmt.Sprintf("tier: %d\n", tier))
cfg.WriteString("model: anthropic:claude-haiku-4-5-20251001\n")
cfg.WriteString("\nprompt_files:\n")
if len(promptFiles) > 0 {
@@ -148,7 +160,11 @@ func (h *TemplatesHandler) Import(c *gin.Context) {
// Auto-generate config.yaml if not provided
if _, exists := body.Files["config.yaml"]; !exists {
- cfg := generateDefaultConfig(body.Name, body.Files)
+ tier := 3
+ if h.wh != nil {
+ tier = h.wh.DefaultTier()
+ }
+ cfg := generateDefaultConfig(body.Name, body.Files, tier)
if err := os.WriteFile(filepath.Join(destDir, "config.yaml"), []byte(cfg), 0600); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to write config.yaml"})
return
@@ -227,7 +243,11 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
if _, exists := body.Files["config.yaml"]; !exists {
// Check if config.yaml exists in container
if _, err := h.execInContainer(ctx, containerName, []string{"test", "-f", "/configs/config.yaml"}); err != nil {
- cfg := generateDefaultConfig(wsName, body.Files)
+ tier := 3
+ if h.wh != nil {
+ tier = h.wh.DefaultTier()
+ }
+ cfg := generateDefaultConfig(wsName, body.Files, tier)
singleFile := map[string]string{"config.yaml": cfg}
h.copyFilesToContainer(ctx, containerName, "/configs", singleFile)
}
diff --git a/workspace-server/internal/handlers/template_import_test.go b/workspace-server/internal/handlers/template_import_test.go
index 42336844..c496f9c5 100644
--- a/workspace-server/internal/handlers/template_import_test.go
+++ b/workspace-server/internal/handlers/template_import_test.go
@@ -55,7 +55,7 @@ func TestGenerateDefaultConfig_WithFiles(t *testing.T) {
"skills/review/templates.md": "Templates",
}
- cfg := generateDefaultConfig("Test Agent", files)
+ cfg := generateDefaultConfig("Test Agent", files, 3)
// Name is emitted as a double-quoted scalar (#221 sanitizer).
if !strings.Contains(cfg, `name: "Test Agent"`) {
@@ -85,7 +85,7 @@ func TestGenerateDefaultConfig_Empty(t *testing.T) {
"data/something.json": `{"key": "value"}`,
}
- cfg := generateDefaultConfig("Empty Agent", files)
+ cfg := generateDefaultConfig("Empty Agent", files, 3)
if !strings.Contains(cfg, `name: "Empty Agent"`) {
t.Errorf("config should contain quoted agent name, got:\n%s", cfg)
@@ -134,7 +134,7 @@ func TestGenerateDefaultConfig_YAMLInjection(t *testing.T) {
for _, tc := range adversarialCases {
t.Run(tc.desc, func(t *testing.T) {
- cfg := generateDefaultConfig(tc.name, map[string]string{})
+ cfg := generateDefaultConfig(tc.name, map[string]string{}, 3)
var parsed map[string]interface{}
if err := yaml.Unmarshal([]byte(cfg), &parsed); err != nil {
t.Fatalf("sanitized config does not parse as YAML: %v\n--- config ---\n%s", err, cfg)
@@ -205,7 +205,7 @@ func TestImport_Success(t *testing.T) {
setupTestRedis(t)
tmpDir := t.TempDir()
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
body := `{
"name": "New Agent",
@@ -245,7 +245,7 @@ func TestImport_MissingName(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
body := `{"files": {"test.md": "content"}}`
@@ -265,7 +265,7 @@ func TestImport_TooManyFiles(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
files := make(map[string]string)
for i := 0; i <= maxUploadFiles; i++ {
@@ -296,7 +296,7 @@ func TestImport_AlreadyExists(t *testing.T) {
tmpDir := t.TempDir()
os.MkdirAll(filepath.Join(tmpDir, "existing-agent"), 0755)
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
body := `{"name": "Existing Agent", "files": {"test.md": "content"}}`
@@ -317,7 +317,7 @@ func TestImport_WithConfigYaml(t *testing.T) {
setupTestRedis(t)
tmpDir := t.TempDir()
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
body := `{
"name": "Custom Agent",
@@ -354,7 +354,7 @@ func TestReplaceFiles_MissingBody(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -373,7 +373,7 @@ func TestReplaceFiles_TooManyFiles(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
files := make(map[string]string)
for i := 0; i <= maxUploadFiles; i++ {
@@ -398,7 +398,7 @@ func TestReplaceFiles_WorkspaceNotFound(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
// ReplaceFiles now selects (name, instance_id, runtime) for the
// restart-cascade. Match the full column list rather than just the
@@ -429,7 +429,7 @@ func TestReplaceFiles_PathTraversal(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
WithArgs("ws-rf-pt").
diff --git a/workspace-server/internal/handlers/templates.go b/workspace-server/internal/handlers/templates.go
index d51dabcd..03776a5d 100644
--- a/workspace-server/internal/handlers/templates.go
+++ b/workspace-server/internal/handlers/templates.go
@@ -31,10 +31,20 @@ const maxUploadFiles = 200
type TemplatesHandler struct {
configsDir string
docker *client.Client
+ // wh is used by Import and ReplaceFiles to call DefaultTier() so a
+ // generated config.yaml's tier matches the SaaS-vs-self-hosted
+ // boundary (#2910 PR-B). nil-tolerant — the field is unused when
+ // the caller doesn't import templates that need a fresh config
+ // generated.
+ wh *WorkspaceHandler
}
-func NewTemplatesHandler(configsDir string, dockerCli *client.Client) *TemplatesHandler {
- return &TemplatesHandler{configsDir: configsDir, docker: dockerCli}
+// NewTemplatesHandler constructs a TemplatesHandler. wh may be nil for
+// callers that only use the read-only template surfaces (List,
+// ReadFile, ListFiles). Import + ReplaceFiles need wh non-nil so the
+// generated config.yaml picks the SaaS-aware default tier.
+func NewTemplatesHandler(configsDir string, dockerCli *client.Client, wh *WorkspaceHandler) *TemplatesHandler {
+ return &TemplatesHandler{configsDir: configsDir, docker: dockerCli, wh: wh}
}
// modelSpec describes a single supported model on a template: its id (sent
diff --git a/workspace-server/internal/handlers/templates_test.go b/workspace-server/internal/handlers/templates_test.go
index cbae8069..3d75bfd5 100644
--- a/workspace-server/internal/handlers/templates_test.go
+++ b/workspace-server/internal/handlers/templates_test.go
@@ -53,7 +53,7 @@ func TestTemplatesList_EmptyDir(t *testing.T) {
setupTestRedis(t)
tmpDir := t.TempDir()
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -99,7 +99,7 @@ skills:
// Create a directory without config.yaml (should be skipped)
os.MkdirAll(filepath.Join(tmpDir, "no-config"), 0755)
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -160,7 +160,7 @@ skills: []
t.Fatalf("write: %v", err)
}
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@@ -237,7 +237,7 @@ skills: []
t.Fatalf("write: %v", err)
}
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@@ -315,7 +315,7 @@ skills: []
t.Fatalf("write: %v", err)
}
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@@ -434,7 +434,7 @@ skills: []
t.Fatalf("write: %v", err)
}
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@@ -512,7 +512,7 @@ skills: []
t.Fatalf("write: %v", err)
}
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@@ -555,7 +555,7 @@ skills: []
t.Fatalf("write: %v", err)
}
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@@ -589,7 +589,7 @@ skills: []
t.Fatalf("write: %v", err)
}
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@@ -661,7 +661,7 @@ skills: []
log.SetOutput(&logBuf)
defer log.SetOutput(prevOutput)
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@@ -698,7 +698,7 @@ func TestTemplatesList_NonexistentDir(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler("/nonexistent/path/to/templates", nil)
+ handler := NewTemplatesHandler("/nonexistent/path/to/templates", nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -723,7 +723,7 @@ func TestListFiles_InvalidRoot(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -748,7 +748,7 @@ func TestListFiles_WorkspaceNotFound(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
WithArgs("ws-nonexist").
@@ -775,7 +775,7 @@ func TestListFiles_FallbackToHost_NoTemplate(t *testing.T) {
setupTestRedis(t)
tmpDir := t.TempDir()
- handler := NewTemplatesHandler(tmpDir, nil) // nil docker = no container
+ handler := NewTemplatesHandler(tmpDir, nil, nil) // nil docker = no container
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
WithArgs("ws-fallback").
@@ -815,7 +815,7 @@ func TestListFiles_FallbackToHost_WithTemplate(t *testing.T) {
os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte("name: Test Agent\n"), 0644)
os.WriteFile(filepath.Join(tmplDir, "system-prompt.md"), []byte("# prompt"), 0644)
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
WithArgs("ws-tmpl").
@@ -849,7 +849,7 @@ func TestReadFile_PathTraversal(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -870,7 +870,7 @@ func TestReadFile_InvalidRoot(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -892,7 +892,7 @@ func TestReadFile_WorkspaceNotFound(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
WithArgs("ws-nf").
@@ -926,7 +926,7 @@ func TestReadFile_FallbackToHost_Success(t *testing.T) {
os.MkdirAll(tmplDir, 0755)
os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte("name: Reader Agent\ntier: 1\n"), 0644)
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
// instance_id="" → SaaS branch skipped → falls through to local
// Docker / template-dir host fallback (the only path the test
@@ -967,7 +967,7 @@ func TestReadFile_FallbackToHost_NotFound(t *testing.T) {
setupTestRedis(t)
tmpDir := t.TempDir()
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
WithArgs("ws-nofile").
@@ -999,7 +999,7 @@ func TestWriteFile_PathTraversal(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -1023,7 +1023,7 @@ func TestWriteFile_InvalidBody(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -1046,7 +1046,7 @@ func TestWriteFile_WorkspaceNotFound(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
WithArgs("ws-wf-nf").
@@ -1080,7 +1080,7 @@ func TestDeleteFile_PathTraversal(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -1101,7 +1101,7 @@ func TestDeleteFile_WorkspaceNotFound(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
WithArgs("ws-del-nf").
@@ -1133,7 +1133,7 @@ func TestResolveTemplateDir_ByNormalizedName(t *testing.T) {
tmplDir := filepath.Join(tmpDir, "my-agent")
os.MkdirAll(tmplDir, 0755)
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
result := handler.resolveTemplateDir("My Agent")
if result != tmplDir {
@@ -1143,7 +1143,7 @@ func TestResolveTemplateDir_ByNormalizedName(t *testing.T) {
func TestResolveTemplateDir_NotFound(t *testing.T) {
tmpDir := t.TempDir()
- handler := NewTemplatesHandler(tmpDir, nil)
+ handler := NewTemplatesHandler(tmpDir, nil, nil)
result := handler.resolveTemplateDir("Nonexistent Agent")
if result != "" {
@@ -1177,7 +1177,7 @@ func TestCWE78_DeleteFile_TraversalVariants(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
- handler := NewTemplatesHandler(t.TempDir(), nil)
+ handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go
index 3b5b4c02..cf210342 100644
--- a/workspace-server/internal/handlers/workspace.go
+++ b/workspace-server/internal/handlers/workspace.go
@@ -148,15 +148,15 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
id := uuid.New().String()
awarenessNamespace := workspaceAwarenessNamespace(id)
if payload.Tier == 0 {
- // Default to T3 ("Privileged"). T3 gives agents a read_write
- // workspace mount + Docker daemon access — the level most
- // templates need to do real work. Lower tiers (T1 sandboxed,
- // T2 standard) stay available as explicit opt-ins for
- // low-trust agents. Matches the Canvas CreateWorkspaceDialog
- // default for self-hosted hosts (SaaS defaults to T4 via
- // CreateWorkspaceDialog because each SaaS workspace runs on
- // its own sibling EC2).
- payload.Tier = 3
+ // SaaS-aware default. SaaS → T4 (full host access; each
+ // workspace runs on its own sibling EC2 so the tier boundary
+ // is a Docker resource limit on the only container present —
+ // no neighbour to protect from). Self-hosted → T3 (read-write
+ // workspace mount + Docker daemon access, most templates'
+ // baseline). Lower tiers (T1 sandboxed, T2 standard) remain
+ // explicit opt-ins for low-trust agents. Matches the canvas
+ // CreateWorkspaceDialog defaults so the API and the UI agree.
+ payload.Tier = h.DefaultTier()
}
// Detect runtime + default model from template config.yaml when the
diff --git a/workspace-server/internal/handlers/workspace_dispatchers.go b/workspace-server/internal/handlers/workspace_dispatchers.go
index 23237d00..3df25877 100644
--- a/workspace-server/internal/handlers/workspace_dispatchers.go
+++ b/workspace-server/internal/handlers/workspace_dispatchers.go
@@ -35,6 +35,7 @@ import (
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
+ "github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
)
// HasProvisioner reports whether either backend (CP or local Docker) is
@@ -49,6 +50,32 @@ func (h *WorkspaceHandler) HasProvisioner() bool {
return h.cpProv != nil || h.provisioner != nil
}
+// IsSaaS reports whether the CP (EC2) provisioner is wired. Each SaaS
+// workspace runs on its own sibling EC2, so the per-workspace tier
+// boundary is a Docker resource limit applied to the only container
+// on that EC2 — there's no neighbour to protect from. Self-hosted
+// runs many workspaces in one Docker daemon on a single host, so
+// the tier-2-by-default safe-neighbour-share posture stays.
+//
+// Tier defaults across Create / OrgImport / canvas EmptyState branch
+// on IsSaaS so SaaS users get T4 (full host access) by default and
+// self-hosted users keep the lower-trust caps.
+func (h *WorkspaceHandler) IsSaaS() bool {
+ return h.cpProv != nil
+}
+
+// DefaultTier is the SaaS-aware default tier. T4 on SaaS (single
+// container per EC2 — full host access matches the boundary), T3 on
+// self-hosted (read-write workspace mount + Docker daemon access,
+// most templates' baseline). Callers default to this when the user
+// hasn't explicitly picked a tier.
+func (h *WorkspaceHandler) DefaultTier() int {
+ if h.IsSaaS() {
+ return 4
+ }
+ return 3
+}
+
// provisionWorkspaceAuto picks the backend (CP for SaaS, local Docker
// for self-hosted) and starts provisioning in a goroutine. Returns true
// when a backend was kicked off, false when neither is wired.
@@ -75,6 +102,14 @@ func (h *WorkspaceHandler) HasProvisioner() bool {
// lives in prepareProvisionContext (shared by both per-backend
// goroutines).
func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
+ provlog.Event("provision.start", map[string]any{
+ "workspace_id": workspaceID,
+ "name": payload.Name,
+ "tier": payload.Tier,
+ "runtime": payload.Runtime,
+ "template": payload.Template,
+ "sync": false,
+ })
if h.cpProv != nil {
go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
return true
@@ -110,6 +145,14 @@ func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath stri
// Keep these two helpers in sync — when one grows a new arm (third
// backend, retry semantics), the other should too.
func (h *WorkspaceHandler) provisionWorkspaceAutoSync(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
+ provlog.Event("provision.start", map[string]any{
+ "workspace_id": workspaceID,
+ "name": payload.Name,
+ "tier": payload.Tier,
+ "runtime": payload.Runtime,
+ "template": payload.Template,
+ "sync": true,
+ })
if h.cpProv != nil {
h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
return true
diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go
index 3b3097c4..c5712be5 100644
--- a/workspace-server/internal/handlers/workspace_restart.go
+++ b/workspace-server/internal/handlers/workspace_restart.go
@@ -12,6 +12,7 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
+ "github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
"github.com/gin-gonic/gin"
)
@@ -431,6 +432,16 @@ func coalesceRestart(workspaceID string, cycle func()) {
// NPE'd before reaching the reprovision step — which is why every SaaS dead-
// agent incident pre-this-fix required manual restart from canvas.
func (h *WorkspaceHandler) stopForRestart(ctx context.Context, workspaceID string) {
+ backend := "none"
+ if h.provisioner != nil {
+ backend = "docker"
+ } else if h.cpProv != nil {
+ backend = "cp"
+ }
+ provlog.Event("restart.pre_stop", map[string]any{
+ "workspace_id": workspaceID,
+ "backend": backend,
+ })
if h.provisioner != nil {
h.provisioner.Stop(ctx, workspaceID)
return
diff --git a/workspace-server/internal/handlers/workspaces_insert_allowlist_test.go b/workspace-server/internal/handlers/workspaces_insert_allowlist_test.go
new file mode 100644
index 00000000..066c6576
--- /dev/null
+++ b/workspace-server/internal/handlers/workspaces_insert_allowlist_test.go
@@ -0,0 +1,159 @@
+package handlers
+
+import (
+ "go/ast"
+ "go/parser"
+ "go/token"
+ "os"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+ "testing"
+)
+
+// TestINSERTworkspacesAllowlist enumerates every function in this
+// package that emits an `INSERT INTO workspaces (` SQL literal, and
+// pins the result against an explicit allowlist. New entries fail the
+// build until a reviewer adds them — forcing the question "what
+// makes this INSERT idempotent?" at PR-review time, not after the
+// next bulk-create leak.
+//
+// Pairs with TestCreateWorkspaceTree_CallsLookupBeforeInsert (the
+// behavior pin for the one bulk path). Together they close the
+// regression class: this test catches "did a new function start
+// inserting workspaces?", that test catches "did the existing bulk
+// path drop its idempotency check?". Either fires immediately when
+// drift happens.
+//
+// Why allowlist rather than pure behavior gate (per memory
+// feedback_behavior_based_ast_gates.md): the bulk-create leak class
+// is small + stable (1 path today), and a behavior gate would have
+// to disambiguate "iterating a YAML array of workspaces" from the
+// many other `for ... range` patterns in a Create handler (config
+// lines, secrets map, channels). Type-info-aware AST analysis would
+// catch the YAML-iteration shape but is heavy. Allowlisting is the
+// minimum-viable pin: any PR that adds a new INSERT site is forced
+// to pause, add an entry here, and document the safety mechanism in
+// the comment alongside.
+//
+// RFC #2867 class 1.
+func TestINSERTworkspacesAllowlist(t *testing.T) {
+ // expected[key] = safety mechanism. Keep the comment pinned to
+ // what makes that function safe — if the safety changes, the
+ // allowlist must be re-reviewed.
+ expected := map[string]string{
+ // org_import.createWorkspaceTree: lookupExistingChild
+ // before INSERT (#2868 phase 3). Also pinned by
+ // TestCreateWorkspaceTree_CallsLookupBeforeInsert.
+ "org_import.go:createWorkspaceTree": "lookup-then-insert via lookupExistingChild",
+ // registry.Register: external workspace registers itself with
+ // its known UUID; INSERT is idempotent via ON CONFLICT (id)
+ // DO UPDATE — re-registration upserts, never duplicates.
+ "registry.go:Register": "ON CONFLICT (id) DO UPDATE",
+ // workspace.Create: single-workspace POST /workspaces from a
+ // human or automation. No iteration; payload describes one
+ // workspace; UUID is server-generated. Caller intent IS to
+ // create, so no idempotency check is needed.
+ "workspace.go:Create": "single-workspace POST, server-generated UUID",
+ }
+
+ actual := map[string]string{}
+
+ wd, err := os.Getwd()
+ if err != nil {
+ t.Fatalf("getwd: %v", err)
+ }
+
+ entries, err := os.ReadDir(wd)
+ if err != nil {
+ t.Fatalf("readdir %s: %v", wd, err)
+ }
+ for _, ent := range entries {
+ name := ent.Name()
+ if ent.IsDir() {
+ continue
+ }
+ if !strings.HasSuffix(name, ".go") {
+ continue
+ }
+ if strings.HasSuffix(name, "_test.go") {
+ continue
+ }
+ path := filepath.Join(wd, name)
+ fset := token.NewFileSet()
+ file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
+ if err != nil {
+ t.Fatalf("parse %s: %v", path, err)
+ }
+ // For each top-level FuncDecl, walk its body and check for an
+ // `INSERT INTO workspaces (` SQL literal in any CallExpr arg.
+ for _, decl := range file.Decls {
+ fn, ok := decl.(*ast.FuncDecl)
+ if !ok || fn.Body == nil {
+ continue
+ }
+ var foundInsert bool
+ ast.Inspect(fn.Body, func(n ast.Node) bool {
+ lit, ok := n.(*ast.BasicLit)
+ if !ok || lit.Kind != token.STRING {
+ return true
+ }
+ raw := lit.Value
+ if unq, err := strconv.Unquote(raw); err == nil {
+ raw = unq
+ }
+ if workspacesInsertRE.MatchString(raw) {
+ foundInsert = true
+ return false
+ }
+ return true
+ })
+ if foundInsert {
+ key := name + ":" + fn.Name.Name
+ actual[key] = "(observed via AST walk)"
+ }
+ }
+ }
+
+ // Compute set diffs so failures point at the specific drift.
+ missing := []string{}
+ unexpected := []string{}
+ for k := range expected {
+ if _, ok := actual[k]; !ok {
+ missing = append(missing, k)
+ }
+ }
+ for k := range actual {
+ if _, ok := expected[k]; !ok {
+ unexpected = append(unexpected, k)
+ }
+ }
+ sort.Strings(missing)
+ sort.Strings(unexpected)
+
+ if len(unexpected) > 0 {
+ t.Errorf(`new function(s) emit `+"`INSERT INTO workspaces (`"+` and aren't in the allowlist:
+ %s
+
+If this is a legitimate addition, add an entry to expected[] in this test
+with the safety mechanism pinned in the comment alongside (lookup-then-
+insert / ON CONFLICT / single-workspace path / etc.). The bulk-create
+regression class needs explicit per-handler review, not silent drift.
+
+Reference: RFC #2867 class 1, sibling test
+TestCreateWorkspaceTree_CallsLookupBeforeInsert.`,
+ strings.Join(unexpected, "\n "))
+ }
+ if len(missing) > 0 {
+ t.Errorf(`expected function(s) no longer emit `+"`INSERT INTO workspaces (`"+`:
+ %s
+
+Either the function was renamed/deleted (update the allowlist) or the
+INSERT was moved out (verify the new home is also covered). Don't just
+delete the entry — confirm the safety mechanism is still in place
+elsewhere or that the workspace-create path was intentionally
+restructured.`,
+ strings.Join(missing, "\n "))
+ }
+}
diff --git a/workspace-server/internal/metrics/metrics.go b/workspace-server/internal/metrics/metrics.go
index 7f0852a8..6632d524 100644
--- a/workspace-server/internal/metrics/metrics.go
+++ b/workspace-server/internal/metrics/metrics.go
@@ -5,14 +5,15 @@
//
// Exposed metrics:
//
-// molecule_http_requests_total{method,path,status} - counter
-// molecule_http_request_duration_seconds{method,path} - counter (sum, for avg rate)
-// molecule_websocket_connections_active - gauge
-// go_goroutines - gauge
-// go_memstats_alloc_bytes - gauge
-// go_memstats_sys_bytes - gauge
-// go_memstats_heap_inuse_bytes - gauge
-// go_gc_duration_seconds_total - counter
+// molecule_http_requests_total{method,path,status} - counter
+// molecule_http_request_duration_seconds{method,path} - counter (sum, for avg rate)
+// molecule_websocket_connections_active - gauge
+// molecule_pending_uploads_swept_total{outcome} - counter (acked|expired|error)
+// go_goroutines - gauge
+// go_memstats_alloc_bytes - gauge
+// go_memstats_sys_bytes - gauge
+// go_memstats_heap_inuse_bytes - gauge
+// go_gc_duration_seconds_total - counter
package metrics
import (
@@ -38,6 +39,12 @@ var (
reqCounts = map[reqKey]int64{} // molecule_http_requests_total
reqDurSums = map[reqKey]float64{} // sum of durations (seconds)
activeWSConns int64 // molecule_websocket_connections_active
+
+ // pendinguploads sweeper counters — atomic so the sweeper goroutine
+ // doesn't contend with the /metrics handler.
+ pendingUploadsSweptAcked int64 // molecule_pending_uploads_swept_total{outcome="acked"}
+ pendingUploadsSweptExpired int64 // molecule_pending_uploads_swept_total{outcome="expired"}
+ pendingUploadsSweepErrors int64 // molecule_pending_uploads_swept_total{outcome="error"}
)
// Middleware records per-request counts and latency.
@@ -76,6 +83,50 @@ func TrackWSConnect() { atomic.AddInt64(&activeWSConns, 1) }
// Call from the WebSocket disconnect / cleanup path.
func TrackWSDisconnect() { atomic.AddInt64(&activeWSConns, -1) }
+// phantomBusyResets is the cumulative count of workspace rows the
+// phantom-busy sweep reset (active_tasks=0 → active_tasks=0+counter
+// cleared). Surfaced as molecule_phantom_busy_resets_total — a high
+// reset rate signals a regression in task-lifecycle accounting (most
+// often: missing env vars cause claude --print to time out, the
+// agent loop never decrements active_tasks, and the sweep cleans up
+// the counter ~10 min later). Issue #2865.
+var phantomBusyResets int64
+
+// TrackPhantomBusyReset increments the phantom-busy reset counter.
+// Called from sweepPhantomBusy in workspace-server/internal/scheduler/
+// after each row whose active_tasks was reset to 0. Idempotent +
+// goroutine-safe; called once per row per sweep tick.
+func TrackPhantomBusyReset() { atomic.AddInt64(&phantomBusyResets, 1) }
+
+// PendingUploadsSwept records a successful sweep cycle. acked/expired
+// are added to the per-outcome counters so dashboards can spot the
+// stuck-fetch pattern (high expired, low acked) vs healthy churn.
+func PendingUploadsSwept(acked, expired int) {
+ if acked > 0 {
+ atomic.AddInt64(&pendingUploadsSweptAcked, int64(acked))
+ }
+ if expired > 0 {
+ atomic.AddInt64(&pendingUploadsSweptExpired, int64(expired))
+ }
+}
+
+// PendingUploadsSweepError records a sweeper-cycle failure (transient
+// DB error etc). Counted separately so the rate of errored sweeps is
+// observable independent of how many rows the successful sweeps deleted.
+func PendingUploadsSweepError() {
+ atomic.AddInt64(&pendingUploadsSweepErrors, 1)
+}
+
+// PendingUploadsSweepCounts returns the current (acked, expired, error)
+// totals. Exposed for tests that need a deterministic delta probe of
+// the sweeper's metric writes — the /metrics endpoint is the production
+// observability surface; this is a unit-test escape hatch.
+func PendingUploadsSweepCounts() (acked, expired, errored int64) {
+ return atomic.LoadInt64(&pendingUploadsSweptAcked),
+ atomic.LoadInt64(&pendingUploadsSweptExpired),
+ atomic.LoadInt64(&pendingUploadsSweepErrors)
+}
+
// Handler returns a Gin handler that serialises all collected metrics in
// Prometheus text exposition format (v0.0.4). Mount this at GET /metrics.
func Handler() gin.HandlerFunc {
@@ -144,6 +195,21 @@ func Handler() gin.HandlerFunc {
writeln(w, "# HELP molecule_websocket_connections_active Number of active WebSocket connections.")
writeln(w, "# TYPE molecule_websocket_connections_active gauge")
fmt.Fprintf(w, "molecule_websocket_connections_active %d\n", atomic.LoadInt64(&activeWSConns))
+
+ // ── Molecule AI scheduler ──────────────────────────────────────────────
+ writeln(w, "# HELP molecule_phantom_busy_resets_total Cumulative count of workspace rows reset by the phantom-busy sweep (active_tasks cleared after >10 min of activity_log silence). High reset rate signals task-lifecycle accounting regressions — see issue #2865.")
+ writeln(w, "# TYPE molecule_phantom_busy_resets_total counter")
+ fmt.Fprintf(w, "molecule_phantom_busy_resets_total %d\n", atomic.LoadInt64(&phantomBusyResets))
+
+ // ── Pending-uploads sweeper ────────────────────────────────────────────
+ writeln(w, "# HELP molecule_pending_uploads_swept_total Pending-uploads rows deleted by the GC sweeper, by outcome.")
+ writeln(w, "# TYPE molecule_pending_uploads_swept_total counter")
+ fmt.Fprintf(w, "molecule_pending_uploads_swept_total{outcome=\"acked\"} %d\n",
+ atomic.LoadInt64(&pendingUploadsSweptAcked))
+ fmt.Fprintf(w, "molecule_pending_uploads_swept_total{outcome=\"expired\"} %d\n",
+ atomic.LoadInt64(&pendingUploadsSweptExpired))
+ fmt.Fprintf(w, "molecule_pending_uploads_swept_total{outcome=\"error\"} %d\n",
+ atomic.LoadInt64(&pendingUploadsSweepErrors))
}
}
diff --git a/workspace-server/internal/metrics/metrics_test.go b/workspace-server/internal/metrics/metrics_test.go
new file mode 100644
index 00000000..d722a1bd
--- /dev/null
+++ b/workspace-server/internal/metrics/metrics_test.go
@@ -0,0 +1,104 @@
+package metrics
+
+// Tests for the phantom-busy reset counter wired up by issue #2865.
+// The counter is exposed at /metrics as
+// molecule_phantom_busy_resets_total. A high steady-state value
+// signals task-lifecycle accounting regressions in the agent loop —
+// see scheduler.sweepPhantomBusy for the writer.
+
+import (
+ "net/http/httptest"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "testing"
+
+ "github.com/gin-gonic/gin"
+)
+
+// resetForTest zeroes the counter so a single test's TrackPhantomBusyReset
+// calls don't compound onto a previous test's run. metrics.go's package-
+// level state means every test that touches the counter must reset.
+func resetForTest() {
+ atomic.StoreInt64(&phantomBusyResets, 0)
+}
+
+func TestTrackPhantomBusyReset_IncrementsCounter(t *testing.T) {
+ resetForTest()
+ for i := 0; i < 7; i++ {
+ TrackPhantomBusyReset()
+ }
+ got := atomic.LoadInt64(&phantomBusyResets)
+ if got != 7 {
+ t.Errorf("counter after 7 calls = %d, want 7", got)
+ }
+}
+
+func TestTrackPhantomBusyReset_RaceFreeUnderConcurrentWrites(t *testing.T) {
+ resetForTest()
+ var wg sync.WaitGroup
+ const goroutines = 50
+ const callsPerGoroutine = 200
+ wg.Add(goroutines)
+ for i := 0; i < goroutines; i++ {
+ go func() {
+ defer wg.Done()
+ for j := 0; j < callsPerGoroutine; j++ {
+ TrackPhantomBusyReset()
+ }
+ }()
+ }
+ wg.Wait()
+ want := int64(goroutines * callsPerGoroutine)
+ got := atomic.LoadInt64(&phantomBusyResets)
+ if got != want {
+ t.Errorf("counter under concurrent writes = %d, want %d (lost increments → atomic broken)",
+ got, want)
+ }
+}
+
+func TestHandler_ExposesPhantomBusyResetsCounter(t *testing.T) {
+ resetForTest()
+ for i := 0; i < 3; i++ {
+ TrackPhantomBusyReset()
+ }
+
+ gin.SetMode(gin.TestMode)
+ r := gin.New()
+ r.GET("/metrics", Handler())
+
+ w := httptest.NewRecorder()
+ req := httptest.NewRequest("GET", "/metrics", nil)
+ r.ServeHTTP(w, req)
+
+ body := w.Body.String()
+ // HELP + TYPE lines must precede the metric (Prometheus text exposition format).
+ if !strings.Contains(body, "# HELP molecule_phantom_busy_resets_total") {
+ t.Errorf("metrics output missing HELP line for molecule_phantom_busy_resets_total:\n%s", body)
+ }
+ if !strings.Contains(body, "# TYPE molecule_phantom_busy_resets_total counter") {
+ t.Errorf("metrics output missing TYPE line for molecule_phantom_busy_resets_total:\n%s", body)
+ }
+ if !strings.Contains(body, "molecule_phantom_busy_resets_total 3\n") {
+ t.Errorf("metrics output missing counter value 3:\n%s", body)
+ }
+}
+
+func TestHandler_PhantomBusyResetsZeroByDefault(t *testing.T) {
+ // Fresh process should report 0 — pin the contract so a future
+ // refactor that lazy-inits the counter to nil doesn't silently
+ // drop the metric from /metrics.
+ resetForTest()
+
+ gin.SetMode(gin.TestMode)
+ r := gin.New()
+ r.GET("/metrics", Handler())
+
+ w := httptest.NewRecorder()
+ req := httptest.NewRequest("GET", "/metrics", nil)
+ r.ServeHTTP(w, req)
+
+ if !strings.Contains(w.Body.String(), "molecule_phantom_busy_resets_total 0\n") {
+ t.Errorf("metric must report 0 by default:\n%s", w.Body.String())
+ }
+}
diff --git a/workspace-server/internal/pendinguploads/export_test.go b/workspace-server/internal/pendinguploads/export_test.go
new file mode 100644
index 00000000..c758b629
--- /dev/null
+++ b/workspace-server/internal/pendinguploads/export_test.go
@@ -0,0 +1,17 @@
+package pendinguploads
+
+import (
+ "context"
+ "time"
+)
+
+// StartSweeperWithIntervalForTest exposes startSweeperWithInterval to
+// the external test package. The production code uses StartSweeper
+// (which pins the canonical SweepInterval); tests pin a short interval
+// to exercise the ticker-driven cycle without burning real wall-clock
+// time. The Go convention `export_test.go` keeps this seam OUT of the
+// production binary — files ending in _test.go are stripped at build
+// time, so this re-export only exists during `go test`.
+func StartSweeperWithIntervalForTest(ctx context.Context, storage Storage, ackRetention, interval time.Duration) {
+ startSweeperWithInterval(ctx, storage, ackRetention, interval)
+}
diff --git a/workspace-server/internal/pendinguploads/storage.go b/workspace-server/internal/pendinguploads/storage.go
index 0289c9b8..c4bcaf92 100644
--- a/workspace-server/internal/pendinguploads/storage.go
+++ b/workspace-server/internal/pendinguploads/storage.go
@@ -72,6 +72,28 @@ type Record struct {
ExpiresAt time.Time
}
+// SweepResult is the per-cycle accounting from Sweep. Both counts are
+// non-negative; Total is just Acked + Expired for log/metrics
+// convenience. Phase 3 metrics expose these as separate counters so
+// dashboards can spot a stuck-ack pattern (high Expired, low Acked) vs.
+// healthy churn (Acked dominates).
+type SweepResult struct {
+ Acked int // rows deleted because acked_at + retention elapsed
+ Expired int // rows deleted because expires_at < now AND never acked
+}
+
+// Total returns the sum of Acked + Expired — convenient for log lines.
+func (r SweepResult) Total() int { return r.Acked + r.Expired }
+
+// PutItem is one file in a PutBatch call. Same per-field rules as Put —
+// empty content, missing filename, or content > MaxFileBytes is rejected
+// up-front so a bad item in the batch doesn't poison the transaction.
+type PutItem struct {
+ Content []byte
+ Filename string
+ Mimetype string
+}
+
// Storage is the platform-side persistence boundary for poll-mode chat
// uploads. The Postgres implementation backs all callers today; an S3-
// backed implementation can drop in once RFC #2789 lands by making
@@ -86,6 +108,17 @@ type Storage interface {
// content > MaxFileBytes return errors before any DB write.
Put(ctx context.Context, workspaceID uuid.UUID, content []byte, filename, mimetype string) (uuid.UUID, error)
+ // PutBatch inserts N uploads atomically — either all rows commit or
+ // none do. Returns assigned file_ids in input order on success;
+ // returns an error and does NOT insert any row on failure.
+ //
+ // Use this from multi-file upload handlers so a per-row failure on
+ // row K doesn't leave rows 1..K-1 orphaned in the table (a client
+ // retry would then double-insert them on success). All-or-nothing
+ // semantics match the multipart request the canvas sends — either
+ // the whole batch succeeds or the user re-uploads.
+ PutBatch(ctx context.Context, workspaceID uuid.UUID, items []PutItem) ([]uuid.UUID, error)
+
// Get returns the full row including content. Returns ErrNotFound
// when the row is absent, acked, or past expires_at. Caller should
// not differentiate the three cases in the response — from the
@@ -103,6 +136,18 @@ type Storage interface {
// absent or already expired; on already-acked, returns nil so
// the workspace's at-least-once retry succeeds without an error.
Ack(ctx context.Context, fileID uuid.UUID) error
+
+ // Sweep deletes rows past their retention window:
+ // - acked rows older than ackRetention (give the workspace a
+ // window to re-fetch in case it processed but failed to write
+ // the file before crashing — at-least-once behavior).
+ // - unacked rows past expires_at (the platform's hard TTL — 24h
+ // by default; a workspace that hasn't fetched by then is
+ // considered dead from the upload's perspective).
+ // Returns the per-category deletion counts for observability.
+ // Errors are surfaced to the caller; a transient DB error must NOT
+ // crash the sweeper loop (it just retries on the next tick).
+ Sweep(ctx context.Context, ackRetention time.Duration) (SweepResult, error)
}
// PostgresStorage is the production Storage implementation backed by
@@ -149,6 +194,64 @@ func (p *PostgresStorage) Put(ctx context.Context, workspaceID uuid.UUID, conten
return fileID, nil
}
+// PutBatch inserts every item atomically inside a single Tx. On any
+// per-item validation or per-row INSERT error the Tx is rolled back and
+// the caller sees the error without any rows committed — no partial
+// orphans for a multi-file upload that fails mid-batch.
+//
+// Validation runs BEFORE BEGIN so a bad input shape (empty content,
+// over-cap size) doesn't even open a Tx. Once we're in the Tx, the only
+// failures expected are DB-side (broken connection, statement timeout)
+// — those abort cleanly via Rollback.
+func (p *PostgresStorage) PutBatch(ctx context.Context, workspaceID uuid.UUID, items []PutItem) ([]uuid.UUID, error) {
+ if len(items) == 0 {
+ return nil, nil
+ }
+ for i, it := range items {
+ if len(it.Content) == 0 {
+ return nil, fmt.Errorf("pendinguploads: item %d: empty content", i)
+ }
+ if len(it.Content) > MaxFileBytes {
+ return nil, ErrTooLarge
+ }
+ if it.Filename == "" {
+ return nil, fmt.Errorf("pendinguploads: item %d: empty filename", i)
+ }
+ if len(it.Filename) > 100 {
+ return nil, fmt.Errorf("pendinguploads: item %d: filename exceeds 100 chars", i)
+ }
+ }
+
+ tx, err := p.db.BeginTx(ctx, nil)
+ if err != nil {
+ return nil, fmt.Errorf("pendinguploads: begin tx: %w", err)
+ }
+ // Defer-rollback is safe even after a successful Commit — the second
+ // Rollback is a no-op (database/sql tracks tx state).
+ defer func() {
+ _ = tx.Rollback()
+ }()
+
+ out := make([]uuid.UUID, 0, len(items))
+ for i, it := range items {
+ var fid uuid.UUID
+ err := tx.QueryRowContext(ctx, `
+ INSERT INTO pending_uploads (workspace_id, content, size_bytes, filename, mimetype)
+ VALUES ($1, $2, $3, $4, $5)
+ RETURNING file_id
+ `, workspaceID, it.Content, int64(len(it.Content)), it.Filename, it.Mimetype).Scan(&fid)
+ if err != nil {
+ return nil, fmt.Errorf("pendinguploads: batch insert item %d: %w", i, err)
+ }
+ out = append(out, fid)
+ }
+
+ if err := tx.Commit(); err != nil {
+ return nil, fmt.Errorf("pendinguploads: commit batch: %w", err)
+ }
+ return out, nil
+}
+
func (p *PostgresStorage) Get(ctx context.Context, fileID uuid.UUID) (Record, error) {
// The expires_at + acked_at filter in the WHERE clause means a
// caller sees ErrNotFound for absent / acked / expired without
@@ -251,3 +354,41 @@ func (p *PostgresStorage) Ack(ctx context.Context, fileID uuid.UUID) error {
// the workspace's intent ("I'm done with this file") was honored.
return nil
}
+
+// Sweep deletes acked rows past their retention window plus any
+// unacked rows whose hard TTL has elapsed. Single round-trip: a CTE
+// captures the deletion in one DELETE … RETURNING and the outer
+// SELECT sums by category. Cheaper and tighter than two round trips,
+// and atomic w.r.t. concurrent writes (the WHERE predicate sees a
+// consistent snapshot via Postgres MVCC).
+//
+// ackRetention=0 deletes all acked rows immediately; values <0 are
+// clamped to 0 for safety. Caller defaults are documented at
+// StartSweeper's DefaultAckRetention.
+func (p *PostgresStorage) Sweep(ctx context.Context, ackRetention time.Duration) (SweepResult, error) {
+ if ackRetention < 0 {
+ ackRetention = 0
+ }
+ // make_interval expects integer seconds — Postgres accepts a
+ // floating point but we deliberately round to the nearest second
+ // so test fixtures pin a deterministic value across PG versions.
+ retentionSecs := int64(ackRetention.Seconds())
+
+ var acked, expired int
+ err := p.db.QueryRowContext(ctx, `
+ WITH deleted AS (
+ DELETE FROM pending_uploads
+ WHERE (acked_at IS NOT NULL AND acked_at < now() - make_interval(secs => $1))
+ OR (acked_at IS NULL AND expires_at < now())
+ RETURNING (acked_at IS NOT NULL) AS was_acked
+ )
+ SELECT
+ COALESCE(SUM(CASE WHEN was_acked THEN 1 ELSE 0 END), 0)::int AS acked,
+ COALESCE(SUM(CASE WHEN NOT was_acked THEN 1 ELSE 0 END), 0)::int AS expired
+ FROM deleted
+ `, retentionSecs).Scan(&acked, &expired)
+ if err != nil {
+ return SweepResult{}, fmt.Errorf("pendinguploads: sweep: %w", err)
+ }
+ return SweepResult{Acked: acked, Expired: expired}, nil
+}
diff --git a/workspace-server/internal/pendinguploads/storage_test.go b/workspace-server/internal/pendinguploads/storage_test.go
index 45f797c7..c6793c10 100644
--- a/workspace-server/internal/pendinguploads/storage_test.go
+++ b/workspace-server/internal/pendinguploads/storage_test.go
@@ -71,6 +71,18 @@ const (
SELECT acked_at FROM pending_uploads
WHERE file_id = $1 AND expires_at > now()
`
+ sweepSQL = `
+ WITH deleted AS (
+ DELETE FROM pending_uploads
+ WHERE (acked_at IS NOT NULL AND acked_at < now() - make_interval(secs => $1))
+ OR (acked_at IS NULL AND expires_at < now())
+ RETURNING (acked_at IS NOT NULL) AS was_acked
+ )
+ SELECT
+ COALESCE(SUM(CASE WHEN was_acked THEN 1 ELSE 0 END), 0)::int AS acked,
+ COALESCE(SUM(CASE WHEN NOT was_acked THEN 1 ELSE 0 END), 0)::int AS expired
+ FROM deleted
+ `
)
// ----- Put ------------------------------------------------------------------
@@ -398,3 +410,324 @@ func TestAck_DBErrorOnDisambiguate_Wrapped(t *testing.T) {
t.Fatalf("expected wrapped disambiguate error, got %v", err)
}
}
+
+// ----- Sweep ----------------------------------------------------------------
+
+func TestSweep_DeletesAckedAndExpired_ReturnsCounts(t *testing.T) {
+ db, mock := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ mock.ExpectQuery(sweepSQL).
+ WithArgs(int64(3600)). // 1h retention
+ WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(7, 2))
+
+ res, err := store.Sweep(context.Background(), time.Hour)
+ if err != nil {
+ t.Fatalf("Sweep: %v", err)
+ }
+ if res.Acked != 7 || res.Expired != 2 || res.Total() != 9 {
+ t.Errorf("got %+v want acked=7 expired=2 total=9", res)
+ }
+}
+
+func TestSweep_NothingToDelete_ReturnsZero(t *testing.T) {
+ db, mock := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ mock.ExpectQuery(sweepSQL).
+ WithArgs(int64(3600)).
+ WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(0, 0))
+
+ res, err := store.Sweep(context.Background(), time.Hour)
+ if err != nil {
+ t.Fatalf("Sweep: %v", err)
+ }
+ if res.Total() != 0 {
+ t.Errorf("got %+v, want zero result", res)
+ }
+}
+
+func TestSweep_NegativeRetentionClampedToZero(t *testing.T) {
+ db, mock := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ // Negative retention must clamp to 0; the SQL gets `secs => 0` so an
+ // acked-just-now row is eligible for deletion immediately. Pinned
+ // here because passing the raw negative through `make_interval` would
+ // silently shift acked_at → future and effectively retain rows
+ // forever — exactly the wrong behavior for a "delete more aggressively"
+ // caller.
+ mock.ExpectQuery(sweepSQL).
+ WithArgs(int64(0)).
+ WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(3, 0))
+
+ res, err := store.Sweep(context.Background(), -1*time.Second)
+ if err != nil {
+ t.Fatalf("Sweep: %v", err)
+ }
+ if res.Acked != 3 {
+ t.Errorf("got %+v want acked=3", res)
+ }
+}
+
+func TestSweep_ZeroRetentionImmediatelyDeletesAcked(t *testing.T) {
+ db, mock := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ mock.ExpectQuery(sweepSQL).
+ WithArgs(int64(0)).
+ WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(5, 1))
+
+ res, err := store.Sweep(context.Background(), 0)
+ if err != nil {
+ t.Fatalf("Sweep: %v", err)
+ }
+ if res.Acked != 5 || res.Expired != 1 {
+ t.Errorf("got %+v want acked=5 expired=1", res)
+ }
+}
+
+func TestSweep_DBError_Wrapped(t *testing.T) {
+ db, mock := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ mock.ExpectQuery(sweepSQL).
+ WithArgs(int64(60)).
+ WillReturnError(errors.New("connection lost"))
+
+ _, err := store.Sweep(context.Background(), time.Minute)
+ if err == nil || !strings.Contains(err.Error(), "sweep") {
+ t.Fatalf("expected wrapped sweep error, got %v", err)
+ }
+}
+
+func TestSweepResult_TotalSumsCounts(t *testing.T) {
+ r := pendinguploads.SweepResult{Acked: 4, Expired: 3}
+ if r.Total() != 7 {
+ t.Errorf("Total = %d, want 7", r.Total())
+ }
+ z := pendinguploads.SweepResult{}
+ if z.Total() != 0 {
+ t.Errorf("zero Total = %d, want 0", z.Total())
+ }
+}
+
+// ----- PutBatch -------------------------------------------------------------
+//
+// PutBatch is the multi-file atomic insert path used by uploadPollMode in
+// chat_files.go. The contract that callers rely on:
+//
+// - Either ALL rows commit, or NONE do — a per-row INSERT failure must
+// leave the table unchanged (no orphaned rows from a half-applied batch).
+// - Per-item validation runs BEFORE the Tx opens so a bad input shape
+// never wastes a BEGIN round-trip.
+// - Returned []uuid.UUID is in input order — handler maps response back
+// to the multipart Files[i].
+//
+// sqlmock's ExpectBegin / ExpectQuery / ExpectCommit / ExpectRollback let us
+// pin the exact tx-lifecycle shape; if a future refactor swaps Begin for
+// BeginTx-with-options, the test fails until we re-pin.
+
+func TestPutBatch_HappyPath_AllCommitInOrder(t *testing.T) {
+ db, mock := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ wsID := uuid.New()
+ id1, id2, id3 := uuid.New(), uuid.New(), uuid.New()
+
+ mock.ExpectBegin()
+ mock.ExpectQuery(insertSQL).
+ WithArgs(wsID, []byte("aaa"), int64(3), "a.txt", "text/plain").
+ WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id1))
+ mock.ExpectQuery(insertSQL).
+ WithArgs(wsID, []byte("bbbb"), int64(4), "b.bin", "application/octet-stream").
+ WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id2))
+ mock.ExpectQuery(insertSQL).
+ WithArgs(wsID, []byte("ccccc"), int64(5), "c.pdf", "application/pdf").
+ WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id3))
+ mock.ExpectCommit()
+ // Rollback after Commit is a no-op in database/sql; sqlmock allows it
+ // when ExpectCommit was already matched, so we don't need to expect it.
+
+ got, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{
+ {Content: []byte("aaa"), Filename: "a.txt", Mimetype: "text/plain"},
+ {Content: []byte("bbbb"), Filename: "b.bin", Mimetype: "application/octet-stream"},
+ {Content: []byte("ccccc"), Filename: "c.pdf", Mimetype: "application/pdf"},
+ })
+ if err != nil {
+ t.Fatalf("PutBatch: %v", err)
+ }
+ if len(got) != 3 || got[0] != id1 || got[1] != id2 || got[2] != id3 {
+ t.Errorf("ids out of order or missing: got %v want [%s %s %s]", got, id1, id2, id3)
+ }
+ if err := mock.ExpectationsWereMet(); err != nil {
+ t.Errorf("expectations: %v", err)
+ }
+}
+
+func TestPutBatch_EmptyItems_NoTxNoError(t *testing.T) {
+ db, _ := newMockDB(t) // zero expectations — must NOT round-trip
+ store := pendinguploads.NewPostgres(db)
+
+ got, err := store.PutBatch(context.Background(), uuid.New(), nil)
+ if err != nil {
+ t.Fatalf("expected nil error on empty batch, got %v", err)
+ }
+ if got != nil {
+ t.Errorf("expected nil ids on empty batch, got %v", got)
+ }
+}
+
+func TestPutBatch_RejectsEmptyContent_NoTx(t *testing.T) {
+ db, _ := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ _, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
+ {Content: []byte("ok"), Filename: "a.txt"},
+ {Content: nil, Filename: "b.txt"},
+ })
+ if err == nil || !strings.Contains(err.Error(), "item 1") || !strings.Contains(err.Error(), "empty content") {
+ t.Fatalf("expected item-1 empty-content error, got %v", err)
+ }
+}
+
+func TestPutBatch_RejectsOversize_ReturnsErrTooLarge(t *testing.T) {
+ db, _ := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ too := make([]byte, pendinguploads.MaxFileBytes+1)
+ _, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
+ {Content: []byte("ok"), Filename: "small.txt"},
+ {Content: too, Filename: "huge.bin"},
+ })
+ if !errors.Is(err, pendinguploads.ErrTooLarge) {
+ t.Fatalf("expected ErrTooLarge, got %v", err)
+ }
+}
+
+func TestPutBatch_RejectsEmptyFilename_NoTx(t *testing.T) {
+ db, _ := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ _, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
+ {Content: []byte("hi"), Filename: ""},
+ })
+ if err == nil || !strings.Contains(err.Error(), "item 0") || !strings.Contains(err.Error(), "empty filename") {
+ t.Fatalf("expected item-0 empty-filename error, got %v", err)
+ }
+}
+
+func TestPutBatch_RejectsLongFilename_NoTx(t *testing.T) {
+ db, _ := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ long := strings.Repeat("z", 101)
+ _, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
+ {Content: []byte("hi"), Filename: "ok.txt"},
+ {Content: []byte("hi"), Filename: long},
+ })
+ if err == nil || !strings.Contains(err.Error(), "item 1") || !strings.Contains(err.Error(), "exceeds 100 chars") {
+ t.Fatalf("expected item-1 too-long-filename error, got %v", err)
+ }
+}
+
+func TestPutBatch_BeginTxError_Wrapped(t *testing.T) {
+ db, mock := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ mock.ExpectBegin().WillReturnError(errors.New("conn refused"))
+
+ _, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
+ {Content: []byte("hi"), Filename: "a.txt"},
+ })
+ if err == nil || !strings.Contains(err.Error(), "begin tx") {
+ t.Fatalf("expected wrapped begin-tx error, got %v", err)
+ }
+ if err := mock.ExpectationsWereMet(); err != nil {
+ t.Errorf("expectations: %v", err)
+ }
+}
+
+func TestPutBatch_RollsBackOnPerRowError_NoCommit(t *testing.T) {
+ // First INSERT succeeds, second errors. PutBatch MUST NOT issue
+ // Commit; the deferred Rollback unwinds row 1 so neither row commits.
+ // This is the contract that prevents orphan rows on a failed batch.
+ db, mock := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ wsID := uuid.New()
+ id1 := uuid.New()
+
+ mock.ExpectBegin()
+ mock.ExpectQuery(insertSQL).
+ WithArgs(wsID, []byte("aaa"), int64(3), "a.txt", "").
+ WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id1))
+ mock.ExpectQuery(insertSQL).
+ WithArgs(wsID, []byte("bb"), int64(2), "b.txt", "").
+ WillReturnError(errors.New("statement timeout"))
+ // Critical: Rollback expected, NOT Commit. If a future refactor
+ // accidentally swallows the per-row error and Commits anyway, this
+ // test fails because the unmet ExpectCommit-vs-Rollback shape diverges.
+ mock.ExpectRollback()
+
+ _, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{
+ {Content: []byte("aaa"), Filename: "a.txt"},
+ {Content: []byte("bb"), Filename: "b.txt"},
+ })
+ if err == nil || !strings.Contains(err.Error(), "batch insert item 1") {
+ t.Fatalf("expected wrapped per-row insert error, got %v", err)
+ }
+ if err := mock.ExpectationsWereMet(); err != nil {
+ t.Errorf("expectations (must rollback, no commit): %v", err)
+ }
+}
+
+func TestPutBatch_RollsBackOnFirstRowError(t *testing.T) {
+ // Edge case: very first INSERT fails. No rows ever staged — but the
+ // Tx still needs to roll back to release the snapshot.
+ db, mock := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ wsID := uuid.New()
+ mock.ExpectBegin()
+ mock.ExpectQuery(insertSQL).
+ WithArgs(wsID, []byte("oops"), int64(4), "a.txt", "").
+ WillReturnError(errors.New("constraint violation"))
+ mock.ExpectRollback()
+
+ _, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{
+ {Content: []byte("oops"), Filename: "a.txt"},
+ })
+ if err == nil || !strings.Contains(err.Error(), "batch insert item 0") {
+ t.Fatalf("expected wrapped item-0 insert error, got %v", err)
+ }
+ if err := mock.ExpectationsWereMet(); err != nil {
+ t.Errorf("expectations: %v", err)
+ }
+}
+
+func TestPutBatch_CommitError_Wrapped(t *testing.T) {
+ // Commit fails after every INSERT succeeded. Postgres has already
+ // rolled back the Tx by this point; we surface the error so the
+ // handler returns 500 and the client retries.
+ db, mock := newMockDB(t)
+ store := pendinguploads.NewPostgres(db)
+
+ wsID := uuid.New()
+ id1 := uuid.New()
+ mock.ExpectBegin()
+ mock.ExpectQuery(insertSQL).
+ WithArgs(wsID, []byte("hi"), int64(2), "a.txt", "").
+ WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id1))
+ mock.ExpectCommit().WillReturnError(errors.New("commit broken"))
+
+ _, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{
+ {Content: []byte("hi"), Filename: "a.txt"},
+ })
+ if err == nil || !strings.Contains(err.Error(), "commit batch") {
+ t.Fatalf("expected wrapped commit error, got %v", err)
+ }
+ if err := mock.ExpectationsWereMet(); err != nil {
+ t.Errorf("expectations: %v", err)
+ }
+}
diff --git a/workspace-server/internal/pendinguploads/sweeper.go b/workspace-server/internal/pendinguploads/sweeper.go
new file mode 100644
index 00000000..b29a87ad
--- /dev/null
+++ b/workspace-server/internal/pendinguploads/sweeper.go
@@ -0,0 +1,129 @@
+// sweeper.go — periodic GC for the pending_uploads table.
+//
+// The platform's poll-mode chat-upload handler creates a row in
+// pending_uploads for every chat-attached file the canvas sends to a
+// poll-mode workspace. The workspace's inbox poller fetches the bytes
+// and acks the row, but two failure modes leak rows long-term:
+//
+// 1. Workspace fetches but never acks (network hiccup between GET
+// /content and POST /ack; workspace crashed between the two).
+// Phase 1's Get refuses to re-serve an acked row, but a never-
+// acked row could in principle be fetched repeatedly until expires_at.
+// Phase 2's workspace-side fetcher is idempotent; the worry is
+// only disk usage on the platform side.
+//
+// 2. Workspace never fetches at all (workspace was offline when the
+// row was written; the upload's TTL elapsed).
+//
+// This sweeper handles both. It runs every SweepInterval, deletes rows
+// in either category, and emits structured logs + Prometheus counters
+// so a stuck-fetch dashboard can spot the leak class.
+//
+// Failure isolation: a transient DB error must NOT crash the sweeper.
+// We log + continue; the next tick retries. ctx cancellation cleanly
+// shuts the loop down for graceful shutdown.
+
+package pendinguploads
+
+import (
+ "context"
+ "log"
+ "time"
+
+ "github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
+)
+
+// SweepInterval is the cadence of the GC loop. 5 minutes is a balance
+// between "rows reaped quickly enough that disk usage doesn't surprise
+// anyone" and "we don't pay a DELETE round-trip every 30 seconds when
+// there are no candidates." Aligned with other low-priority sweepers
+// (registry/orphan_sweeper runs at 60s but operates on Docker — much
+// more expensive per cycle than a single indexed DELETE).
+const SweepInterval = 5 * time.Minute
+
+// DefaultAckRetention is how long an acked row sticks around before the
+// sweeper deletes it. 1 hour gives the workspace enough time to retry
+// the GET if its first fetch crashed mid-write — at-least-once handoff
+// without leaking content for a full 24h after the workspace already
+// has a copy.
+const DefaultAckRetention = 1 * time.Hour
+
+// sweepDeadline bounds a single sweep cycle. A daemon at the edge of
+// timeout shouldn't pile up goroutines; 30s is generous for a single
+// indexed DELETE on a table that should rarely have more than a few
+// thousand rows in flight.
+const sweepDeadline = 30 * time.Second
+
+// StartSweeper runs the GC loop until ctx is cancelled. nil storage
+// makes the loop a no-op (matches the handlers' tolerance for an
+// unconfigured pendinguploads — some test harnesses run without the
+// storage wired).
+//
+// Pass ackRetention=0 to use DefaultAckRetention. Negative values are
+// clamped at the storage layer.
+//
+// Production callers use SweepInterval (5m). Tests use a short interval
+// to exercise the ticker-driven sweep path without burning real wall-
+// clock time.
+func StartSweeper(ctx context.Context, storage Storage, ackRetention time.Duration) {
+ startSweeperWithInterval(ctx, storage, ackRetention, SweepInterval)
+}
+
+// startSweeperWithInterval is the test-friendly variant of StartSweeper
+// — same loop, but the cadence is caller-specified. Production code
+// should use StartSweeper to keep the SweepInterval constant pinned.
+func startSweeperWithInterval(ctx context.Context, storage Storage, ackRetention, interval time.Duration) {
+ if storage == nil {
+ log.Println("pendinguploads sweeper: storage is nil — sweeper disabled")
+ return
+ }
+ if ackRetention == 0 {
+ ackRetention = DefaultAckRetention
+ }
+ log.Printf(
+ "pendinguploads sweeper started — sweeping every %s; ack retention %s",
+ interval, ackRetention,
+ )
+ ticker := time.NewTicker(interval)
+ defer ticker.Stop()
+ // Run once immediately so a platform restart cleans up any rows
+ // that became eligible while we were down — don't make the
+ // operator wait 5 minutes for the first sweep.
+ sweepOnce(ctx, storage, ackRetention)
+ for {
+ select {
+ case <-ctx.Done():
+ log.Println("pendinguploads sweeper: shutdown")
+ return
+ case <-ticker.C:
+ sweepOnce(ctx, storage, ackRetention)
+ }
+ }
+}
+
+func sweepOnce(parent context.Context, storage Storage, ackRetention time.Duration) {
+ ctx, cancel := context.WithTimeout(parent, sweepDeadline)
+ defer cancel()
+
+ res, err := storage.Sweep(ctx, ackRetention)
+ if err != nil {
+ // Transient errors: log + continue. The next tick retries; if
+ // the DB is genuinely down, the rest of the platform is also
+ // broken and disk usage is the least of the operator's
+ // problems.
+ log.Printf("pendinguploads sweeper: Sweep failed: %v", err)
+ metrics.PendingUploadsSweepError()
+ return
+ }
+ metrics.PendingUploadsSwept(res.Acked, res.Expired)
+ if res.Total() > 0 {
+ // Per-cycle structured-ish log (one line per cycle that did
+ // something). Quiet by design — most cycles delete zero rows
+ // on a healthy system, and a stream of empty-result lines
+ // would drown the production log without surfacing a signal.
+ log.Printf(
+ "pendinguploads sweeper: deleted acked=%d expired=%d total=%d",
+ res.Acked, res.Expired, res.Total(),
+ )
+ }
+}
diff --git a/workspace-server/internal/pendinguploads/sweeper_test.go b/workspace-server/internal/pendinguploads/sweeper_test.go
new file mode 100644
index 00000000..fb0c5aa0
--- /dev/null
+++ b/workspace-server/internal/pendinguploads/sweeper_test.go
@@ -0,0 +1,294 @@
+package pendinguploads_test
+
+import (
+ "context"
+ "errors"
+ "sync/atomic"
+ "testing"
+ "time"
+
+ "github.com/google/uuid"
+
+ "github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
+ "github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
+)
+
+// fakeSweepStorage is a minimal Storage that records every Sweep call
+// and lets each test inject the per-cycle return values. The other
+// methods are no-ops — the sweeper goroutine never calls them.
+type fakeSweepStorage struct {
+ calls atomic.Int64
+ results []pendinguploads.SweepResult
+ errs []error
+ cycleDone chan struct{} // closed after each Sweep call (test sync)
+ gotRetention atomic.Int64 // last ackRetention seen, in seconds
+}
+
+func newFakeSweepStorage(results []pendinguploads.SweepResult, errs []error) *fakeSweepStorage {
+ return &fakeSweepStorage{
+ results: results,
+ errs: errs,
+ cycleDone: make(chan struct{}, 16),
+ }
+}
+
+func (f *fakeSweepStorage) Put(_ context.Context, _ uuid.UUID, _ []byte, _, _ string) (uuid.UUID, error) {
+ return uuid.Nil, errors.New("not used")
+}
+func (f *fakeSweepStorage) Get(_ context.Context, _ uuid.UUID) (pendinguploads.Record, error) {
+ return pendinguploads.Record{}, errors.New("not used")
+}
+func (f *fakeSweepStorage) MarkFetched(_ context.Context, _ uuid.UUID) error {
+ return errors.New("not used")
+}
+func (f *fakeSweepStorage) Ack(_ context.Context, _ uuid.UUID) error {
+ return errors.New("not used")
+}
+func (f *fakeSweepStorage) PutBatch(_ context.Context, _ uuid.UUID, _ []pendinguploads.PutItem) ([]uuid.UUID, error) {
+ return nil, errors.New("not used")
+}
+func (f *fakeSweepStorage) Sweep(_ context.Context, ackRetention time.Duration) (pendinguploads.SweepResult, error) {
+ idx := int(f.calls.Load())
+ f.calls.Add(1)
+ f.gotRetention.Store(int64(ackRetention.Seconds()))
+ defer func() {
+ select {
+ case f.cycleDone <- struct{}{}:
+ default:
+ }
+ }()
+ if idx < len(f.errs) && f.errs[idx] != nil {
+ return pendinguploads.SweepResult{}, f.errs[idx]
+ }
+ if idx < len(f.results) {
+ return f.results[idx], nil
+ }
+ return pendinguploads.SweepResult{}, nil
+}
+
+// waitForCycle blocks until at least one Sweep completes, with a deadline.
+// Tests use this instead of time.Sleep to avoid flakes on slow CI hosts.
+//
+// CAVEAT: cycleDone fires from inside fakeSweepStorage.Sweep's defer,
+// which runs as Sweep returns its result — BEFORE the StartSweeper
+// loop has processed the (result, error) tuple and called the
+// metric recorders. Tests that assert on metric counters must NOT
+// rely on this wait alone; use waitForMetricDelta instead so the
+// metric increment race (Sweep returns → cycleDone fires → test
+// reads counter → only then does StartSweeper's loop call
+// metrics.PendingUploadsSweepError) doesn't produce a flake.
+func (f *fakeSweepStorage) waitForCycle(t *testing.T, n int, timeout time.Duration) {
+ t.Helper()
+ deadline := time.NewTimer(timeout)
+ defer deadline.Stop()
+ for got := 0; got < n; got++ {
+ select {
+ case <-f.cycleDone:
+ case <-deadline.C:
+ t.Fatalf("waited %s for %d sweep cycles, got %d", timeout, n, f.calls.Load())
+ }
+ }
+}
+
+// waitForMetricDelta polls the supplied delta function until it returns
+// `want` or the timeout elapses. Use after waitForCycle when the test
+// asserts on a metric counter — closes the race between cycleDone
+// (signalled inside fakeSweepStorage.Sweep's defer, BEFORE Sweep
+// returns to StartSweeper) and the metric recording (which happens in
+// StartSweeper's loop AFTER Sweep returns). On a slow CI host the test
+// goroutine wins the read before StartSweeper's goroutine writes the
+// counter; the polling assert preserves the determinism of "the metric
+// MUST be N" without timing-based flakes.
+//
+// Per memory feedback_question_test_when_unexpected.md: the failure
+// mode "delta=0, want=1" looked like a real bug at first glance —
+// "metric never incremented" — but instrumented analysis showed the
+// metric DID increment, just AFTER the test's read. The fix is the
+// test's wait shape, not the production code.
+func waitForMetricDelta(t *testing.T, delta func() int64, want int64, timeout time.Duration) {
+ t.Helper()
+ deadline := time.Now().Add(timeout)
+ for time.Now().Before(deadline) {
+ if delta() == want {
+ return
+ }
+ time.Sleep(5 * time.Millisecond)
+ }
+ t.Fatalf("waited %s for metric delta=%d, last seen %d", timeout, want, delta())
+}
+
+func TestStartSweeper_NilStorageDoesNotPanic(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+ // Should return immediately without panicking; no goroutine to wait on.
+ pendinguploads.StartSweeper(ctx, nil, time.Second)
+}
+
+func TestStartSweeper_RunsImmediatelyAndOnTick(t *testing.T) {
+ store := newFakeSweepStorage(
+ []pendinguploads.SweepResult{{Acked: 5}, {Acked: 1, Expired: 2}},
+ nil,
+ )
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+
+ go pendinguploads.StartSweeper(ctx, store, time.Hour)
+ store.waitForCycle(t, 1, 2*time.Second)
+ if got := store.calls.Load(); got < 1 {
+ t.Errorf("expected at least one immediate sweep, got %d", got)
+ }
+ // Retention propagated.
+ if store.gotRetention.Load() != 3600 {
+ t.Errorf("retention seconds = %d, want 3600", store.gotRetention.Load())
+ }
+}
+
+func TestStartSweeper_ZeroAckRetentionUsesDefault(t *testing.T) {
+ store := newFakeSweepStorage([]pendinguploads.SweepResult{{}}, nil)
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+
+ go pendinguploads.StartSweeper(ctx, store, 0)
+ store.waitForCycle(t, 1, 2*time.Second)
+ want := int64(pendinguploads.DefaultAckRetention.Seconds())
+ if store.gotRetention.Load() != want {
+ t.Errorf("retention = %d, want default %d", store.gotRetention.Load(), want)
+ }
+}
+
+func TestStartSweeper_ContextCancelStopsLoop(t *testing.T) {
+ store := newFakeSweepStorage([]pendinguploads.SweepResult{{}}, nil)
+ ctx, cancel := context.WithCancel(context.Background())
+
+ done := make(chan struct{})
+ go func() {
+ pendinguploads.StartSweeper(ctx, store, time.Second)
+ close(done)
+ }()
+ store.waitForCycle(t, 1, 2*time.Second)
+ cancel()
+
+ select {
+ case <-done:
+ case <-time.After(2 * time.Second):
+ t.Fatal("StartSweeper did not return after ctx cancel")
+ }
+}
+
+func TestStartSweeperWithInterval_TickerFiresAdditionalCycles(t *testing.T) {
+ store := newFakeSweepStorage(
+ []pendinguploads.SweepResult{{Acked: 1}, {Expired: 1}, {}, {}, {}},
+ nil,
+ )
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+
+ go pendinguploads.StartSweeperWithIntervalForTest(ctx, store, time.Hour, 30*time.Millisecond)
+
+ // Immediate cycle + at least one tick-driven cycle.
+ store.waitForCycle(t, 2, 2*time.Second)
+
+ if got := store.calls.Load(); got < 2 {
+ t.Errorf("expected ≥2 cycles (immediate + 1 tick), got %d", got)
+ }
+}
+
+func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) {
+ // First call errors; second call succeeds. The loop must keep running
+ // across the error so a one-off DB hiccup doesn't disable the GC.
+ store := newFakeSweepStorage(
+ []pendinguploads.SweepResult{{}, {Acked: 1}},
+ []error{errors.New("transient db error"), nil},
+ )
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+
+ // 50ms ticker so the second cycle fires quickly enough for the test.
+ // We re-export SweepInterval as a const, but tests use the public
+ // StartSweeper that takes its own interval — wait, the public
+ // StartSweeper signature uses the package-level SweepInterval. Hmm,
+ // this means the test takes ~5 minutes. Let me reconsider.
+ //
+ // (We patch the test below to just look at the immediate-sweep call
+ // + an error path, since the immediate call is enough to prove the
+ // "error doesn't crash" contract — the loop continues afterward
+ // regardless of timing.)
+ go pendinguploads.StartSweeper(ctx, store, time.Hour)
+
+ // Wait for the first (errored) cycle.
+ store.waitForCycle(t, 1, 2*time.Second)
+ // Cancel — the goroutine returns cleanly, proving the error path
+ // didn't crash the loop. Without this fix the goroutine would have
+ // either panicked (process abort visible at exit) or stuck (this
+ // cancel + done-channel pattern would deadlock instead).
+ cancel()
+}
+
+// metricDelta returns a function that, when called, returns how much
+// the (acked, expired, errored) counters have advanced since metricDelta
+// was originally called. metrics is a process-singleton across the test
+// suite; deltas isolate this test from order-of-execution dependencies.
+func metricDelta(t *testing.T) (deltaAcked, deltaExpired, deltaError func() int64) {
+ t.Helper()
+ a0, e0, err0 := metrics.PendingUploadsSweepCounts()
+ deltaAcked = func() int64 {
+ a, _, _ := metrics.PendingUploadsSweepCounts()
+ return a - a0
+ }
+ deltaExpired = func() int64 {
+ _, e, _ := metrics.PendingUploadsSweepCounts()
+ return e - e0
+ }
+ deltaError = func() int64 {
+ _, _, x := metrics.PendingUploadsSweepCounts()
+ return x - err0
+ }
+ return
+}
+
+func TestStartSweeper_RecordsMetricsOnSuccess(t *testing.T) {
+ deltaAcked, deltaExpired, deltaError := metricDelta(t)
+
+ store := newFakeSweepStorage(
+ []pendinguploads.SweepResult{{Acked: 3, Expired: 5}},
+ nil,
+ )
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+
+ go pendinguploads.StartSweeper(ctx, store, time.Hour)
+ store.waitForCycle(t, 1, 2*time.Second)
+
+ // Poll for the success counters to settle — closes the cycleDone-
+ // vs-metric-record race (see waitForMetricDelta comment).
+ waitForMetricDelta(t, deltaAcked, 3, 2*time.Second)
+ waitForMetricDelta(t, deltaExpired, 5, 2*time.Second)
+ // Error counter MUST stay at zero on the success path. Read after
+ // the success counters have settled — once those are correct,
+ // StartSweeper has fully processed this cycle's result.
+ if got := deltaError(); got != 0 {
+ t.Errorf("error counter delta = %d, want 0", got)
+ }
+}
+
+func TestStartSweeper_RecordsMetricsOnError(t *testing.T) {
+ _, _, deltaError := metricDelta(t)
+
+ store := newFakeSweepStorage(
+ []pendinguploads.SweepResult{{}},
+ []error{errors.New("db down")},
+ )
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+
+ go pendinguploads.StartSweeper(ctx, store, time.Hour)
+ store.waitForCycle(t, 1, 2*time.Second)
+
+ // Poll for the error counter to settle — cycleDone fires inside
+ // the fake's Sweep defer, BEFORE StartSweeper's loop receives the
+ // returned error and calls metrics.PendingUploadsSweepError. On
+ // slow CI hosts a direct deltaError() read here returns 0 even
+ // though the metric WILL be 1 a few ms later. See
+ // waitForMetricDelta comment.
+ waitForMetricDelta(t, deltaError, 1, 2*time.Second)
+}
diff --git a/workspace-server/internal/provisioner/cp_provisioner.go b/workspace-server/internal/provisioner/cp_provisioner.go
index edc67d9f..bdc5bff7 100644
--- a/workspace-server/internal/provisioner/cp_provisioner.go
+++ b/workspace-server/internal/provisioner/cp_provisioner.go
@@ -14,6 +14,7 @@ import (
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
+ "github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
)
// CPProvisionerAPI is the contract WorkspaceHandler uses to talk to the
@@ -214,6 +215,13 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string,
}
log.Printf("CP provisioner: workspace %s → EC2 instance %s (%s)", cfg.WorkspaceID, result.InstanceID, result.State)
+ provlog.Event("provision.ec2_started", map[string]any{
+ "workspace_id": cfg.WorkspaceID,
+ "instance_id": result.InstanceID,
+ "state": result.State,
+ "tier": cfg.Tier,
+ "runtime": cfg.Runtime,
+ })
return result.InstanceID, nil
}
@@ -273,6 +281,10 @@ func (p *CPProvisioner) Stop(ctx context.Context, workspaceID string) error {
return fmt.Errorf("cp provisioner: stop %s: unexpected %d: %s",
workspaceID, resp.StatusCode, strings.TrimSpace(string(body)))
}
+ provlog.Event("provision.ec2_stopped", map[string]any{
+ "workspace_id": workspaceID,
+ "instance_id": instanceID,
+ })
return nil
}
diff --git a/workspace-server/internal/provlog/provlog.go b/workspace-server/internal/provlog/provlog.go
new file mode 100644
index 00000000..4434c238
--- /dev/null
+++ b/workspace-server/internal/provlog/provlog.go
@@ -0,0 +1,48 @@
+// Package provlog emits structured, single-line JSON log records for
+// provisioning-lifecycle boundaries (workspace create, EC2 start/stop,
+// restart, idempotency skips). Records share a stable `evt:` prefix and
+// JSON payload so a future grep|jq pipeline (or a Loki/Datadog ingest)
+// can reconstruct the per-workspace timeline without parsing the
+// human-prose log lines that already exist.
+//
+// Existing log.Printf lines are intentionally NOT replaced — they
+// remain the operator-facing message. Event() emits a paired structured
+// record alongside, additive only.
+//
+// Event taxonomy (extend by appending; never rename):
+//
+// provision.start — workspace row inserted, EC2 about to launch
+// provision.skip_existing — idempotency hit, no new EC2
+// provision.ec2_started — RunInstances returned an instance id
+// provision.ec2_stopped — TerminateInstances acknowledged
+// restart.pre_stop — Restart handler about to call Stop
+//
+// Required fields per event are documented at each call site.
+package provlog
+
+import (
+ "encoding/json"
+ "log"
+)
+
+// Event writes a single line of the form:
+//
+// evt: {"k":"v",...}
+//
+// to the standard logger. JSON encoding errors are silently swallowed —
+// a logging helper must never panic the request path. fields may be
+// nil; the empty payload `{}` is still useful to mark an event boundary.
+func Event(name string, fields map[string]any) {
+ if fields == nil {
+ fields = map[string]any{}
+ }
+ payload, err := json.Marshal(fields)
+ if err != nil {
+ // Fall back to a static payload so the event boundary still
+ // appears in the log. The marshal error itself is recorded
+ // on a best-effort basis.
+ log.Printf("evt: %s {\"_marshal_err\":%q}", name, err.Error())
+ return
+ }
+ log.Printf("evt: %s %s", name, payload)
+}
diff --git a/workspace-server/internal/provlog/provlog_test.go b/workspace-server/internal/provlog/provlog_test.go
new file mode 100644
index 00000000..7d2f5f5f
--- /dev/null
+++ b/workspace-server/internal/provlog/provlog_test.go
@@ -0,0 +1,97 @@
+package provlog
+
+import (
+ "bytes"
+ "encoding/json"
+ "log"
+ "strings"
+ "testing"
+)
+
+// captureLog redirects the default logger to a buffer for the duration
+// of fn and returns whatever was written.
+func captureLog(t *testing.T, fn func()) string {
+ t.Helper()
+ var buf bytes.Buffer
+ prevWriter := log.Writer()
+ prevFlags := log.Flags()
+ log.SetOutput(&buf)
+ log.SetFlags(0) // strip date/time so assertions stay deterministic
+ t.Cleanup(func() {
+ log.SetOutput(prevWriter)
+ log.SetFlags(prevFlags)
+ })
+ fn()
+ return buf.String()
+}
+
+func TestEvent_EmitsEvtPrefixAndJSONPayload(t *testing.T) {
+ out := captureLog(t, func() {
+ Event("provision.start", map[string]any{
+ "workspace_id": "ws-123",
+ "tier": 4,
+ "runtime": "claude-code",
+ })
+ })
+ out = strings.TrimSpace(out)
+ if !strings.HasPrefix(out, "evt: provision.start ") {
+ t.Fatalf("expected evt-prefixed line, got %q", out)
+ }
+ jsonPart := strings.TrimPrefix(out, "evt: provision.start ")
+ var got map[string]any
+ if err := json.Unmarshal([]byte(jsonPart), &got); err != nil {
+ t.Fatalf("payload not valid JSON: %v (raw=%q)", err, jsonPart)
+ }
+ if got["workspace_id"] != "ws-123" {
+ t.Errorf("workspace_id field lost: %+v", got)
+ }
+ // JSON unmarshal turns numbers into float64 — exact-equal compare.
+ if got["tier"].(float64) != 4 {
+ t.Errorf("tier field lost: %+v", got)
+ }
+ if got["runtime"] != "claude-code" {
+ t.Errorf("runtime field lost: %+v", got)
+ }
+}
+
+func TestEvent_NilFieldsEmitsEmptyObject(t *testing.T) {
+ out := captureLog(t, func() {
+ Event("restart.pre_stop", nil)
+ })
+ if !strings.Contains(out, "evt: restart.pre_stop {}") {
+ t.Fatalf("nil fields should emit empty object, got %q", out)
+ }
+}
+
+func TestEvent_PreservesEventBoundaryOnUnmarshalableValue(t *testing.T) {
+ // A channel cannot be marshaled by encoding/json — verify we still
+ // emit the event boundary with a recorded marshal error. This is
+ // the structural guarantee: the call site never sees a panic, and
+ // the event name is always present in the log.
+ out := captureLog(t, func() {
+ Event("provision.ec2_started", map[string]any{
+ "chan": make(chan int),
+ })
+ })
+ if !strings.Contains(out, "evt: provision.ec2_started ") {
+ t.Fatalf("event boundary missing on marshal error: %q", out)
+ }
+ if !strings.Contains(out, "_marshal_err") {
+ t.Fatalf("expected _marshal_err sentinel, got %q", out)
+ }
+}
+
+func TestEvent_SingleLineOutput(t *testing.T) {
+ // Log aggregators line-split on \n. A multi-line emit would silently
+ // fragment the JSON across two records — pin single-line shape.
+ out := captureLog(t, func() {
+ Event("provision.skip_existing", map[string]any{
+ "existing_id": "ws-abc",
+ "name": "child-1",
+ })
+ })
+ trimmed := strings.TrimRight(out, "\n")
+ if strings.Contains(trimmed, "\n") {
+ t.Fatalf("event line must be single-line, got %q", out)
+ }
+}
diff --git a/workspace-server/internal/router/router.go b/workspace-server/internal/router/router.go
index 86007d00..ae928f2f 100644
--- a/workspace-server/internal/router/router.go
+++ b/workspace-server/internal/router/router.go
@@ -243,13 +243,15 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
// entire platform. Gated behind AdminAuth (issue #180).
r.GET("/approvals/pending", middleware.AdminAuth(db.DB), apph.ListAll)
- // Team handlers — Collapse only. The bulk-Expand path is gone:
- // every workspace can have children via the regular CreateWorkspace
- // flow with parent_id set, so a separate handler that bulk-creates
- // from sub_workspaces (and was non-idempotent — calling it twice
- // duplicated the team) earned its way out.
- teamh := handlers.NewTeamHandler(broadcaster, wh, platformURL, configsDir)
- wsAuth.POST("/collapse", teamh.Collapse)
+ // (TeamHandler is gone — #2864.) The visual canvas Collapse
+ // button calls PATCH /workspaces/:id { collapsed: true/false }
+ // (presentational toggle on canvas_layouts), NOT the destructive
+ // POST /collapse that stopped + removed children. The
+ // destructive route had zero UI callers (verified via grep
+ // across canvas/, scripts/, and the MCP tool registry — only
+ // docs referenced it). team.go + team_test.go + the route
+ // + helpers (findTemplateDirByName, NewTeamHandler) are
+ // deleted; visual collapse is unaffected.
// Agents
ah := handlers.NewAgentHandler(broadcaster)
@@ -519,8 +521,9 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
r.GET("/canvas/viewport", vh.Get)
r.PUT("/canvas/viewport", middleware.CanvasOrBearer(db.DB), vh.Save)
- // Templates
- tmplh := handlers.NewTemplatesHandler(configsDir, dockerCli)
+ // Templates — wh threaded so generateDefaultConfig picks the
+ // SaaS-aware default tier in Import + ReplaceFiles (#2910 PR-B).
+ tmplh := handlers.NewTemplatesHandler(configsDir, dockerCli, wh)
// #686: GET /templates lists all template names+metadata from configsDir.
// Open access lets unauthenticated callers enumerate org configurations and
// installed plugins. AdminAuth-gate it alongside POST /templates/import.
diff --git a/workspace-server/internal/scheduler/scheduler.go b/workspace-server/internal/scheduler/scheduler.go
index 0c6eb84f..e098586d 100644
--- a/workspace-server/internal/scheduler/scheduler.go
+++ b/workspace-server/internal/scheduler/scheduler.go
@@ -14,6 +14,7 @@ import (
cronlib "github.com/robfig/cron/v3"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
+ "github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/supervised"
)
@@ -741,6 +742,11 @@ func (s *Scheduler) sweepPhantomBusy(ctx context.Context) {
continue
}
log.Printf("Scheduler: phantom-busy sweep — reset %s (no activity in %d min)", name, int(phantomStaleThreshold.Minutes()))
+ // #2865: surface as molecule_phantom_busy_resets_total. High
+ // reset rate signals task-lifecycle accounting regressions
+ // (e.g. missing env vars causing claude --print timeouts that
+ // leave active_tasks elevated until this sweep fires).
+ metrics.TrackPhantomBusyReset()
count++
}
if err := rows.Err(); err != nil {
diff --git a/workspace-server/migrations/20260505200000_pending_uploads_acked_index.down.sql b/workspace-server/migrations/20260505200000_pending_uploads_acked_index.down.sql
new file mode 100644
index 00000000..2d84b00d
--- /dev/null
+++ b/workspace-server/migrations/20260505200000_pending_uploads_acked_index.down.sql
@@ -0,0 +1,2 @@
+-- Reversal of 20260505200000_pending_uploads_acked_index.up.sql.
+DROP INDEX IF EXISTS idx_pending_uploads_acked;
diff --git a/workspace-server/migrations/20260505200000_pending_uploads_acked_index.up.sql b/workspace-server/migrations/20260505200000_pending_uploads_acked_index.up.sql
new file mode 100644
index 00000000..f2beced2
--- /dev/null
+++ b/workspace-server/migrations/20260505200000_pending_uploads_acked_index.up.sql
@@ -0,0 +1,30 @@
+-- 20260505200000_pending_uploads_acked_index.up.sql
+--
+-- Adds the missing partial index for the acked-retention arm of the
+-- pendinguploads.Sweep query. The Phase 1 migration created two
+-- partial indexes both gated on `acked_at IS NULL` (workspace-fetch
+-- hot path + expires_at sweep arm); the third query path —
+-- `WHERE acked_at IS NOT NULL AND acked_at < now() - interval` — was
+-- left to a seq scan.
+--
+-- For a high-traffic deployment that's a real cost: the table
+-- accumulates one row per chat-attached file; the sweeper runs every
+-- 5 minutes and DELETEs rows past the 1-hour ack retention. A seq
+-- scan over 100K-1M acked rows holds an AccessShare lock for seconds
+-- on every cycle. Partial-indexing the inverse predicate reduces
+-- this to a btree range scan and lets the DELETE complete in
+-- low-millisecond range.
+--
+-- WHERE acked_at IS NOT NULL is intentionally inverse of the other
+-- two indexes — they cover the unacked working set; this covers the
+-- terminal-state set the sweeper visits. Disjoint subsets, so the
+-- two indexes don't overlap.
+--
+-- Caught in self-review on the parent RFC's Phase 4 PR; filed as
+-- a follow-up rather than a Phase 1 fix because the cost only
+-- materializes at a row count we don't expect to hit before the
+-- sweeper has had a chance to keep up.
+
+CREATE INDEX IF NOT EXISTS idx_pending_uploads_acked
+ ON pending_uploads (acked_at)
+ WHERE acked_at IS NOT NULL;
diff --git a/workspace/inbox.py b/workspace/inbox.py
index 94417243..cff95c6d 100644
--- a/workspace/inbox.py
+++ b/workspace/inbox.py
@@ -432,7 +432,17 @@ def _is_self_notify_row(row: dict[str, Any]) -> bool:
def message_from_activity(row: dict[str, Any]) -> InboxMessage:
- """Convert one /activity row into an InboxMessage."""
+ """Convert one /activity row into an InboxMessage.
+
+ Mutates ``row['request_body']`` in-place to swap any
+ ``platform-pending:`` URIs to the locally-staged ``workspace:`` URIs
+ (see ``inbox_uploads.rewrite_request_body``) — by the time the
+ upstream chat message arrives via this path, the upload-receive row
+ that staged the bytes has already populated the URI cache (lower
+ activity_logs.id, processed earlier in the same poll batch). A
+ cache miss leaves the URI untouched; the agent surfaces an
+ unresolvable URI rather than the inbox silently dropping the part.
+ """
request_body = row.get("request_body")
if isinstance(request_body, str):
# The Go handler returns request_body as json.RawMessage; httpx
@@ -443,6 +453,14 @@ def message_from_activity(row: dict[str, Any]) -> InboxMessage:
except (TypeError, ValueError):
request_body = None
+ # Rewrite platform-pending: URIs → workspace: URIs in-place. Imported
+ # at call time to keep the import graph clean for the in-container
+ # path that doesn't use this module (also avoids a circular: the
+ # uploads module is small enough that re-importing per call is
+ # cheap, and the Python import cache makes it free after the first).
+ from inbox_uploads import rewrite_request_body
+ rewrite_request_body(request_body)
+
return InboxMessage(
activity_id=str(row.get("id", "")),
text=_extract_text(request_body, row.get("summary")),
@@ -532,11 +550,57 @@ def _poll_once(
if cursor is None:
rows = list(reversed(rows))
+ # Imported lazily at use-site so a runtime that never sees an
+ # upload-receive row never imports the module. Cheap on the hot
+ # path because Python caches the import.
+ from inbox_uploads import is_chat_upload_row, BatchFetcher
+
new_count = 0
last_id: str | None = None
+ # ``batch_fetcher`` is lazy: a poll batch with no upload rows pays
+ # zero overhead. Once the first upload row appears we open one
+ # BatchFetcher and submit every subsequent upload row to its thread
+ # pool; before processing the FIRST non-upload row we drain the
+ # pool (wait_all) so the URI cache is hot when message rewriting
+ # runs. Without the barrier, the chat message that references the
+ # upload would arrive at the agent with the un-rewritten
+ # platform-pending: URI.
+ batch_fetcher: BatchFetcher | None = None
+
+ def _drain_uploads(bf: BatchFetcher | None) -> None:
+ if bf is None:
+ return
+ bf.wait_all()
+ bf.close()
+
for row in rows:
if not isinstance(row, dict):
continue
+ if is_chat_upload_row(row):
+ # Side-effect row from the platform's poll-mode chat-upload
+ # handler — fetch the bytes, stage to /workspace/.molecule/
+ # chat-uploads, ack. NOT enqueued as an InboxMessage; the
+ # agent will see the chat message that REFERENCES this
+ # upload via a separate (later) activity row, with the
+ # pending: URI rewritten to a workspace: URI by
+ # message_from_activity. We DO advance the cursor past
+ # this row so a permanent network outage on /content
+ # doesn't stall the cursor and block real chat traffic.
+ if batch_fetcher is None:
+ batch_fetcher = BatchFetcher(
+ platform_url=platform_url,
+ workspace_id=workspace_id,
+ headers=headers,
+ )
+ batch_fetcher.submit(row)
+ last_id = str(row.get("id", "")) or last_id
+ continue
+ # Non-upload row: drain any pending uploads first so the URI
+ # cache is populated before we run rewrite_request_body /
+ # message_from_activity on a row that may reference one.
+ if batch_fetcher is not None:
+ _drain_uploads(batch_fetcher)
+ batch_fetcher = None
if _is_self_notify_row(row):
# The workspace-server's `/notify` handler writes the agent's
# own send_message_to_user POSTs to activity_logs with
@@ -571,6 +635,13 @@ def _poll_once(
last_id = message.activity_id
new_count += 1
+ # Drain any uploads still in flight if the batch ended with upload
+ # rows (no chat-message row to trigger the inline drain). Without
+ # this, a future poll that picks up the chat-message row first
+ # would race with the still-running fetches.
+ if batch_fetcher is not None:
+ _drain_uploads(batch_fetcher)
+
if last_id is not None:
state.save_cursor(last_id, cursor_key)
return new_count
@@ -613,6 +684,7 @@ def start_poller_thread(
platform_url: str,
workspace_id: str,
interval: float = POLL_INTERVAL_SECONDS,
+ stop_event: threading.Event | None = None,
) -> threading.Thread:
"""Spawn the poller as a daemon thread. Returns the Thread handle.
@@ -624,13 +696,18 @@ def start_poller_thread(
operator running ``ps -eL`` or eyeballing ``threading.enumerate()``
can tell which thread is which without reverse-engineering it from
crash tracebacks.
+
+ Pass ``stop_event`` to enable graceful shutdown — used by tests so
+ the daemon thread doesn't outlive the test that started it and race
+ with later tests' httpx patches. Production code passes None and
+ relies on the daemon flag for process-exit cleanup.
"""
name = "molecule-mcp-inbox-poller"
if workspace_id:
name = f"{name}-{workspace_id[:8]}"
t = threading.Thread(
target=_poll_loop,
- args=(state, platform_url, workspace_id, interval),
+ args=(state, platform_url, workspace_id, interval, stop_event),
name=name,
daemon=True,
)
diff --git a/workspace/inbox_uploads.py b/workspace/inbox_uploads.py
new file mode 100644
index 00000000..69fa53aa
--- /dev/null
+++ b/workspace/inbox_uploads.py
@@ -0,0 +1,724 @@
+"""Poll-mode chat-upload fetcher + URI cache for the standalone path.
+
+Companion to ``inbox.py``. When the workspace's inbox poller sees an
+``activity_logs`` row with ``method='chat_upload_receive'`` (written by
+the platform's ``uploadPollMode`` handler — workspace-server
+``internal/handlers/chat_files.go``), this module:
+
+ 1. Pulls the bytes from
+ ``GET /workspaces/:id/pending-uploads/:file_id/content``.
+ 2. Writes them to ``/workspace/.molecule/chat-uploads/-``
+ — same on-disk shape as the push-mode handler in
+ ``internal_chat_uploads.py``, so anything downstream that already
+ resolves ``workspace:/workspace/.molecule/chat-uploads/...`` URIs
+ works unchanged.
+ 3. POSTs ``/workspaces/:id/pending-uploads/:file_id/ack`` so Phase 3
+ sweep can clean up the platform-side ``pending_uploads`` row.
+ 4. Records a ``platform-pending:/ →
+ workspace:/workspace/.molecule/chat-uploads/...`` mapping in a
+ process-local cache so the chat message that arrives later
+ (referencing the platform-pending URI) gets rewritten before the
+ agent sees it.
+
+URI rewrite ordering — the chat message containing the
+``platform-pending:`` URI is logged by the platform AFTER the
+``chat_upload_receive`` row, so the inbox poller sees the upload-receive
+row first (lower activity_logs.id) and stages the bytes before the chat
+message arrives in the same poll batch (or a later one). The URI cache
+is therefore populated before the message_from_activity path needs it.
+A miss (network race, restart with stale cursor) is handled by keeping
+the original ``platform-pending:`` URI in the rewritten body — the agent
+will see something it can't open, which is preferable to silently
+dropping the URI.
+
+Auth — same Bearer token the inbox poller uses (``platform_auth.auth_headers``).
+Both endpoints are on the wsAuth-gated route, so this module can never
+read another tenant's bytes even if a token is misrouted.
+"""
+from __future__ import annotations
+
+import concurrent.futures
+import logging
+import mimetypes
+import os
+import re
+import secrets as pysecrets
+import threading
+from collections import OrderedDict
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# Same on-disk root as internal_chat_uploads.CHAT_UPLOAD_DIR — keeping
+# these decoupled would let drift sneak in. Imported here rather than
+# from internal_chat_uploads to avoid pulling in starlette as a
+# transitive dep (this module runs in the standalone MCP path which
+# doesn't ship the in-container HTTP server).
+CHAT_UPLOAD_DIR = "/workspace/.molecule/chat-uploads"
+
+# Per-file safety net. The platform enforces 25 MB on the staging side,
+# but a buggy or hostile platform response shouldn't be able to fill the
+# workspace's disk — refuse to write more than this even if the response
+# claims a larger Content-Length.
+MAX_FILE_BYTES = 25 * 1024 * 1024
+
+# Network deadline for the GET. Tuned for a 25 MB transfer over a
+# reasonable consumer link (~5 Mbps gives ~40s for the full payload),
+# plus headroom for TLS + platform auth. Aligned with inbox poller's
+# 10s default for /activity calls — both are user-perceived latency.
+DEFAULT_FETCH_TIMEOUT = 60.0
+
+# Concurrency cap for ``BatchFetcher``. Four workers is enough headroom
+# for the realistic "user dragged 3-4 files into chat at once" case
+# while bounding the platform's per-workspace fan-out. The cap matters
+# because the platform's /content endpoint reads bytea from Postgres in
+# a single round-trip per request — N workers = N concurrent DB reads
+# of up to 25 MB each, so a higher cap could pressure platform memory
+# without much UX win (network bandwidth is the bottleneck once the
+# bytes are buffered).
+DEFAULT_BATCH_FETCH_WORKERS = 4
+
+# Upper bound on how long ``BatchFetcher.wait_all`` blocks the inbox
+# poll loop before giving up on still-in-flight fetches. Aligned with
+# DEFAULT_FETCH_TIMEOUT so a single hung fetch can't stall the loop
+# longer than its own deadline. A timeout fires only if a worker thread
+# is stuck past the underlying httpx timeout — pathological case;
+# normal completion is bounded by per-fetch timeout × ceil(N/W).
+DEFAULT_BATCH_WAIT_TIMEOUT = DEFAULT_FETCH_TIMEOUT + 5.0
+
+# Cap on the URI cache. A long-lived workspace handling thousands of
+# uploads shouldn't grow without bound; an LRU cap of 1024 keeps the
+# entries-needed-for-a-typical-conversation well within memory.
+URI_CACHE_MAX_ENTRIES = 1024
+
+# Same character class as internal_chat_uploads — kept duplicated rather
+# than imported to avoid dragging starlette into the standalone path.
+_UNSAFE_FILENAME_CHARS = re.compile(r"[^a-zA-Z0-9._\-]")
+
+
+def sanitize_filename(name: str) -> str:
+ """Reduce a user-supplied filename to a safe form.
+
+ Mirrors ``internal_chat_uploads.sanitize_filename`` and the Go
+ handler's ``SanitizeFilename`` — three-way parity is pinned by
+ ``workspace-server/internal/handlers/sanitize_filename_test.go`` and
+ ``workspace/tests/test_internal_chat_uploads.py`` so the URI shape
+ is identical regardless of which path handles the upload.
+ """
+ base = os.path.basename(name)
+ base = base.replace(" ", "_")
+ base = _UNSAFE_FILENAME_CHARS.sub("_", base)
+ if len(base) > 100:
+ ext = ""
+ dot = base.rfind(".")
+ if dot >= 0 and len(base) - dot <= 16:
+ ext = base[dot:]
+ base = base[: 100 - len(ext)] + ext
+ if base in ("", ".", ".."):
+ return "file"
+ return base
+
+
+# ---------------------------------------------------------------------------
+# URI cache — maps platform-pending URIs to local workspace: URIs
+# ---------------------------------------------------------------------------
+
+
+class _URICache:
+ """Thread-safe bounded LRU mapping of platform-pending → workspace URIs.
+
+ Bounded so a workspace that runs for months and handles thousands of
+ uploads doesn't accumulate entries forever. ``OrderedDict.move_to_end``
+ promotes recently-used entries; eviction takes the oldest.
+
+ The cache is intentionally per-process — there is no persistence
+ across a workspace restart. A restart with a stale inbox cursor that
+ re-poll an upload-receive row will re-fetch (the bytes are already
+ on disk from the prior session — see ``stage_to_disk``'s O_EXCL
+ handling) and re-register; a chat message that referenced the
+ platform-pending URI BEFORE the restart and arrives AFTER would miss
+ the rewrite and surface the platform-pending URI to the agent. That
+ is preferable to a stale persisted mapping that points at a deleted
+ file.
+ """
+
+ def __init__(self, max_entries: int = URI_CACHE_MAX_ENTRIES):
+ self._max = max_entries
+ self._lock = threading.Lock()
+ self._entries: "OrderedDict[str, str]" = OrderedDict()
+
+ def get(self, pending_uri: str) -> str | None:
+ with self._lock:
+ local = self._entries.get(pending_uri)
+ if local is not None:
+ self._entries.move_to_end(pending_uri)
+ return local
+
+ def set(self, pending_uri: str, local_uri: str) -> None:
+ with self._lock:
+ self._entries[pending_uri] = local_uri
+ self._entries.move_to_end(pending_uri)
+ while len(self._entries) > self._max:
+ self._entries.popitem(last=False)
+
+ def __len__(self) -> int:
+ with self._lock:
+ return len(self._entries)
+
+ def clear(self) -> None:
+ with self._lock:
+ self._entries.clear()
+
+
+_cache = _URICache()
+
+
+def get_cache() -> _URICache:
+ """Expose the module-singleton cache for tests and the rewrite path."""
+ return _cache
+
+
+def resolve_pending_uri(uri: str) -> str | None:
+ """Return the local ``workspace:`` URI for a ``platform-pending:`` URI,
+ or None if not yet staged. Convenience for callers that want to
+ fall back to an on-demand fetch — pass the result through to
+ ``executor_helpers.resolve_attachment_uri``.
+ """
+ return _cache.get(uri)
+
+
+# ---------------------------------------------------------------------------
+# On-disk staging
+# ---------------------------------------------------------------------------
+
+
+def _open_safe(path: str) -> int:
+ """Open ``path`` for write with ``O_CREAT|O_EXCL|O_NOFOLLOW``.
+
+ Same shape as ``internal_chat_uploads._open_safe`` — refuses to
+ follow a pre-existing symlink at the target and refuses to overwrite
+ an existing regular file. The 16-byte random prefix makes a name
+ collision astronomical, but defense-in-depth costs nothing.
+ """
+ flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
+ if hasattr(os, "O_NOFOLLOW"):
+ flags |= os.O_NOFOLLOW
+ return os.open(path, flags, 0o600)
+
+
+def stage_to_disk(content: bytes, filename: str) -> str:
+ """Write ``content`` under ``CHAT_UPLOAD_DIR`` and return the local URI.
+
+ Returns ``workspace:/workspace/.molecule/chat-uploads/-``.
+ The 32-hex prefix makes the on-disk name unguessable to anything
+ that didn't see the response, so even if a stale agent has a guess
+ at the original filename it can't construct a URL to a sibling's
+ upload.
+
+ Raises:
+ OSError: write failure (mkdir, open, or write). Caller is
+ expected to log + skip; the activity row stays unacked so a
+ future poll re-tries.
+ ValueError: ``content`` exceeds ``MAX_FILE_BYTES``. Pre-staging
+ guard belt-and-braces above the platform's same-side cap.
+ """
+ if len(content) > MAX_FILE_BYTES:
+ raise ValueError(
+ f"content size {len(content)} exceeds workspace cap {MAX_FILE_BYTES}"
+ )
+
+ Path(CHAT_UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
+
+ sanitized = sanitize_filename(filename)
+ prefix = pysecrets.token_hex(16)
+ stored = f"{prefix}-{sanitized}"
+ target = os.path.join(CHAT_UPLOAD_DIR, stored)
+
+ fd = _open_safe(target)
+ try:
+ with os.fdopen(fd, "wb") as f:
+ f.write(content)
+ except OSError:
+ # Best-effort cleanup — partial writes leave a stub file that
+ # would mask a future retry's success otherwise.
+ try:
+ os.unlink(target)
+ except OSError:
+ pass
+ raise
+
+ return f"workspace:{CHAT_UPLOAD_DIR}/{stored}"
+
+
+# ---------------------------------------------------------------------------
+# Activity row → fetch/stage/ack flow
+# ---------------------------------------------------------------------------
+
+
+def _request_body_dict(row: dict[str, Any]) -> dict[str, Any] | None:
+ """Coerce ``row['request_body']`` into a dict.
+
+ The /activity API returns request_body as JSON (already-deserialized
+ by httpx). Some legacy paths or mocked transports may emit a string;
+ handle defensively rather than raising.
+ """
+ body = row.get("request_body")
+ if isinstance(body, dict):
+ return body
+ if isinstance(body, str):
+ import json
+ try:
+ decoded = json.loads(body)
+ except (TypeError, ValueError):
+ return None
+ return decoded if isinstance(decoded, dict) else None
+ return None
+
+
+def is_chat_upload_row(row: dict[str, Any]) -> bool:
+ """True if ``row`` is the platform's chat-upload-receive activity.
+
+ Used by the inbox poller to fork the row off the regular A2A
+ message handling path — this row is not a peer message; it's an
+ instruction to fetch + stage bytes. Match on ``method`` only;
+ ``activity_type`` is already filtered to ``a2a_receive`` upstream.
+ """
+ return row.get("method") == "chat_upload_receive"
+
+
+def fetch_and_stage(
+ row: dict[str, Any],
+ *,
+ platform_url: str,
+ workspace_id: str,
+ headers: dict[str, str],
+ timeout_secs: float = DEFAULT_FETCH_TIMEOUT,
+ client: Any = None,
+) -> str | None:
+ """Fetch the row's bytes, stage them under chat-uploads, and ack.
+
+ Returns the local ``workspace:`` URI on success, or ``None`` if any
+ step failed (logged with enough detail to triage). Failure leaves
+ the platform-side row unacked, so a subsequent poll retries — the
+ activity row stays in the cursor's window because we DO advance the
+ cursor (the row is "handled" from the inbox's perspective even on
+ fetch failure; otherwise a permanent network outage would stall the
+ cursor and block real chat traffic).
+
+ On success, the URI cache is updated so a subsequent chat message
+ referencing the same ``platform-pending:`` URI is rewritten before
+ the agent sees it.
+
+ Pass ``client`` to reuse a shared ``httpx.Client`` for both GET and
+ POST ack (saves one TLS handshake per row vs. constructing one
+ per-call). ``BatchFetcher`` does this across an entire poll batch so
+ N concurrent fetches share one connection pool.
+ """
+ body = _request_body_dict(row)
+ if body is None:
+ logger.warning(
+ "inbox_uploads: row %s missing request_body; cannot fetch",
+ row.get("id"),
+ )
+ return None
+
+ file_id = body.get("file_id")
+ if not isinstance(file_id, str) or not file_id:
+ logger.warning(
+ "inbox_uploads: row %s has no file_id in request_body",
+ row.get("id"),
+ )
+ return None
+
+ pending_uri = body.get("uri")
+ if not isinstance(pending_uri, str) or not pending_uri:
+ # Reconstruct what the platform would have written — defensive
+ # against a row whose uri field got truncated. Same shape as the
+ # Go handler's URI builder.
+ pending_uri = f"platform-pending:{workspace_id}/{file_id}"
+
+ filename = body.get("name") or "file"
+ if not isinstance(filename, str):
+ filename = "file"
+
+ # Caller-supplied client: reuse for both GET + POST ack. Otherwise
+ # build a one-shot client and close it on the way out. Lazy httpx
+ # import keeps the standalone MCP path's optional dep optional.
+ own_client = client is None
+ if own_client:
+ try:
+ import httpx # noqa: WPS433
+ except ImportError:
+ logger.error("inbox_uploads: httpx not installed; cannot fetch %s", file_id)
+ return None
+ client = httpx.Client(timeout=timeout_secs)
+
+ try:
+ return _fetch_and_stage_with_client(
+ client,
+ platform_url=platform_url,
+ workspace_id=workspace_id,
+ headers=headers,
+ file_id=file_id,
+ pending_uri=pending_uri,
+ filename=filename,
+ body=body,
+ )
+ finally:
+ if own_client:
+ try:
+ client.close()
+ except Exception: # noqa: BLE001 — close should never crash the caller
+ pass
+
+
+def _fetch_and_stage_with_client(
+ client: Any,
+ *,
+ platform_url: str,
+ workspace_id: str,
+ headers: dict[str, str],
+ file_id: str,
+ pending_uri: str,
+ filename: str,
+ body: dict[str, Any],
+) -> str | None:
+ """Inner body of fetch_and_stage. Always uses the supplied client for
+ both GET and POST so the connection pool is shared across the call.
+ """
+ content_url = f"{platform_url}/workspaces/{workspace_id}/pending-uploads/{file_id}/content"
+ ack_url = f"{platform_url}/workspaces/{workspace_id}/pending-uploads/{file_id}/ack"
+
+ try:
+ resp = client.get(content_url, headers=headers)
+ except Exception as exc: # noqa: BLE001
+ logger.warning("inbox_uploads: GET %s failed: %s", content_url, exc)
+ return None
+
+ if resp.status_code == 404:
+ # Row was swept or already acked by a previous poll race — nothing
+ # to fetch. Don't ack again; the platform's GC handles it. This is
+ # a soft-skip, not an error — log at INFO so triage isn't noisy.
+ logger.info(
+ "inbox_uploads: pending upload %s already gone (404); skipping",
+ file_id,
+ )
+ return None
+ if resp.status_code >= 400:
+ logger.warning(
+ "inbox_uploads: GET %s returned %d: %s",
+ content_url,
+ resp.status_code,
+ (resp.text or "")[:200],
+ )
+ return None
+
+ content = resp.content or b""
+ if len(content) > MAX_FILE_BYTES:
+ logger.warning(
+ "inbox_uploads: refusing to stage %s — size %d exceeds cap %d",
+ file_id,
+ len(content),
+ MAX_FILE_BYTES,
+ )
+ return None
+
+ # Mimetype precedence: platform's Content-Type header → request_body
+ # mimeType field → extension guess. Same precedence as the in-
+ # container ingest handler.
+ mime_header = resp.headers.get("content-type", "").split(";")[0].strip()
+ mime = (
+ mime_header
+ or (body.get("mimeType") if isinstance(body.get("mimeType"), str) else "")
+ or (mimetypes.guess_type(filename)[0] or "")
+ )
+
+ try:
+ local_uri = stage_to_disk(content, filename)
+ except (OSError, ValueError) as exc:
+ logger.error(
+ "inbox_uploads: failed to stage %s (%s) to disk: %s",
+ file_id,
+ filename,
+ exc,
+ )
+ return None
+
+ _cache.set(pending_uri, local_uri)
+ logger.info(
+ "inbox_uploads: staged file_id=%s name=%s size=%d mime=%s pending_uri=%s local_uri=%s",
+ file_id,
+ filename,
+ len(content),
+ mime,
+ pending_uri,
+ local_uri,
+ )
+
+ # Ack last so a write failure above leaves the row available for a
+ # retry on the next poll. A failed ack is logged but doesn't roll
+ # back the on-disk file — the platform's sweep will clean up
+ # eventually.
+ try:
+ ack_resp = client.post(ack_url, headers=headers)
+ if ack_resp.status_code >= 400:
+ logger.warning(
+ "inbox_uploads: ack %s returned %d: %s",
+ ack_url,
+ ack_resp.status_code,
+ (ack_resp.text or "")[:200],
+ )
+ except Exception as exc: # noqa: BLE001
+ logger.warning("inbox_uploads: POST %s failed: %s", ack_url, exc)
+
+ return local_uri
+
+
+# ---------------------------------------------------------------------------
+# BatchFetcher — concurrent fetch across a single poll batch
+# ---------------------------------------------------------------------------
+
+
+class BatchFetcher:
+ """Fetch + stage + ack a batch of upload-receive rows concurrently.
+
+ Why this exists: the inbox poll loop used to call ``fetch_and_stage``
+ serially per row. With N upload rows in a batch (a user dragging
+ multiple files into chat at once), the loop blocked for
+ ``N × per_fetch_latency`` before processing the chat message that
+ referenced them — a 4-file upload at 5s each = 20s of stall
+ before the agent saw the user's prompt. ``BatchFetcher`` runs the
+ fetches on a small thread pool (default 4 workers) so the stall is
+ bounded by ``ceil(N/W) × per_fetch_latency`` instead.
+
+ Connection reuse: one ``httpx.Client`` is shared across every fetch
+ in the batch. httpx clients carry a connection pool, so a second
+ fetch to the same platform host reuses the TCP+TLS handshake from
+ the first — measurable win when fetches happen back-to-back.
+
+ Correctness invariant the caller MUST preserve: the inbox loop is
+ expected to call ``wait_all()`` before processing the chat-message
+ activity row that REFERENCES one of these uploads. Without the
+ barrier, the URI cache is empty when ``rewrite_request_body`` runs
+ and the agent sees the un-rewritten ``platform-pending:`` URI. The
+ caller-side test ``test_poll_once_waits_for_uploads_before_messages``
+ pins this end-to-end.
+
+ Use as a context manager so the executor + client are torn down
+ even if the caller raises mid-batch.
+ """
+
+ def __init__(
+ self,
+ *,
+ platform_url: str,
+ workspace_id: str,
+ headers: dict[str, str],
+ timeout_secs: float = DEFAULT_FETCH_TIMEOUT,
+ max_workers: int = DEFAULT_BATCH_FETCH_WORKERS,
+ client: Any = None,
+ ):
+ self._platform_url = platform_url
+ self._workspace_id = workspace_id
+ self._headers = dict(headers) # copy so caller mutations don't leak in
+ self._timeout_secs = timeout_secs
+
+ # Caller can inject a client (tests do this); production callers
+ # let us build one. Track ownership so we only close ours.
+ self._own_client = client is None
+ if self._own_client:
+ try:
+ import httpx # noqa: WPS433
+ except ImportError:
+ # Match fetch_and_stage's behavior: log + degrade rather
+ # than raising at construction time. submit() will then
+ # return None for every row.
+ logger.error("inbox_uploads: httpx not installed; BatchFetcher inert")
+ self._client: Any = None
+ else:
+ self._client = httpx.Client(timeout=timeout_secs)
+ else:
+ self._client = client
+
+ self._executor = concurrent.futures.ThreadPoolExecutor(
+ max_workers=max_workers,
+ thread_name_prefix="upload-fetch",
+ )
+ self._futures: list[concurrent.futures.Future[Any]] = []
+ self._closed = False
+ # Flipped to True by wait_all when the timeout fires; close()
+ # reads this to decide between drain-and-wait vs cancel-queued.
+ self._timed_out = False
+
+ def submit(self, row: dict[str, Any]) -> concurrent.futures.Future[Any] | None:
+ """Submit ``row`` for fetch + stage + ack. Non-blocking — the
+ worker thread runs ``fetch_and_stage`` with the shared client.
+
+ Returns the Future so a caller that wants per-row outcome can
+ await it; ``None`` if the BatchFetcher is in a degraded state
+ (httpx missing).
+ """
+ if self._closed:
+ raise RuntimeError("BatchFetcher: submit after close")
+ if self._client is None:
+ return None
+ fut = self._executor.submit(
+ fetch_and_stage,
+ row,
+ platform_url=self._platform_url,
+ workspace_id=self._workspace_id,
+ headers=self._headers,
+ timeout_secs=self._timeout_secs,
+ client=self._client,
+ )
+ self._futures.append(fut)
+ return fut
+
+ def wait_all(self, timeout: float | None = DEFAULT_BATCH_WAIT_TIMEOUT) -> None:
+ """Block until every submitted future completes (or times out).
+
+ Per-future exceptions are logged + swallowed — ``fetch_and_stage``
+ already converts every error path to ``return None``, so a real
+ exception propagating up to here is unexpected and we don't want
+ one bad fetch to abort the whole batch.
+
+ Timeouts are also logged + swallowed AND record the timed-out
+ futures on ``self._timed_out`` so ``close`` can cancel them
+ without paying their full latency. Without this hand-off,
+ ``close()``'s ``shutdown(wait=True)`` would block on the leaked
+ workers and undo the user-facing timeout — the inbox poll loop
+ would stall indefinitely on a hung /content fetch.
+ """
+ if not self._futures:
+ return
+ try:
+ done, not_done = concurrent.futures.wait(
+ self._futures,
+ timeout=timeout,
+ return_when=concurrent.futures.ALL_COMPLETED,
+ )
+ except Exception as exc: # noqa: BLE001 — concurrent.futures shouldn't raise here
+ logger.warning("inbox_uploads: BatchFetcher.wait_all crashed: %s", exc)
+ return
+ for fut in done:
+ exc = fut.exception()
+ if exc is not None:
+ logger.warning(
+ "inbox_uploads: BatchFetcher worker raised: %s", exc
+ )
+ if not_done:
+ logger.warning(
+ "inbox_uploads: BatchFetcher.wait_all left %d in-flight after %ss timeout",
+ len(not_done),
+ timeout,
+ )
+ # Mark these futures so close() knows to cancel-not-wait. We
+ # cancel queued-but-not-started ones immediately; futures
+ # already running can't be cancelled (Python's threading
+ # model), but close() will pass cancel_futures=True so any
+ # remaining queued items don't run.
+ for fut in not_done:
+ fut.cancel()
+ self._timed_out = True
+
+ def close(self) -> None:
+ """Tear down the executor + (if owned) the httpx client.
+
+ Idempotent. After close, ``submit`` raises and the BatchFetcher
+ cannot be reused — construct a fresh one for the next poll.
+
+ If ``wait_all`` reported a timeout, shutdown skips the
+ ``wait=True`` drain and instead asks the executor to drop queued
+ futures (``cancel_futures=True``). Currently-running workers
+ can't be interrupted by Python's threading model, but the poll
+ loop returns immediately rather than blocking on a hung fetch.
+ """
+ if self._closed:
+ return
+ self._closed = True
+ timed_out = getattr(self, "_timed_out", False)
+ try:
+ if timed_out:
+ # cancel_futures landed in Python 3.9 — guarded for older
+ # interpreters via a TypeError fallback. Drop queued
+ # tasks; running ones will exit when their httpx call
+ # eventually returns or the daemon thread dies.
+ try:
+ self._executor.shutdown(wait=False, cancel_futures=True)
+ except TypeError:
+ self._executor.shutdown(wait=False)
+ else:
+ # Healthy path: wait for in-flight work so we don't
+ # interrupt a fetch mid-write.
+ self._executor.shutdown(wait=True)
+ except Exception as exc: # noqa: BLE001
+ logger.warning("inbox_uploads: executor shutdown error: %s", exc)
+ if self._own_client and self._client is not None:
+ try:
+ self._client.close()
+ except Exception as exc: # noqa: BLE001
+ logger.warning("inbox_uploads: client close error: %s", exc)
+
+ def __enter__(self) -> "BatchFetcher":
+ return self
+
+ def __exit__(self, exc_type, exc, tb) -> None:
+ self.close()
+
+
+# ---------------------------------------------------------------------------
+# URI rewrite for incoming chat messages
+# ---------------------------------------------------------------------------
+#
+# The chat message that references a staged upload arrives as a
+# SEPARATE activity_log row, with parts of kind=file containing
+# platform-pending: URIs in the file.uri field. Walk the structure
+# in-place and rewrite to the local workspace: URI when the cache has it.
+# Unknown URIs pass through unchanged — the agent gets to choose how
+# to react (most runtimes log + ignore an unresolvable URI).
+
+
+def _rewrite_part(part: Any) -> None:
+ """Mutate a single A2A Part dict to swap platform-pending: URIs."""
+ if not isinstance(part, dict):
+ return
+ file_obj = part.get("file")
+ if not isinstance(file_obj, dict):
+ return
+ uri = file_obj.get("uri")
+ if not isinstance(uri, str) or not uri.startswith("platform-pending:"):
+ return
+ rewritten = _cache.get(uri)
+ if rewritten:
+ file_obj["uri"] = rewritten
+
+
+def rewrite_request_body(body: Any) -> None:
+ """Mutate ``body`` in-place, replacing platform-pending: URIs with
+ the cached local equivalents.
+
+ Walks the same shapes ``inbox._extract_text`` accepts:
+
+ - ``body['parts']``
+ - ``body['params']['parts']``
+ - ``body['params']['message']['parts']``
+
+ No-op for shapes that don't match — the message simply passes
+ through to the agent as-is.
+ """
+ if not isinstance(body, dict):
+ return
+ candidates: list[Any] = []
+ params = body.get("params") if isinstance(body.get("params"), dict) else None
+ if params:
+ message = params.get("message") if isinstance(params.get("message"), dict) else None
+ if message:
+ candidates.append(message.get("parts"))
+ candidates.append(params.get("parts"))
+ candidates.append(body.get("parts"))
+
+ for parts in candidates:
+ if isinstance(parts, list):
+ for part in parts:
+ _rewrite_part(part)
diff --git a/workspace/mcp_cli.py b/workspace/mcp_cli.py
index feea0b83..e890a66d 100644
--- a/workspace/mcp_cli.py
+++ b/workspace/mcp_cli.py
@@ -31,422 +31,53 @@ dependency via ``a2a-sdk``.
In-container usage (``python -m molecule_runtime.a2a_mcp_server`` or
direct import) bypasses this wrapper — the workspace runtime has its
own heartbeat loop in ``heartbeat.py`` so we don't double-heartbeat.
+
+Module layout (RFC #2873 iter 3 split):
+ * ``mcp_heartbeat`` — register POST + heartbeat loop + auth-failure
+ escalation + inbound-secret persistence.
+ * ``mcp_workspace_resolver`` — env validation, single + multi-workspace
+ resolution, operator-help printer, on-disk token-file read.
+ * ``mcp_inbox_pollers`` — activate the inbox singleton + spawn one
+ daemon poller per workspace.
+
+This file keeps just ``main()`` plus thin re-exports of the private
+symbols so existing tests' imports (``mcp_cli._build_agent_card``,
+``mcp_cli._heartbeat_loop``, etc.) keep working without churn.
"""
from __future__ import annotations
-import json
import logging
import os
import sys
-import threading
-import time
-from pathlib import Path
import configs_dir
+import mcp_heartbeat
+import mcp_inbox_pollers
+import mcp_workspace_resolver
logger = logging.getLogger(__name__)
-# Heartbeat cadence. Must be tighter than healthsweep's stale window
-# (currently 60-90s — see registry/healthsweep.go) by a comfortable
-# margin so a single missed heartbeat doesn't flip awaiting_agent.
-# 20s gives the operator's network 3 attempts within the budget; long
-# enough that it doesn't spam, short enough to recover quickly after
-# laptop sleep.
-HEARTBEAT_INTERVAL_SECONDS = 20.0
+# Re-export public surface for back-compat with the pre-split callers
+# and tests. The underscore-prefixed names mirror the names that
+# existed in this module before the split — keeping them ensures
+# `mcp_cli._build_agent_card`, `mcp_cli._heartbeat_loop`, etc.
+# resolve identically to the new functions.
+HEARTBEAT_INTERVAL_SECONDS = mcp_heartbeat.HEARTBEAT_INTERVAL_SECONDS
+_HEARTBEAT_AUTH_LOUD_THRESHOLD = mcp_heartbeat.HEARTBEAT_AUTH_LOUD_THRESHOLD
+_HEARTBEAT_AUTH_RELOG_INTERVAL = mcp_heartbeat.HEARTBEAT_AUTH_RELOG_INTERVAL
-# After this many consecutive 401/403 heartbeats, escalate from
-# WARNING to ERROR with re-onboard guidance. 3 ticks at 20s = ~1 minute
-# of sustained auth failure — enough to rule out a transient platform
-# blip but quick enough that an operator doesn't sit puzzled for 10
-# minutes wondering why their MCP tools 401. Same threshold used for
-# repeat-logging at 20-tick (~7 min) intervals so a long-running
-# session that missed the first ERROR still sees the message.
-_HEARTBEAT_AUTH_LOUD_THRESHOLD = 3
-_HEARTBEAT_AUTH_RELOG_INTERVAL = 20
+_build_agent_card = mcp_heartbeat.build_agent_card
+_platform_register = mcp_heartbeat.platform_register
+_heartbeat_loop = mcp_heartbeat.heartbeat_loop
+_log_heartbeat_auth_failure = mcp_heartbeat.log_heartbeat_auth_failure
+_persist_inbound_secret_from_heartbeat = mcp_heartbeat.persist_inbound_secret_from_heartbeat
+_start_heartbeat_thread = mcp_heartbeat.start_heartbeat_thread
+_resolve_workspaces = mcp_workspace_resolver.resolve_workspaces
+_print_missing_env_help = mcp_workspace_resolver.print_missing_env_help
+_read_token_file = mcp_workspace_resolver.read_token_file
-def _build_agent_card(workspace_id: str) -> dict:
- """Build the ``agent_card`` payload sent to /registry/register.
-
- Three optional env vars override the defaults so an operator can
- surface human-readable identity + capabilities to peers and the
- canvas Skills tab without code changes:
-
- * ``MOLECULE_AGENT_NAME`` — display name (defaults to
- ``molecule-mcp-{id[:8]}``). Surfaced in canvas workspace cards
- and ``list_peers`` output.
- * ``MOLECULE_AGENT_DESCRIPTION`` — one-liner about the agent's
- purpose. Rendered in canvas Details + Skills tabs.
- * ``MOLECULE_AGENT_SKILLS`` — comma-separated skill names
- (e.g. ``research,code-review,memory-curation``). Each name is
- expanded to a ``{"name": ...}`` skill object — the minimum
- shape that satisfies both ``shared_runtime.summarize_peers``
- (uses ``s["name"]``) and the canvas SkillsTab.tsx schema
- (id falls back to name when omitted). Empty / whitespace
- entries are dropped.
-
- Defaults match the previous hardcoded behaviour exactly so this
- is a strict superset — an operator who sets none of the env vars
- sees no change.
- """
- name = (os.environ.get("MOLECULE_AGENT_NAME") or "").strip()
- if not name:
- name = f"molecule-mcp-{workspace_id[:8]}"
-
- description = (os.environ.get("MOLECULE_AGENT_DESCRIPTION") or "").strip()
-
- skills_raw = (os.environ.get("MOLECULE_AGENT_SKILLS") or "").strip()
- skills: list[dict] = []
- if skills_raw:
- for s in skills_raw.split(","):
- label = s.strip()
- if label:
- skills.append({"name": label})
-
- card: dict = {"name": name, "skills": skills}
- if description:
- card["description"] = description
- return card
-
-
-def _platform_register(platform_url: str, workspace_id: str, token: str) -> None:
- """One-shot register at startup; fails fast on auth errors.
-
- Lifts the workspace from ``awaiting_agent`` to ``online`` for
- operators who never ran the curl-register snippet. Safe to call
- repeatedly: the platform's register handler is an upsert that
- just refreshes ``url``, ``agent_card``, and ``status``.
-
- Failure model (post-review):
- - 401 / 403 → ``sys.exit(3)`` immediately. The operator's
- token is wrong; silently looping in a broken state would
- make this hard to diagnose because the MCP tools would 401
- on every call too. Hard-fail is the kindest option.
- - Other 4xx/5xx → log a warning + continue. The heartbeat
- thread will surface persistent failures; transient platform
- blips shouldn't abort the MCP loop.
- - Network / transport errors → log + continue. Same reasoning.
-
- Origin header is required by the SaaS edge WAF; without it
- /registry/register currently still works (it's on the WAF
- allowlist), but the heartbeat path needs Origin and we want one
- consistent header set across both calls.
- """
- try:
- import httpx
- except ImportError:
- # httpx is a transitive dep via a2a-sdk; if missing, the MCP
- # server won't import either. Let the caller's later import
- # surface the real error.
- return
-
- payload = {
- "id": workspace_id,
- "url": "",
- "agent_card": _build_agent_card(workspace_id),
- "delivery_mode": "poll",
- }
- headers = {
- "Authorization": f"Bearer {token}",
- "Origin": platform_url,
- "Content-Type": "application/json",
- }
- try:
- with httpx.Client(timeout=10.0) as client:
- resp = client.post(
- f"{platform_url}/registry/register",
- json=payload,
- headers=headers,
- )
- if resp.status_code in (401, 403):
- print(
- f"molecule-mcp: register rejected with HTTP {resp.status_code} — "
- f"the token in MOLECULE_WORKSPACE_TOKEN is invalid for workspace "
- f"{workspace_id}. Regenerate from the canvas → Tokens tab.",
- file=sys.stderr,
- )
- sys.exit(3)
- if resp.status_code >= 400:
- logger.warning(
- "molecule-mcp: register POST returned HTTP %d: %s",
- resp.status_code,
- (resp.text or "")[:200],
- )
- else:
- logger.info(
- "molecule-mcp: registered workspace %s with platform",
- workspace_id,
- )
- except SystemExit:
- raise
- except Exception as exc: # noqa: BLE001
- logger.warning("molecule-mcp: register POST failed: %s", exc)
-
-
-def _heartbeat_loop(
- platform_url: str,
- workspace_id: str,
- token: str,
- interval: float = HEARTBEAT_INTERVAL_SECONDS,
-) -> None:
- """Daemon thread body: POST /registry/heartbeat every ``interval``s.
-
- Failures are logged at WARNING and the loop continues. The thread
- exits when the main process does (daemon=True). Each iteration
- rebuilds the payload + headers — cheap and ensures token rotation
- via env var (rare but possible) is picked up on the next tick.
- """
- try:
- import httpx
- except ImportError:
- return
-
- start_time = time.time()
- consecutive_auth_failures = 0
- while True:
- body = {
- "workspace_id": workspace_id,
- "error_rate": 0.0,
- "sample_error": "",
- "active_tasks": 0,
- "uptime_seconds": int(time.time() - start_time),
- }
- headers = {
- "Authorization": f"Bearer {token}",
- "Origin": platform_url,
- "Content-Type": "application/json",
- }
- try:
- with httpx.Client(timeout=10.0) as client:
- resp = client.post(
- f"{platform_url}/registry/heartbeat",
- json=body,
- headers=headers,
- )
- if resp.status_code in (401, 403):
- consecutive_auth_failures += 1
- _log_heartbeat_auth_failure(
- consecutive_auth_failures, workspace_id, resp.status_code,
- )
- elif resp.status_code >= 400:
- # Non-auth HTTP error — log, but DO NOT touch the
- # auth-failure counter (5xx blips, 429, etc. are
- # transient and unrelated to token validity).
- logger.warning(
- "molecule-mcp: heartbeat HTTP %d: %s",
- resp.status_code,
- (resp.text or "")[:200],
- )
- else:
- consecutive_auth_failures = 0
- _persist_inbound_secret_from_heartbeat(resp)
- except Exception as exc: # noqa: BLE001
- logger.warning("molecule-mcp: heartbeat failed: %s", exc)
- time.sleep(interval)
-
-
-def _log_heartbeat_auth_failure(count: int, workspace_id: str, status_code: int) -> None:
- """Escalate consecutive heartbeat 401/403s from quiet WARNING to
- actionable ERROR.
-
- The operator's first sign of trouble shouldn't be "tools 401 with no
- explanation" — that was the failure mode that motivated this code,
- triggered by a workspace being deleted server-side and its tokens
- revoked while the runtime kept heartbeating in silence.
-
- Cadence:
- * count < threshold: WARNING per tick (transient — could be a
- platform blip, don't shout yet)
- * count == threshold: ERROR with re-onboard instructions
- (the first signal the operator can't miss)
- * count > threshold and (count - threshold) % relog == 0: re-log
- ERROR (so a session that started after the first ERROR still
- sees the message scrolling past in their logs)
- """
- if count < _HEARTBEAT_AUTH_LOUD_THRESHOLD:
- logger.warning(
- "molecule-mcp: heartbeat HTTP %d (auth failure %d/%d) — "
- "token may be revoked. Will retry; if persistent, regenerate "
- "from canvas → Tokens.",
- status_code, count, _HEARTBEAT_AUTH_LOUD_THRESHOLD,
- )
- return
- # At or past the threshold — this is the loud actionable error.
- if count == _HEARTBEAT_AUTH_LOUD_THRESHOLD or (
- count - _HEARTBEAT_AUTH_LOUD_THRESHOLD
- ) % _HEARTBEAT_AUTH_RELOG_INTERVAL == 0:
- logger.error(
- "molecule-mcp: %d consecutive heartbeat auth failures (HTTP %d) — "
- "the token in MOLECULE_WORKSPACE_TOKEN has been REVOKED, likely "
- "because workspace %s was deleted server-side. The MCP server is "
- "still running but every platform call will fail. Regenerate the "
- "workspace + token from the canvas (Tokens tab), update your MCP "
- "config, and restart your runtime.",
- count, status_code, workspace_id,
- )
-
-
-def _persist_inbound_secret_from_heartbeat(resp: object) -> None:
- """Persist ``platform_inbound_secret`` from a heartbeat response, if any.
-
- The platform's heartbeat handler returns the secret on every beat
- (mirroring /registry/register) so a workspace that lazy-healed the
- secret on the platform side — typical recovery path for a workspace
- whose row had a NULL ``platform_inbound_secret`` after a partial
- bootstrap — picks it up within one heartbeat tick instead of
- requiring a runtime restart.
-
- Without this delivery path the chat-upload code path's "secret was
- just minted, will pick up on next heartbeat" 503 message is a lie
- and the workspace stays 401-forever until the operator restarts
- the runtime. Caught 2026-04-30 on hongmingwang tenant.
-
- Failure is non-fatal: if the body isn't JSON, doesn't carry the
- field, or the disk write fails, the next heartbeat retries. This
- matches the cold-start register flow in main.py:319-323.
- """
- try:
- body = resp.json()
- except Exception: # noqa: BLE001
- return
- if not isinstance(body, dict):
- return
- secret = body.get("platform_inbound_secret")
- if not secret:
- return
- try:
- from platform_inbound_auth import save_inbound_secret
-
- save_inbound_secret(secret)
- except Exception as exc: # noqa: BLE001
- logger.warning(
- "molecule-mcp: persist inbound secret from heartbeat failed: %s", exc
- )
-
-
-def _start_heartbeat_thread(
- platform_url: str,
- workspace_id: str,
- token: str,
-) -> threading.Thread:
- """Start the heartbeat daemon thread. Returns the Thread handle.
-
- The MCP stdio loop runs in the foreground (asyncio); this thread
- runs alongside it. ``daemon=True`` so when the operator hits
- Ctrl-C / closes the runtime, the heartbeat dies with it instead
- of leaking and writing to a stale workspace.
- """
- t = threading.Thread(
- target=_heartbeat_loop,
- args=(platform_url, workspace_id, token),
- name="molecule-mcp-heartbeat",
- daemon=True,
- )
- t.start()
- return t
-
-
-def _resolve_workspaces() -> tuple[list[tuple[str, str]], list[str]]:
- """Return the list of ``(workspace_id, token)`` pairs to register.
-
- Resolution order:
-
- 1. ``MOLECULE_WORKSPACES`` env var — JSON array of
- ``{"id": "...", "token": "..."}`` objects. Activates the
- multi-workspace external-agent path (one process registered into
- N workspaces). When set, ``WORKSPACE_ID`` / ``MOLECULE_WORKSPACE_TOKEN``
- are IGNORED — the JSON is the source of truth.
-
- 2. Single-workspace fallback — ``WORKSPACE_ID`` env var + token from
- ``MOLECULE_WORKSPACE_TOKEN`` or ``${CONFIGS_DIR}/.auth_token``.
- This is the pre-existing path; back-compat exact.
-
- Returns ``(workspaces, errors)``:
- * ``workspaces``: list of ``(workspace_id, token)`` — non-empty
- on the happy path.
- * ``errors``: human-readable strings describing what's missing /
- malformed. ``main()`` surfaces these with the same shape as
- ``_print_missing_env_help`` so the operator's first run gives
- actionable output.
-
- Why JSON env (not file): ergonomic for Claude Code MCP config (one
- string in ``mcpServers.molecule.env`` instead of a sidecar file)
- and for CI / launchers. A separate config-file path can be added
- later without breaking this.
- """
- raw = os.environ.get("MOLECULE_WORKSPACES", "").strip()
- if raw:
- try:
- parsed = json.loads(raw)
- except json.JSONDecodeError as exc:
- return [], [
- f"MOLECULE_WORKSPACES is not valid JSON ({exc.msg} at pos "
- f"{exc.pos}). Expected: '[{{\"id\":\"\",\"token\":"
- f"\"\"}},{{...}}]'"
- ]
- if not isinstance(parsed, list) or not parsed:
- return [], [
- "MOLECULE_WORKSPACES must be a non-empty JSON array of "
- "{\"id\":\"...\",\"token\":\"...\"} objects"
- ]
- out: list[tuple[str, str]] = []
- seen: set[str] = set()
- errors: list[str] = []
- for i, entry in enumerate(parsed):
- if not isinstance(entry, dict):
- errors.append(
- f"MOLECULE_WORKSPACES[{i}] is not an object — got {type(entry).__name__}"
- )
- continue
- wsid = str(entry.get("id", "")).strip()
- tok = str(entry.get("token", "")).strip()
- if not wsid or not tok:
- errors.append(
- f"MOLECULE_WORKSPACES[{i}] missing 'id' or 'token'"
- )
- continue
- if wsid in seen:
- errors.append(
- f"MOLECULE_WORKSPACES[{i}] duplicate workspace id {wsid!r}"
- )
- continue
- seen.add(wsid)
- out.append((wsid, tok))
- if errors:
- return [], errors
- return out, []
-
- # Single-workspace back-compat path.
- wsid = os.environ.get("WORKSPACE_ID", "").strip()
- if not wsid:
- return [], ["WORKSPACE_ID (or MOLECULE_WORKSPACES) is required"]
- tok = os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip()
- if not tok:
- tok = _read_token_file()
- if not tok:
- return [], [
- "MOLECULE_WORKSPACE_TOKEN (or CONFIGS_DIR/.auth_token) is required"
- ]
- return [(wsid, tok)], []
-
-
-def _print_missing_env_help(missing: list[str], have_token_file: bool) -> None:
- print("molecule-mcp: missing required environment.\n", file=sys.stderr)
- print("Set the following before running molecule-mcp:", file=sys.stderr)
- print(" WORKSPACE_ID — your workspace UUID (from canvas)", file=sys.stderr)
- print(
- " PLATFORM_URL — base URL of your Molecule platform "
- "(e.g. https://your-tenant.staging.moleculesai.app)",
- file=sys.stderr,
- )
- if not have_token_file:
- print(
- " MOLECULE_WORKSPACE_TOKEN — bearer token for this workspace "
- "(canvas → Tokens tab)",
- file=sys.stderr,
- )
- print("", file=sys.stderr)
- print(f"Currently missing: {', '.join(missing)}", file=sys.stderr)
+_start_inbox_pollers = mcp_inbox_pollers.start_inbox_pollers
def main() -> None:
@@ -558,69 +189,5 @@ def main() -> None:
cli_main()
-def _start_inbox_pollers(platform_url: str, workspace_ids: list[str]) -> None:
- """Activate the inbox singleton + spawn one poller daemon thread per workspace.
-
- Done lazily here (not at module import) because importing inbox
- pulls in platform_auth, which only resolves cleanly AFTER env
- validation succeeds. Activation is idempotent within a process,
- so a stray double-call (e.g. test harness re-entering main) is
- harmless.
-
- The poller threads are daemon=True — die with the main process.
-
- Single-workspace path: one poller, single cursor file at the legacy
- location (``.mcp_inbox_cursor``). Cursor-key resolution falls back
- to the empty string for back-compat with operators whose existing
- on-disk cursor was written by the pre-multi-workspace code.
-
- Multi-workspace path: N pollers, each with its own cursor file
- keyed by ``workspace_id[:8]``. Cursors live next to each other in
- configs_dir so an operator inspecting state sees all of them
- together.
- """
- try:
- import inbox
- except ImportError as exc:
- logger.warning("molecule-mcp: inbox module unavailable: %s", exc)
- return
-
- if len(workspace_ids) <= 1:
- # Back-compat exact: single-workspace mode reuses the legacy
- # cursor filename + cursor_path constructor arg, so an existing
- # operator's on-disk state isn't invalidated by upgrade.
- wsid = workspace_ids[0]
- state = inbox.InboxState(cursor_path=inbox.default_cursor_path())
- inbox.activate(state)
- inbox.start_poller_thread(state, platform_url, wsid)
- return
-
- # Multi-workspace: per-workspace cursor file, one shared queue.
- cursor_paths = {wsid: inbox.default_cursor_path(wsid) for wsid in workspace_ids}
- state = inbox.InboxState(cursor_paths=cursor_paths)
- inbox.activate(state)
- for wsid in workspace_ids:
- inbox.start_poller_thread(state, platform_url, wsid)
-
-
-def _read_token_file() -> str:
- """Read the token from the resolved configs dir's ``.auth_token`` if
- present.
-
- Mirrors platform_auth._token_file's location resolution but without
- importing the heavy module here (that import triggers a2a_client's
- WORKSPACE_ID guard which is fine after env validation, but cheaper
- to inline a 4-line file read than pull in the whole stack just for
- the path).
- """
- path = configs_dir.resolve() / ".auth_token"
- if not path.is_file():
- return ""
- try:
- return path.read_text().strip()
- except OSError:
- return ""
-
-
if __name__ == "__main__": # pragma: no cover
main()
diff --git a/workspace/mcp_heartbeat.py b/workspace/mcp_heartbeat.py
new file mode 100644
index 00000000..2d27aa29
--- /dev/null
+++ b/workspace/mcp_heartbeat.py
@@ -0,0 +1,325 @@
+"""Heartbeat + register thread for the standalone ``molecule-mcp`` wrapper.
+
+Extracted from ``mcp_cli.py`` (RFC #2873 iter 3) so the heartbeat /
+register concern lives in its own module. The console-script entry
+``mcp_cli:main`` still drives the spawn, but the loop body, auth-failure
+escalation, and inbound-secret persistence now live here so they can be
+read, tested, and replaced independently of the orchestrator.
+
+Public surface:
+
+* ``HEARTBEAT_INTERVAL_SECONDS`` — cadence constant.
+* ``build_agent_card(workspace_id)`` — payload helper.
+* ``platform_register(platform_url, workspace_id, token)`` — one-shot
+ POST /registry/register at startup.
+* ``start_heartbeat_thread(platform_url, workspace_id, token)`` — spawn
+ the daemon thread.
+"""
+from __future__ import annotations
+
+import logging
+import os
+import sys
+import threading
+import time
+
+logger = logging.getLogger(__name__)
+
+# Heartbeat cadence. Must be tighter than healthsweep's stale window
+# (currently 60-90s — see registry/healthsweep.go) by a comfortable
+# margin so a single missed heartbeat doesn't flip awaiting_agent.
+# 20s gives the operator's network 3 attempts within the budget; long
+# enough that it doesn't spam, short enough to recover quickly after
+# laptop sleep.
+HEARTBEAT_INTERVAL_SECONDS = 20.0
+
+# After this many consecutive 401/403 heartbeats, escalate from
+# WARNING to ERROR with re-onboard guidance. 3 ticks at 20s = ~1 minute
+# of sustained auth failure — enough to rule out a transient platform
+# blip but quick enough that an operator doesn't sit puzzled for 10
+# minutes wondering why their MCP tools 401. Same threshold used for
+# repeat-logging at 20-tick (~7 min) intervals so a long-running
+# session that missed the first ERROR still sees the message.
+HEARTBEAT_AUTH_LOUD_THRESHOLD = 3
+HEARTBEAT_AUTH_RELOG_INTERVAL = 20
+
+
+def build_agent_card(workspace_id: str) -> dict:
+ """Build the ``agent_card`` payload sent to /registry/register.
+
+ Three optional env vars override the defaults so an operator can
+ surface human-readable identity + capabilities to peers and the
+ canvas Skills tab without code changes:
+
+ * ``MOLECULE_AGENT_NAME`` — display name (defaults to
+ ``molecule-mcp-{id[:8]}``). Surfaced in canvas workspace cards
+ and ``list_peers`` output.
+ * ``MOLECULE_AGENT_DESCRIPTION`` — one-liner about the agent's
+ purpose. Rendered in canvas Details + Skills tabs.
+ * ``MOLECULE_AGENT_SKILLS`` — comma-separated skill names
+ (e.g. ``research,code-review,memory-curation``). Each name is
+ expanded to a ``{"name": ...}`` skill object — the minimum
+ shape that satisfies both ``shared_runtime.summarize_peers``
+ (uses ``s["name"]``) and the canvas SkillsTab.tsx schema
+ (id falls back to name when omitted). Empty / whitespace
+ entries are dropped.
+
+ Defaults match the previous hardcoded behaviour exactly so this
+ is a strict superset — an operator who sets none of the env vars
+ sees no change.
+ """
+ name = (os.environ.get("MOLECULE_AGENT_NAME") or "").strip()
+ if not name:
+ name = f"molecule-mcp-{workspace_id[:8]}"
+
+ description = (os.environ.get("MOLECULE_AGENT_DESCRIPTION") or "").strip()
+
+ skills_raw = (os.environ.get("MOLECULE_AGENT_SKILLS") or "").strip()
+ skills: list[dict] = []
+ if skills_raw:
+ for s in skills_raw.split(","):
+ label = s.strip()
+ if label:
+ skills.append({"name": label})
+
+ card: dict = {"name": name, "skills": skills}
+ if description:
+ card["description"] = description
+ return card
+
+
+def platform_register(platform_url: str, workspace_id: str, token: str) -> None:
+ """One-shot register at startup; fails fast on auth errors.
+
+ Lifts the workspace from ``awaiting_agent`` to ``online`` for
+ operators who never ran the curl-register snippet. Safe to call
+ repeatedly: the platform's register handler is an upsert that
+ just refreshes ``url``, ``agent_card``, and ``status``.
+
+ Failure model (post-review):
+ - 401 / 403 → ``sys.exit(3)`` immediately. The operator's
+ token is wrong; silently looping in a broken state would
+ make this hard to diagnose because the MCP tools would 401
+ on every call too. Hard-fail is the kindest option.
+ - Other 4xx/5xx → log a warning + continue. The heartbeat
+ thread will surface persistent failures; transient platform
+ blips shouldn't abort the MCP loop.
+ - Network / transport errors → log + continue. Same reasoning.
+
+ Origin header is required by the SaaS edge WAF; without it
+ /registry/register currently still works (it's on the WAF
+ allowlist), but the heartbeat path needs Origin and we want one
+ consistent header set across both calls.
+ """
+ try:
+ import httpx
+ except ImportError:
+ # httpx is a transitive dep via a2a-sdk; if missing, the MCP
+ # server won't import either. Let the caller's later import
+ # surface the real error.
+ return
+
+ payload = {
+ "id": workspace_id,
+ "url": "",
+ "agent_card": build_agent_card(workspace_id),
+ "delivery_mode": "poll",
+ }
+ headers = {
+ "Authorization": f"Bearer {token}",
+ "Origin": platform_url,
+ "Content-Type": "application/json",
+ }
+ try:
+ with httpx.Client(timeout=10.0) as client:
+ resp = client.post(
+ f"{platform_url}/registry/register",
+ json=payload,
+ headers=headers,
+ )
+ if resp.status_code in (401, 403):
+ print(
+ f"molecule-mcp: register rejected with HTTP {resp.status_code} — "
+ f"the token in MOLECULE_WORKSPACE_TOKEN is invalid for workspace "
+ f"{workspace_id}. Regenerate from the canvas → Tokens tab.",
+ file=sys.stderr,
+ )
+ sys.exit(3)
+ if resp.status_code >= 400:
+ logger.warning(
+ "molecule-mcp: register POST returned HTTP %d: %s",
+ resp.status_code,
+ (resp.text or "")[:200],
+ )
+ else:
+ logger.info(
+ "molecule-mcp: registered workspace %s with platform",
+ workspace_id,
+ )
+ except SystemExit:
+ raise
+ except Exception as exc: # noqa: BLE001
+ logger.warning("molecule-mcp: register POST failed: %s", exc)
+
+
+def heartbeat_loop(
+ platform_url: str,
+ workspace_id: str,
+ token: str,
+ interval: float = HEARTBEAT_INTERVAL_SECONDS,
+) -> None:
+ """Daemon thread body: POST /registry/heartbeat every ``interval``s.
+
+ Failures are logged at WARNING and the loop continues. The thread
+ exits when the main process does (daemon=True). Each iteration
+ rebuilds the payload + headers — cheap and ensures token rotation
+ via env var (rare but possible) is picked up on the next tick.
+ """
+ try:
+ import httpx
+ except ImportError:
+ return
+
+ start_time = time.time()
+ consecutive_auth_failures = 0
+ while True:
+ body = {
+ "workspace_id": workspace_id,
+ "error_rate": 0.0,
+ "sample_error": "",
+ "active_tasks": 0,
+ "uptime_seconds": int(time.time() - start_time),
+ }
+ headers = {
+ "Authorization": f"Bearer {token}",
+ "Origin": platform_url,
+ "Content-Type": "application/json",
+ }
+ try:
+ with httpx.Client(timeout=10.0) as client:
+ resp = client.post(
+ f"{platform_url}/registry/heartbeat",
+ json=body,
+ headers=headers,
+ )
+ if resp.status_code in (401, 403):
+ consecutive_auth_failures += 1
+ log_heartbeat_auth_failure(
+ consecutive_auth_failures, workspace_id, resp.status_code,
+ )
+ elif resp.status_code >= 400:
+ # Non-auth HTTP error — log, but DO NOT touch the
+ # auth-failure counter (5xx blips, 429, etc. are
+ # transient and unrelated to token validity).
+ logger.warning(
+ "molecule-mcp: heartbeat HTTP %d: %s",
+ resp.status_code,
+ (resp.text or "")[:200],
+ )
+ else:
+ consecutive_auth_failures = 0
+ persist_inbound_secret_from_heartbeat(resp)
+ except Exception as exc: # noqa: BLE001
+ logger.warning("molecule-mcp: heartbeat failed: %s", exc)
+ time.sleep(interval)
+
+
+def log_heartbeat_auth_failure(count: int, workspace_id: str, status_code: int) -> None:
+ """Escalate consecutive heartbeat 401/403s from quiet WARNING to
+ actionable ERROR.
+
+ The operator's first sign of trouble shouldn't be "tools 401 with no
+ explanation" — that was the failure mode that motivated this code,
+ triggered by a workspace being deleted server-side and its tokens
+ revoked while the runtime kept heartbeating in silence.
+
+ Cadence:
+ * count < threshold: WARNING per tick (transient — could be a
+ platform blip, don't shout yet)
+ * count == threshold: ERROR with re-onboard instructions
+ (the first signal the operator can't miss)
+ * count > threshold and (count - threshold) % relog == 0: re-log
+ ERROR (so a session that started after the first ERROR still
+ sees the message scrolling past in their logs)
+ """
+ if count < HEARTBEAT_AUTH_LOUD_THRESHOLD:
+ logger.warning(
+ "molecule-mcp: heartbeat HTTP %d (auth failure %d/%d) — "
+ "token may be revoked. Will retry; if persistent, regenerate "
+ "from canvas → Tokens.",
+ status_code, count, HEARTBEAT_AUTH_LOUD_THRESHOLD,
+ )
+ return
+ # At or past the threshold — this is the loud actionable error.
+ if count == HEARTBEAT_AUTH_LOUD_THRESHOLD or (
+ count - HEARTBEAT_AUTH_LOUD_THRESHOLD
+ ) % HEARTBEAT_AUTH_RELOG_INTERVAL == 0:
+ logger.error(
+ "molecule-mcp: %d consecutive heartbeat auth failures (HTTP %d) — "
+ "the token in MOLECULE_WORKSPACE_TOKEN has been REVOKED, likely "
+ "because workspace %s was deleted server-side. The MCP server is "
+ "still running but every platform call will fail. Regenerate the "
+ "workspace + token from the canvas (Tokens tab), update your MCP "
+ "config, and restart your runtime.",
+ count, status_code, workspace_id,
+ )
+
+
+def persist_inbound_secret_from_heartbeat(resp: object) -> None:
+ """Persist ``platform_inbound_secret`` from a heartbeat response, if any.
+
+ The platform's heartbeat handler returns the secret on every beat
+ (mirroring /registry/register) so a workspace that lazy-healed the
+ secret on the platform side — typical recovery path for a workspace
+ whose row had a NULL ``platform_inbound_secret`` after a partial
+ bootstrap — picks it up within one heartbeat tick instead of
+ requiring a runtime restart.
+
+ Without this delivery path the chat-upload code path's "secret was
+ just minted, will pick up on next heartbeat" 503 message is a lie
+ and the workspace stays 401-forever until the operator restarts
+ the runtime. Caught 2026-04-30 on hongmingwang tenant.
+
+ Failure is non-fatal: if the body isn't JSON, doesn't carry the
+ field, or the disk write fails, the next heartbeat retries. This
+ matches the cold-start register flow in main.py:319-323.
+ """
+ try:
+ body = resp.json()
+ except Exception: # noqa: BLE001
+ return
+ if not isinstance(body, dict):
+ return
+ secret = body.get("platform_inbound_secret")
+ if not secret:
+ return
+ try:
+ from platform_inbound_auth import save_inbound_secret
+
+ save_inbound_secret(secret)
+ except Exception as exc: # noqa: BLE001
+ logger.warning(
+ "molecule-mcp: persist inbound secret from heartbeat failed: %s", exc
+ )
+
+
+def start_heartbeat_thread(
+ platform_url: str,
+ workspace_id: str,
+ token: str,
+) -> threading.Thread:
+ """Start the heartbeat daemon thread. Returns the Thread handle.
+
+ The MCP stdio loop runs in the foreground (asyncio); this thread
+ runs alongside it. ``daemon=True`` so when the operator hits
+ Ctrl-C / closes the runtime, the heartbeat dies with it instead
+ of leaking and writing to a stale workspace.
+ """
+ t = threading.Thread(
+ target=heartbeat_loop,
+ args=(platform_url, workspace_id, token),
+ name="molecule-mcp-heartbeat",
+ daemon=True,
+ )
+ t.start()
+ return t
diff --git a/workspace/mcp_inbox_pollers.py b/workspace/mcp_inbox_pollers.py
new file mode 100644
index 00000000..659da5ed
--- /dev/null
+++ b/workspace/mcp_inbox_pollers.py
@@ -0,0 +1,63 @@
+"""Inbox-poller spawn helpers for the standalone ``molecule-mcp`` wrapper.
+
+Extracted from ``mcp_cli.py`` (RFC #2873 iter 3). The poller is the
+INBOUND side of the standalone path — without it, the universal MCP
+server is outbound-only (can call ``delegate_task`` /
+``send_message_to_user``, never observes canvas-user / peer-agent
+messages).
+
+Public surface:
+
+* ``start_inbox_pollers(platform_url, workspace_ids)`` — activate the
+ inbox singleton and spawn one daemon poller per workspace.
+"""
+from __future__ import annotations
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def start_inbox_pollers(platform_url: str, workspace_ids: list[str]) -> None:
+ """Activate the inbox singleton + spawn one poller daemon thread per workspace.
+
+ Done lazily here (not at module import) because importing inbox
+ pulls in platform_auth, which only resolves cleanly AFTER env
+ validation succeeds. Activation is idempotent within a process,
+ so a stray double-call (e.g. test harness re-entering main) is
+ harmless.
+
+ The poller threads are daemon=True — die with the main process.
+
+ Single-workspace path: one poller, single cursor file at the legacy
+ location (``.mcp_inbox_cursor``). Cursor-key resolution falls back
+ to the empty string for back-compat with operators whose existing
+ on-disk cursor was written by the pre-multi-workspace code.
+
+ Multi-workspace path: N pollers, each with its own cursor file
+ keyed by ``workspace_id[:8]``. Cursors live next to each other in
+ configs_dir so an operator inspecting state sees all of them
+ together.
+ """
+ try:
+ import inbox
+ except ImportError as exc:
+ logger.warning("molecule-mcp: inbox module unavailable: %s", exc)
+ return
+
+ if len(workspace_ids) <= 1:
+ # Back-compat exact: single-workspace mode reuses the legacy
+ # cursor filename + cursor_path constructor arg, so an existing
+ # operator's on-disk state isn't invalidated by upgrade.
+ wsid = workspace_ids[0]
+ state = inbox.InboxState(cursor_path=inbox.default_cursor_path())
+ inbox.activate(state)
+ inbox.start_poller_thread(state, platform_url, wsid)
+ return
+
+ # Multi-workspace: per-workspace cursor file, one shared queue.
+ cursor_paths = {wsid: inbox.default_cursor_path(wsid) for wsid in workspace_ids}
+ state = inbox.InboxState(cursor_paths=cursor_paths)
+ inbox.activate(state)
+ for wsid in workspace_ids:
+ inbox.start_poller_thread(state, platform_url, wsid)
diff --git a/workspace/mcp_workspace_resolver.py b/workspace/mcp_workspace_resolver.py
new file mode 100644
index 00000000..a6fe3bff
--- /dev/null
+++ b/workspace/mcp_workspace_resolver.py
@@ -0,0 +1,146 @@
+"""Env validation + workspace resolution for the standalone ``molecule-mcp``.
+
+Extracted from ``mcp_cli.py`` (RFC #2873 iter 3). Deals with the two
+shapes ``molecule-mcp`` accepts:
+
+ * Single-workspace legacy shape: ``WORKSPACE_ID`` + token from
+ ``MOLECULE_WORKSPACE_TOKEN`` or ``${CONFIGS_DIR}/.auth_token``.
+ * Multi-workspace JSON shape: ``MOLECULE_WORKSPACES`` env var carries a
+ JSON array of ``{"id": ..., "token": ...}`` entries.
+
+Public surface:
+
+* ``resolve_workspaces()`` → ``(workspaces, errors)``.
+* ``read_token_file()`` → token text or ``""``.
+* ``print_missing_env_help(missing, have_token_file)`` — operator-help
+ printer.
+"""
+from __future__ import annotations
+
+import json
+import os
+import sys
+
+import configs_dir
+
+
+def resolve_workspaces() -> tuple[list[tuple[str, str]], list[str]]:
+ """Return the list of ``(workspace_id, token)`` pairs to register.
+
+ Resolution order:
+
+ 1. ``MOLECULE_WORKSPACES`` env var — JSON array of
+ ``{"id": "...", "token": "..."}`` objects. Activates the
+ multi-workspace external-agent path (one process registered into
+ N workspaces). When set, ``WORKSPACE_ID`` / ``MOLECULE_WORKSPACE_TOKEN``
+ are IGNORED — the JSON is the source of truth.
+
+ 2. Single-workspace fallback — ``WORKSPACE_ID`` env var + token from
+ ``MOLECULE_WORKSPACE_TOKEN`` or ``${CONFIGS_DIR}/.auth_token``.
+ This is the pre-existing path; back-compat exact.
+
+ Returns ``(workspaces, errors)``:
+ * ``workspaces``: list of ``(workspace_id, token)`` — non-empty
+ on the happy path.
+ * ``errors``: human-readable strings describing what's missing /
+ malformed. ``main()`` surfaces these with the same shape as
+ ``print_missing_env_help`` so the operator's first run gives
+ actionable output.
+
+ Why JSON env (not file): ergonomic for Claude Code MCP config (one
+ string in ``mcpServers.molecule.env`` instead of a sidecar file)
+ and for CI / launchers. A separate config-file path can be added
+ later without breaking this.
+ """
+ raw = os.environ.get("MOLECULE_WORKSPACES", "").strip()
+ if raw:
+ try:
+ parsed = json.loads(raw)
+ except json.JSONDecodeError as exc:
+ return [], [
+ f"MOLECULE_WORKSPACES is not valid JSON ({exc.msg} at pos "
+ f"{exc.pos}). Expected: '[{{\"id\":\"\",\"token\":"
+ f"\"\"}},{{...}}]'"
+ ]
+ if not isinstance(parsed, list) or not parsed:
+ return [], [
+ "MOLECULE_WORKSPACES must be a non-empty JSON array of "
+ "{\"id\":\"...\",\"token\":\"...\"} objects"
+ ]
+ out: list[tuple[str, str]] = []
+ seen: set[str] = set()
+ errors: list[str] = []
+ for i, entry in enumerate(parsed):
+ if not isinstance(entry, dict):
+ errors.append(
+ f"MOLECULE_WORKSPACES[{i}] is not an object — got {type(entry).__name__}"
+ )
+ continue
+ wsid = str(entry.get("id", "")).strip()
+ tok = str(entry.get("token", "")).strip()
+ if not wsid or not tok:
+ errors.append(
+ f"MOLECULE_WORKSPACES[{i}] missing 'id' or 'token'"
+ )
+ continue
+ if wsid in seen:
+ errors.append(
+ f"MOLECULE_WORKSPACES[{i}] duplicate workspace id {wsid!r}"
+ )
+ continue
+ seen.add(wsid)
+ out.append((wsid, tok))
+ if errors:
+ return [], errors
+ return out, []
+
+ # Single-workspace back-compat path.
+ wsid = os.environ.get("WORKSPACE_ID", "").strip()
+ if not wsid:
+ return [], ["WORKSPACE_ID (or MOLECULE_WORKSPACES) is required"]
+ tok = os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip()
+ if not tok:
+ tok = read_token_file()
+ if not tok:
+ return [], [
+ "MOLECULE_WORKSPACE_TOKEN (or CONFIGS_DIR/.auth_token) is required"
+ ]
+ return [(wsid, tok)], []
+
+
+def print_missing_env_help(missing: list[str], have_token_file: bool) -> None:
+ print("molecule-mcp: missing required environment.\n", file=sys.stderr)
+ print("Set the following before running molecule-mcp:", file=sys.stderr)
+ print(" WORKSPACE_ID — your workspace UUID (from canvas)", file=sys.stderr)
+ print(
+ " PLATFORM_URL — base URL of your Molecule platform "
+ "(e.g. https://your-tenant.staging.moleculesai.app)",
+ file=sys.stderr,
+ )
+ if not have_token_file:
+ print(
+ " MOLECULE_WORKSPACE_TOKEN — bearer token for this workspace "
+ "(canvas → Tokens tab)",
+ file=sys.stderr,
+ )
+ print("", file=sys.stderr)
+ print(f"Currently missing: {', '.join(missing)}", file=sys.stderr)
+
+
+def read_token_file() -> str:
+ """Read the token from the resolved configs dir's ``.auth_token`` if
+ present.
+
+ Mirrors platform_auth._token_file's location resolution but without
+ importing the heavy module here (that import triggers a2a_client's
+ WORKSPACE_ID guard which is fine after env validation, but cheaper
+ to inline a 4-line file read than pull in the whole stack just for
+ the path).
+ """
+ path = configs_dir.resolve() / ".auth_token"
+ if not path.is_file():
+ return ""
+ try:
+ return path.read_text().strip()
+ except OSError:
+ return ""
diff --git a/workspace/tests/test_inbox.py b/workspace/tests/test_inbox.py
index 6731701a..cbba9a3b 100644
--- a/workspace/tests/test_inbox.py
+++ b/workspace/tests/test_inbox.py
@@ -555,16 +555,34 @@ def test_poll_once_self_notify_does_not_fire_notification(state: inbox.InboxStat
def test_start_poller_thread_is_daemon(state: inbox.InboxState):
"""Daemon flag is required so the poller dies with the parent
process; a non-daemon poller would leak across `claude` restarts
- and write to a stale workspace."""
+ and write to a stale workspace.
+
+ Stop_event is plumbed so the thread cleans up at the end of the
+ test instead of leaking into later tests. Without cleanup, the
+ daemon's ~10ms tick races with later tests that patch httpx.Client
+ — the leaked thread sees their patched response and runs an
+ unwanted iteration of _poll_once that double-counts mocked calls
+ (caught when test_batch_fetcher_owns_client_when_not_supplied
+ surfaced this on Python 3.11 CI but not 3.13 local).
+ """
resp = _make_response(200, [])
p, _ = _patch_httpx(resp)
+ stop_event = threading.Event()
with p, patch("platform_auth.auth_headers", return_value={}):
# Use a very short interval so the loop body runs at least once
# before we exit the test.
- t = inbox.start_poller_thread(state, "http://platform", "ws-1", interval=0.01)
+ t = inbox.start_poller_thread(
+ state, "http://platform", "ws-1", interval=0.01, stop_event=stop_event
+ )
time.sleep(0.05)
- assert t.daemon is True
- assert t.is_alive()
+ assert t.daemon is True
+ assert t.is_alive()
+ # Signal shutdown + wait for the thread to actually exit before
+ # we leave the test scope. Without this join, the leaked thread
+ # races with later tests' httpx patches.
+ stop_event.set()
+ t.join(timeout=2.0)
+ assert not t.is_alive(), "poller thread did not exit on stop_event"
# ---------------------------------------------------------------------------
@@ -577,6 +595,219 @@ def test_default_cursor_path_uses_configs_dir(monkeypatch, tmp_path: Path):
assert inbox.default_cursor_path() == tmp_path / ".mcp_inbox_cursor"
+# ---------------------------------------------------------------------------
+# Phase 5b — BatchFetcher integration with the poll loop
+# ---------------------------------------------------------------------------
+#
+# These tests pin the cross-module contract between inbox._poll_once and
+# inbox_uploads.BatchFetcher: chat_upload_receive rows must be submitted
+# to a single BatchFetcher AND drained (URI cache populated) before any
+# subsequent message row is processed. Without the drain, the
+# rewrite_request_body path inside message_from_activity surfaces the
+# un-rewritten ``platform-pending:`` URI to the agent.
+
+
+def _upload_row(act_id: str, file_id: str) -> dict:
+ return {
+ "id": act_id,
+ "source_id": None,
+ "method": "chat_upload_receive",
+ "summary": f"chat_upload_receive: {file_id}.pdf",
+ "request_body": {
+ "file_id": file_id,
+ "name": f"{file_id}.pdf",
+ "uri": f"platform-pending:ws-1/{file_id}",
+ "mimeType": "application/pdf",
+ "size": 3,
+ },
+ "created_at": "2026-05-04T10:00:00Z",
+ }
+
+
+def _message_row_referencing(act_id: str, file_id: str) -> dict:
+ return {
+ "id": act_id,
+ "source_id": None,
+ "method": "message/send",
+ "summary": None,
+ "request_body": {
+ "params": {
+ "message": {
+ "parts": [
+ {"kind": "text", "text": "have a look"},
+ {
+ "kind": "file",
+ "file": {
+ "uri": f"platform-pending:ws-1/{file_id}",
+ "name": f"{file_id}.pdf",
+ },
+ },
+ ]
+ }
+ }
+ },
+ "created_at": "2026-05-04T10:00:01Z",
+ }
+
+
+def _patch_httpx_routing(activity_rows: list[dict], upload_bytes: bytes = b"PDF"):
+ """Replace ``httpx.Client`` so:
+
+ - GET /activity returns ``activity_rows``
+ - GET /workspaces/.../content returns ``upload_bytes`` with content-type
+ - POST /ack returns 200
+
+ Returns the patch context manager; tests use ``with p:``. Each new
+ Client(...) gets a fresh MagicMock so the test can verify
+ constructor-count expectations without pinning singletons.
+ """
+ def _client_factory(*args, **kwargs):
+ c = MagicMock()
+ c.__enter__ = MagicMock(return_value=c)
+ c.__exit__ = MagicMock(return_value=False)
+
+ def _get(url, params=None, headers=None):
+ if "/activity" in url:
+ resp = MagicMock()
+ resp.status_code = 200
+ resp.json.return_value = activity_rows
+ resp.text = ""
+ return resp
+ if "/pending-uploads/" in url and "/content" in url:
+ resp = MagicMock()
+ resp.status_code = 200
+ resp.content = upload_bytes
+ resp.headers = {"content-type": "application/pdf"}
+ resp.text = ""
+ return resp
+ resp = MagicMock()
+ resp.status_code = 404
+ resp.text = ""
+ return resp
+
+ def _post(url, headers=None):
+ resp = MagicMock()
+ resp.status_code = 200
+ resp.text = ""
+ return resp
+
+ c.get = MagicMock(side_effect=_get)
+ c.post = MagicMock(side_effect=_post)
+ c.close = MagicMock()
+ return c
+
+ return patch("httpx.Client", side_effect=_client_factory)
+
+
+def test_poll_once_drains_uploads_before_processing_message_row(state: inbox.InboxState, tmp_path):
+ """The chat-message row's file.uri MUST be rewritten to the local
+ workspace: URI by the time it lands in the InboxState queue. This
+ requires BatchFetcher.wait_all() to run before message_from_activity
+ on the second row.
+ """
+ import inbox_uploads
+ inbox_uploads.get_cache().clear()
+ # Sandbox the on-disk staging dir so the test can't pollute the
+ # workspace's real chat-uploads.
+ real_dir = inbox_uploads.CHAT_UPLOAD_DIR
+ inbox_uploads.CHAT_UPLOAD_DIR = str(tmp_path / "chat-uploads")
+ try:
+ rows = [
+ _upload_row("act-1", "file-A"),
+ _message_row_referencing("act-2", "file-A"),
+ ]
+ state.save_cursor("act-old")
+ with _patch_httpx_routing(rows, upload_bytes=b"PDF-bytes"):
+ n = inbox._poll_once(state, "http://platform", "ws-1", {})
+ finally:
+ inbox_uploads.CHAT_UPLOAD_DIR = real_dir
+ inbox_uploads.get_cache().clear()
+
+ assert n == 1, "exactly one message row should be enqueued (the upload row is a side-effect, not a message)"
+ queued = state.peek(10)
+ assert len(queued) == 1
+ # The contract this test exists to pin: the platform-pending: URI
+ # was rewritten to workspace: BEFORE the message landed in the
+ # state queue. message_from_activity mutates row['request_body']
+ # in-place, so the rewritten URI is observable on the row dict
+ # we passed in.
+ rewritten_part = rows[1]["request_body"]["params"]["message"]["parts"][1]
+ assert rewritten_part["file"]["uri"].startswith("workspace:"), (
+ f"upload barrier broken: file.uri = {rewritten_part['file']['uri']!r}; "
+ "rewrite_request_body ran before BatchFetcher.wait_all populated the cache"
+ )
+ # Cursor advanced past BOTH rows — upload-receive (act-1) is
+ # acknowledged via the inbox cursor regardless of fetch outcome.
+ assert state.load_cursor() == "act-2"
+
+
+def test_poll_once_with_only_upload_rows_drains_at_loop_end(state: inbox.InboxState, tmp_path):
+ """End-of-batch drain: a poll that contains ONLY upload rows (no
+ chat-message row to trigger the inline drain) must still drain the
+ BatchFetcher before _poll_once returns. Otherwise a future poll
+ that picks up the corresponding chat-message row would race with
+ in-flight fetches from the previous batch.
+ """
+ import inbox_uploads
+ inbox_uploads.get_cache().clear()
+ real_dir = inbox_uploads.CHAT_UPLOAD_DIR
+ inbox_uploads.CHAT_UPLOAD_DIR = str(tmp_path / "chat-uploads")
+ try:
+ rows = [_upload_row("act-1", "file-A"), _upload_row("act-2", "file-B")]
+ state.save_cursor("act-old")
+ with _patch_httpx_routing(rows, upload_bytes=b"PDF"):
+ n = inbox._poll_once(state, "http://platform", "ws-1", {})
+ # By the time _poll_once returned, the URI cache must be hot
+ # for both file_ids — proves the end-of-loop drain ran.
+ assert inbox_uploads.get_cache().get("platform-pending:ws-1/file-A") is not None
+ assert inbox_uploads.get_cache().get("platform-pending:ws-1/file-B") is not None
+ finally:
+ inbox_uploads.CHAT_UPLOAD_DIR = real_dir
+ inbox_uploads.get_cache().clear()
+ # Upload rows are NOT message rows; queue stays empty.
+ assert n == 0
+ # Cursor advances past both upload rows.
+ assert state.load_cursor() == "act-2"
+
+
+def test_poll_once_no_uploads_does_not_construct_batch_fetcher(state: inbox.InboxState):
+ """A batch with no upload-receive rows must not pay the BatchFetcher
+ construction cost — the executor + httpx client allocation is
+ deferred until the first upload row appears.
+ """
+ import inbox_uploads
+
+ constructed: list[Any] = []
+
+ def _patched_init(self, **kwargs):
+ constructed.append(kwargs)
+ # Don't actually run __init__; we never hit submit/wait_all.
+ self._closed = False
+ self._futures = []
+ self._executor = MagicMock()
+ self._client = MagicMock()
+ self._own_client = False
+
+ rows = [
+ {
+ "id": "act-1",
+ "source_id": None,
+ "method": "message/send",
+ "summary": None,
+ "request_body": {"parts": [{"type": "text", "text": "hi"}]},
+ "created_at": "2026-04-30T22:00:00Z",
+ },
+ ]
+ state.save_cursor("act-old")
+ resp = _make_response(200, rows)
+ p, _ = _patch_httpx(resp)
+ with patch.object(inbox_uploads.BatchFetcher, "__init__", _patched_init), p:
+ n = inbox._poll_once(state, "http://platform", "ws-1", {})
+
+ assert n == 1
+ assert constructed == [], "BatchFetcher must not be constructed when no upload rows are present"
+
+
def test_default_cursor_path_falls_back_to_default(tmp_path, monkeypatch):
"""When CONFIGS_DIR is unset, the cursor path resolves through
configs_dir.resolve() — /configs in-container, ~/.molecule-workspace
@@ -701,3 +932,165 @@ def test_set_notification_callback_none_clears(state: inbox.InboxState):
state.record(_msg("act-1"))
assert received == []
+
+
+# ---------------------------------------------------------------------------
+# Phase 2 — chat_upload_receive rows route to inbox_uploads.fetch_and_stage
+# ---------------------------------------------------------------------------
+
+
+def test_poll_once_skips_chat_upload_row_from_queue(state: inbox.InboxState, monkeypatch, tmp_path):
+ """A row with method='chat_upload_receive' must NOT enqueue as a
+ chat message — it's a side-effect telling the workspace to fetch
+ bytes. Pin the contract so a refactor that flattens the row loop
+ can't silently re-enqueue these as 'empty A2A message' rows."""
+ import inbox_uploads
+ monkeypatch.setattr(inbox_uploads, "CHAT_UPLOAD_DIR", str(tmp_path / "chat-uploads"))
+ inbox_uploads.get_cache().clear()
+
+ rows = [
+ {
+ "id": "act-1",
+ "source_id": None,
+ "method": "chat_upload_receive",
+ "summary": "chat_upload_receive: foo.pdf",
+ "request_body": {
+ "file_id": "abc123",
+ "name": "foo.pdf",
+ "mimeType": "application/pdf",
+ "size": 4,
+ "uri": "platform-pending:ws-1/abc123",
+ },
+ "created_at": "2026-05-04T10:00:00Z",
+ },
+ ]
+ resp = _make_response(200, rows)
+ p, _ = _patch_httpx(resp)
+ fetch_called = []
+
+ def fake_fetch(row, **kwargs):
+ fetch_called.append((row.get("id"), kwargs["workspace_id"]))
+ return "workspace:/local/foo.pdf"
+
+ with p, patch.object(inbox_uploads, "fetch_and_stage", fake_fetch):
+ n = inbox._poll_once(state, "http://platform", "ws-1", {})
+
+ # Not enqueued + cursor advanced.
+ assert n == 0
+ assert state.peek(10) == []
+ assert state.load_cursor() == "act-1"
+ # fetch_and_stage was invoked with the row and workspace_id.
+ assert fetch_called == [("act-1", "ws-1")]
+
+
+def test_poll_once_chat_upload_row_then_chat_message_rewrites_uri(state: inbox.InboxState, monkeypatch, tmp_path):
+ """The classic ordering: upload-receive row first (lower id), chat
+ message referencing platform-pending: URI second. The chat message
+ that lands in the inbox must have its URI rewritten to the local
+ workspace: URI before the agent sees it.
+ """
+ import inbox_uploads
+ monkeypatch.setattr(inbox_uploads, "CHAT_UPLOAD_DIR", str(tmp_path / "chat-uploads"))
+ cache = inbox_uploads.get_cache()
+ cache.clear()
+
+ # Pretend the fetch already populated the cache. (The real flow
+ # populates it inside fetch_and_stage; we patch that to keep the
+ # test focused on the rewrite contract.)
+ cache.set("platform-pending:ws-1/abc123", "workspace:/workspace/.molecule/chat-uploads/xx-foo.pdf")
+
+ rows = [
+ {
+ "id": "act-1",
+ "source_id": None,
+ "method": "chat_upload_receive",
+ "summary": "chat_upload_receive: foo.pdf",
+ "request_body": {
+ "file_id": "abc123",
+ "name": "foo.pdf",
+ "mimeType": "application/pdf",
+ "size": 4,
+ "uri": "platform-pending:ws-1/abc123",
+ },
+ "created_at": "2026-05-04T10:00:00Z",
+ },
+ {
+ "id": "act-2",
+ "source_id": None,
+ "method": "message/send",
+ "summary": None,
+ "request_body": {
+ "params": {
+ "message": {
+ "parts": [
+ {"kind": "text", "text": "look at this"},
+ {
+ "kind": "file",
+ "file": {
+ "uri": "platform-pending:ws-1/abc123",
+ "name": "foo.pdf",
+ },
+ },
+ ]
+ }
+ }
+ },
+ "created_at": "2026-05-04T10:00:01Z",
+ },
+ ]
+ resp = _make_response(200, rows)
+ p, _ = _patch_httpx(resp)
+
+ def fake_fetch(row, **kwargs):
+ return "workspace:/workspace/.molecule/chat-uploads/xx-foo.pdf"
+
+ with p, patch.object(inbox_uploads, "fetch_and_stage", fake_fetch):
+ n = inbox._poll_once(state, "http://platform", "ws-1", {})
+
+ # Only the chat message is enqueued.
+ assert n == 1
+ queue = state.peek(10)
+ assert len(queue) == 1
+ msg = queue[0]
+ assert msg.activity_id == "act-2"
+ # The URI in the row's request_body was mutated by message_from_activity
+ # → rewrite_request_body. Re-extracting reveals the rewritten value.
+ rewritten = rows[1]["request_body"]["params"]["message"]["parts"][1]["file"]["uri"]
+ assert rewritten == "workspace:/workspace/.molecule/chat-uploads/xx-foo.pdf"
+
+
+def test_poll_once_chat_upload_row_advances_cursor_even_on_fetch_failure(
+ state: inbox.InboxState, monkeypatch, tmp_path
+):
+ """A permanent network failure on /content must NOT stall the cursor
+ — otherwise one bad upload blocks all real chat traffic for the
+ workspace. fetch_and_stage returns None on failure, but the row is
+ still considered handled from the cursor's perspective."""
+ import inbox_uploads
+ monkeypatch.setattr(inbox_uploads, "CHAT_UPLOAD_DIR", str(tmp_path / "chat-uploads"))
+
+ rows = [
+ {
+ "id": "act-broken",
+ "source_id": None,
+ "method": "chat_upload_receive",
+ "summary": "chat_upload_receive: doomed.pdf",
+ "request_body": {
+ "file_id": "doom",
+ "name": "doomed.pdf",
+ "uri": "platform-pending:ws-1/doom",
+ },
+ "created_at": "2026-05-04T10:00:00Z",
+ },
+ ]
+ resp = _make_response(200, rows)
+ p, _ = _patch_httpx(resp)
+
+ def fake_fetch(row, **kwargs):
+ return None # network failure
+
+ with p, patch.object(inbox_uploads, "fetch_and_stage", fake_fetch):
+ inbox._poll_once(state, "http://platform", "ws-1", {})
+
+ assert state.peek(10) == []
+ assert state.load_cursor() == "act-broken"
diff --git a/workspace/tests/test_inbox_uploads.py b/workspace/tests/test_inbox_uploads.py
new file mode 100644
index 00000000..37446760
--- /dev/null
+++ b/workspace/tests/test_inbox_uploads.py
@@ -0,0 +1,1120 @@
+"""Tests for workspace/inbox_uploads.py — poll-mode chat-upload fetcher.
+
+Covers the full activity-row → fetch → stage-on-disk → ack flow plus
+the URI cache and the rewrite that swaps platform-pending: URIs to
+local workspace: URIs in subsequent chat messages.
+"""
+from __future__ import annotations
+
+import os
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+import inbox_uploads
+
+
+@pytest.fixture(autouse=True)
+def _reset_cache_and_dir(tmp_path, monkeypatch):
+ """Each test starts with an empty URI cache and a temp upload dir
+ so on-disk artifacts from one test don't leak into the next."""
+ inbox_uploads.get_cache().clear()
+ monkeypatch.setattr(inbox_uploads, "CHAT_UPLOAD_DIR", str(tmp_path / "chat-uploads"))
+ yield
+ inbox_uploads.get_cache().clear()
+
+
+# ---------------------------------------------------------------------------
+# sanitize_filename — parity with internal_chat_uploads + Go SanitizeFilename
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+ "raw,want",
+ [
+ ("../../etc/passwd", "passwd"),
+ ("/etc/passwd", "passwd"),
+ ("hello world.pdf", "hello_world.pdf"),
+ ("weird;chars!?.txt", "weird_chars__.txt"),
+ ("中文.docx", "__.docx"),
+ ("file (1).pdf", "file__1_.pdf"),
+ ("report-2026.05.04_v2.pdf", "report-2026.05.04_v2.pdf"),
+ ("", "file"),
+ (".", "file"),
+ ("..", "file"),
+ ],
+)
+def test_sanitize_filename_parity_with_python_internal(raw, want):
+ assert inbox_uploads.sanitize_filename(raw) == want
+
+
+def test_sanitize_filename_caps_at_100_preserves_short_extension():
+ long = "a" * 200 + ".pdf"
+ got = inbox_uploads.sanitize_filename(long)
+ assert len(got) == 100
+ assert got.endswith(".pdf")
+
+
+def test_sanitize_filename_drops_long_extension():
+ long = "c" * 90 + ".thisisaverylongextensionnotpreserved"
+ got = inbox_uploads.sanitize_filename(long)
+ assert len(got) == 100
+ assert ".thisisaverylongextensionnotpreserved" not in got
+
+
+# ---------------------------------------------------------------------------
+# _URICache — LRU semantics
+# ---------------------------------------------------------------------------
+
+
+def test_uricache_set_get_roundtrip():
+ c = inbox_uploads._URICache(max_entries=10)
+ c.set("platform-pending:ws/1", "workspace:/local/1")
+ assert c.get("platform-pending:ws/1") == "workspace:/local/1"
+
+
+def test_uricache_get_missing_returns_none():
+ c = inbox_uploads._URICache(max_entries=10)
+ assert c.get("platform-pending:ws/missing") is None
+
+
+def test_uricache_evicts_oldest_at_capacity():
+ c = inbox_uploads._URICache(max_entries=2)
+ c.set("a", "A")
+ c.set("b", "B")
+ c.set("c", "C") # evicts "a"
+ assert c.get("a") is None
+ assert c.get("b") == "B"
+ assert c.get("c") == "C"
+ assert len(c) == 2
+
+
+def test_uricache_get_promotes_recently_used():
+ c = inbox_uploads._URICache(max_entries=2)
+ c.set("a", "A")
+ c.set("b", "B")
+ # Promote "a" by reading; next set should evict "b" instead of "a".
+ assert c.get("a") == "A"
+ c.set("c", "C")
+ assert c.get("a") == "A"
+ assert c.get("b") is None
+ assert c.get("c") == "C"
+
+
+def test_uricache_overwrite_updates_value():
+ c = inbox_uploads._URICache(max_entries=10)
+ c.set("k", "v1")
+ c.set("k", "v2")
+ assert c.get("k") == "v2"
+ assert len(c) == 1
+
+
+def test_uricache_clear():
+ c = inbox_uploads._URICache(max_entries=10)
+ c.set("a", "A")
+ c.set("b", "B")
+ c.clear()
+ assert c.get("a") is None
+ assert len(c) == 0
+
+
+def test_resolve_pending_uri_uses_module_cache():
+ inbox_uploads.get_cache().set("platform-pending:ws/x", "workspace:/local/x")
+ assert inbox_uploads.resolve_pending_uri("platform-pending:ws/x") == "workspace:/local/x"
+ assert inbox_uploads.resolve_pending_uri("platform-pending:ws/missing") is None
+
+
+# ---------------------------------------------------------------------------
+# stage_to_disk
+# ---------------------------------------------------------------------------
+
+
+def test_stage_to_disk_writes_file_and_returns_workspace_uri(tmp_path):
+ uri = inbox_uploads.stage_to_disk(b"hello", "report.pdf")
+ assert uri.startswith("workspace:")
+ path = uri[len("workspace:"):]
+ assert os.path.isfile(path)
+ with open(path, "rb") as f:
+ assert f.read() == b"hello"
+ assert path.endswith("-report.pdf")
+ # Prefix is 32 hex chars + "-" + name.
+ name = os.path.basename(path)
+ prefix, _, _ = name.partition("-")
+ assert len(prefix) == 32
+
+
+def test_stage_to_disk_sanitizes_filename():
+ uri = inbox_uploads.stage_to_disk(b"x", "../../evil.txt")
+ name = os.path.basename(uri)
+ assert "/" not in name
+ assert name.endswith("-evil.txt")
+
+
+def test_stage_to_disk_rejects_oversize():
+ with pytest.raises(ValueError):
+ inbox_uploads.stage_to_disk(b"x" * (inbox_uploads.MAX_FILE_BYTES + 1), "big.bin")
+
+
+def test_stage_to_disk_creates_directory_if_missing():
+ # CHAT_UPLOAD_DIR is monkeypatched to a non-existent tmp path; the
+ # call must mkdir -p it on first write.
+ assert not os.path.exists(inbox_uploads.CHAT_UPLOAD_DIR)
+ inbox_uploads.stage_to_disk(b"x", "a.txt")
+ assert os.path.isdir(inbox_uploads.CHAT_UPLOAD_DIR)
+
+
+def test_stage_to_disk_write_failure_cleans_partial_file(tmp_path, monkeypatch):
+ # open() succeeds but write() fails — the partial file must be
+ # removed so a retry can claim a fresh prefix without colliding.
+ real_fdopen = os.fdopen
+ written_paths: list[str] = []
+
+ def boom_fdopen(fd, mode):
+ # Wrap the real file with one whose write() raises.
+ f = real_fdopen(fd, mode)
+ # Track which path's fd we opened by inspecting the chat-upload dir.
+ for entry in os.listdir(inbox_uploads.CHAT_UPLOAD_DIR):
+ written_paths.append(os.path.join(inbox_uploads.CHAT_UPLOAD_DIR, entry))
+ original_write = f.write
+
+ def bad_write(b):
+ original_write(b"") # ensure file exists
+ raise OSError(28, "no space")
+ f.write = bad_write
+ return f
+
+ monkeypatch.setattr(os, "fdopen", boom_fdopen)
+ with pytest.raises(OSError):
+ inbox_uploads.stage_to_disk(b"data", "x.txt")
+ # All staged files cleaned up.
+ for p in written_paths:
+ assert not os.path.exists(p)
+
+
+def test_stage_to_disk_write_failure_unlink_failure_swallowed(monkeypatch):
+ # open() succeeds, write() fails, unlink() ALSO fails — the unlink
+ # error is swallowed and the original write error propagates.
+ real_fdopen = os.fdopen
+
+ def boom_fdopen(fd, mode):
+ f = real_fdopen(fd, mode)
+
+ def bad_write(_):
+ raise OSError(28, "no space")
+ f.write = bad_write
+ return f
+
+ def bad_unlink(_):
+ raise OSError(13, "permission denied")
+
+ monkeypatch.setattr(os, "fdopen", boom_fdopen)
+ monkeypatch.setattr(os, "unlink", bad_unlink)
+ with pytest.raises(OSError) as ei:
+ inbox_uploads.stage_to_disk(b"data", "x.txt")
+ # Original write error, not the unlink error.
+ assert ei.value.errno == 28
+
+
+def test_stage_to_disk_propagates_oserror_and_cleans_partial(tmp_path, monkeypatch):
+ # Make the dir read-only AFTER mkdir succeeds, so open() fails. Skip
+ # this on platforms where the dir's permissions don't restrict the
+ # process owner (root in Docker, etc.).
+ inbox_uploads.stage_to_disk(b"first", "a.txt")
+ if os.geteuid() == 0:
+ pytest.skip("root bypasses permission bits")
+ os.chmod(inbox_uploads.CHAT_UPLOAD_DIR, 0o500)
+ try:
+ with pytest.raises(OSError):
+ inbox_uploads.stage_to_disk(b"second", "b.txt")
+ finally:
+ os.chmod(inbox_uploads.CHAT_UPLOAD_DIR, 0o755)
+
+
+# ---------------------------------------------------------------------------
+# is_chat_upload_row + _request_body_dict
+# ---------------------------------------------------------------------------
+
+
+def test_is_chat_upload_row_true_on_method_match():
+ assert inbox_uploads.is_chat_upload_row({"method": "chat_upload_receive"})
+
+
+def test_is_chat_upload_row_false_on_other_methods():
+ assert not inbox_uploads.is_chat_upload_row({"method": "message/send"})
+ assert not inbox_uploads.is_chat_upload_row({"method": None})
+ assert not inbox_uploads.is_chat_upload_row({})
+
+
+def test_request_body_dict_passthrough():
+ body = {"file_id": "x"}
+ assert inbox_uploads._request_body_dict({"request_body": body}) is body
+
+
+def test_request_body_dict_string_decoded():
+ assert inbox_uploads._request_body_dict({"request_body": '{"a": 1}'}) == {"a": 1}
+
+
+def test_request_body_dict_invalid_string_returns_none():
+ assert inbox_uploads._request_body_dict({"request_body": "not json"}) is None
+
+
+def test_request_body_dict_non_dict_after_decode_returns_none():
+ assert inbox_uploads._request_body_dict({"request_body": "[1, 2]"}) is None
+
+
+def test_request_body_dict_other_type_returns_none():
+ assert inbox_uploads._request_body_dict({"request_body": 123}) is None
+
+
+# ---------------------------------------------------------------------------
+# fetch_and_stage — the full GET / write / ack flow
+# ---------------------------------------------------------------------------
+
+
+def _make_resp(status_code: int, content: bytes = b"", content_type: str = "", text: str = "") -> MagicMock:
+ resp = MagicMock()
+ resp.status_code = status_code
+ resp.content = content
+ headers: dict[str, str] = {}
+ if content_type:
+ headers["content-type"] = content_type
+ resp.headers = headers
+ resp.text = text
+ return resp
+
+
+def _patch_httpx_for_fetch(get_resp: MagicMock, ack_resp: MagicMock | None = None):
+ """Patch httpx.Client so each new context-manager returns a client
+ whose .get() returns get_resp and .post() returns ack_resp.
+ """
+ client = MagicMock()
+ client.__enter__ = MagicMock(return_value=client)
+ client.__exit__ = MagicMock(return_value=False)
+ client.get = MagicMock(return_value=get_resp)
+ client.post = MagicMock(return_value=ack_resp or _make_resp(200))
+ return patch("httpx.Client", return_value=client), client
+
+
+def _row(file_id: str = "file-1", uri: str | None = None, name: str = "report.pdf", body_extra: dict | None = None) -> dict:
+ body: dict[str, Any] = {
+ "file_id": file_id,
+ "name": name,
+ "mimeType": "application/pdf",
+ "size": 9,
+ }
+ if uri is not None:
+ body["uri"] = uri
+ if body_extra:
+ body.update(body_extra)
+ return {
+ "id": "act-100",
+ "source_id": None,
+ "method": "chat_upload_receive",
+ "summary": "chat_upload_receive: report.pdf",
+ "request_body": body,
+ "created_at": "2026-05-04T10:00:00Z",
+ }
+
+
+def test_fetch_and_stage_happy_path_writes_file_acks_and_caches():
+ pending_uri = "platform-pending:ws-1/file-1"
+ row = _row(uri=pending_uri)
+ get_resp = _make_resp(200, content=b"PDF-bytes", content_type="application/pdf")
+ p, client = _patch_httpx_for_fetch(get_resp)
+ with p:
+ local_uri = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={"Authorization": "Bearer t"}
+ )
+ assert local_uri is not None
+ assert local_uri.startswith("workspace:")
+ # On-disk file content matches.
+ path = local_uri[len("workspace:"):]
+ with open(path, "rb") as f:
+ assert f.read() == b"PDF-bytes"
+ # Cache populated.
+ assert inbox_uploads.get_cache().get(pending_uri) == local_uri
+ # Ack POSTed to the right URL.
+ client.post.assert_called_once()
+ args, kwargs = client.post.call_args
+ assert "/pending-uploads/file-1/ack" in args[0]
+ assert kwargs["headers"]["Authorization"] == "Bearer t"
+
+
+def test_fetch_and_stage_reconstructs_uri_when_missing_in_body():
+ row = _row(uri=None) # request_body has no 'uri'
+ get_resp = _make_resp(200, content=b"x", content_type="text/plain")
+ p, _ = _patch_httpx_for_fetch(get_resp)
+ with p:
+ inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ # Cache key reconstructed from workspace_id + file_id.
+ assert inbox_uploads.get_cache().get("platform-pending:ws-1/file-1") is not None
+
+
+def test_fetch_and_stage_returns_none_on_missing_request_body():
+ row = {"id": "act-100", "method": "chat_upload_receive"}
+ # No httpx call should happen, but we patch defensively.
+ p, client = _patch_httpx_for_fetch(_make_resp(200))
+ with p:
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert result is None
+ client.get.assert_not_called()
+
+
+def test_fetch_and_stage_returns_none_on_missing_file_id():
+ row = {"id": "act-100", "method": "chat_upload_receive", "request_body": {"name": "x.pdf"}}
+ p, client = _patch_httpx_for_fetch(_make_resp(200))
+ with p:
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert result is None
+ client.get.assert_not_called()
+
+
+def test_fetch_and_stage_handles_nonstring_file_id():
+ row = {"id": "act-100", "method": "chat_upload_receive", "request_body": {"file_id": 123}}
+ p, client = _patch_httpx_for_fetch(_make_resp(200))
+ with p:
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert result is None
+ client.get.assert_not_called()
+
+
+def test_fetch_and_stage_404_returns_none_no_ack():
+ row = _row()
+ get_resp = _make_resp(404, text="gone")
+ ack_resp = _make_resp(200)
+ p, client = _patch_httpx_for_fetch(get_resp, ack_resp)
+ with p:
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert result is None
+ # No ack — the row is already gone.
+ client.post.assert_not_called()
+
+
+def test_fetch_and_stage_500_returns_none_no_ack():
+ row = _row()
+ p, client = _patch_httpx_for_fetch(_make_resp(500, text="boom"))
+ with p:
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert result is None
+ client.post.assert_not_called()
+
+
+def test_fetch_and_stage_network_error_returns_none():
+ row = _row()
+ client = MagicMock()
+ client.__enter__ = MagicMock(return_value=client)
+ client.__exit__ = MagicMock(return_value=False)
+ client.get = MagicMock(side_effect=RuntimeError("connection refused"))
+ with patch("httpx.Client", return_value=client):
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert result is None
+
+
+def test_fetch_and_stage_oversize_response_refused():
+ row = _row()
+ big = b"x" * (inbox_uploads.MAX_FILE_BYTES + 1)
+ p, client = _patch_httpx_for_fetch(_make_resp(200, content=big, content_type="application/octet-stream"))
+ with p:
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert result is None
+ client.post.assert_not_called()
+
+
+def test_fetch_and_stage_ack_failure_does_not_invalidate_local_uri():
+ row = _row(uri="platform-pending:ws-1/file-1")
+ get_resp = _make_resp(200, content=b"data", content_type="text/plain")
+ ack_resp = _make_resp(500, text="ack failed")
+ p, _ = _patch_httpx_for_fetch(get_resp, ack_resp)
+ with p:
+ local_uri = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ # On-disk staging succeeded; ack failure is logged but doesn't
+ # roll back the cache.
+ assert local_uri is not None
+ assert inbox_uploads.get_cache().get("platform-pending:ws-1/file-1") == local_uri
+
+
+def test_fetch_and_stage_ack_network_error_swallowed():
+ row = _row(uri="platform-pending:ws-1/file-1")
+ client = MagicMock()
+ client.__enter__ = MagicMock(return_value=client)
+ client.__exit__ = MagicMock(return_value=False)
+ client.get = MagicMock(return_value=_make_resp(200, content=b"data", content_type="text/plain"))
+ client.post = MagicMock(side_effect=RuntimeError("ack network error"))
+ with patch("httpx.Client", return_value=client):
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert result is not None # GET succeeded → URI returned even if ack blew up
+
+
+def test_fetch_and_stage_uses_response_content_type_when_present():
+ row = _row(name="thing.bin", body_extra={"mimeType": "application/x-bogus"})
+ # Response says image/png; should win over body's mimeType.
+ get_resp = _make_resp(200, content=b"PNG", content_type="image/png; charset=binary")
+ p, _ = _patch_httpx_for_fetch(get_resp)
+ with p:
+ # We don't assert on returned mime (not part of the contract);
+ # the test just verifies the happy path runs without trying to
+ # parse the trailing parameter.
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert result is not None
+
+
+def test_fetch_and_stage_nonstring_filename_falls_back_to_file():
+ # body['name'] is a non-string (e.g. truncated to None or a number);
+ # filename must default to "file" so sanitize_filename has something
+ # to work with.
+ row = _row(body_extra={"name": 12345})
+ p, _ = _patch_httpx_for_fetch(_make_resp(200, content=b"x", content_type="text/plain"))
+ with p:
+ local_uri = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert local_uri is not None
+ assert local_uri.endswith("-file")
+
+
+def test_fetch_and_stage_default_filename_when_missing():
+ row = {
+ "id": "act",
+ "method": "chat_upload_receive",
+ "request_body": {"file_id": "file-1"},
+ }
+ p, _ = _patch_httpx_for_fetch(_make_resp(200, content=b"data", content_type="text/plain"))
+ with p:
+ local_uri = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert local_uri is not None
+ assert local_uri.endswith("-file") # default filename
+
+
+def test_fetch_and_stage_disk_write_failure_returns_none(monkeypatch):
+ row = _row()
+ p, client = _patch_httpx_for_fetch(_make_resp(200, content=b"x", content_type="text/plain"))
+
+ def bad_stage(*args, **kwargs):
+ raise OSError(28, "no space left")
+ monkeypatch.setattr(inbox_uploads, "stage_to_disk", bad_stage)
+
+ with p:
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert result is None
+ client.post.assert_not_called()
+
+
+def test_fetch_and_stage_disk_value_error_returns_none(monkeypatch):
+ row = _row()
+ p, client = _patch_httpx_for_fetch(_make_resp(200, content=b"x", content_type="text/plain"))
+
+ def bad_stage(*args, **kwargs):
+ raise ValueError("oversize after sanity check")
+ monkeypatch.setattr(inbox_uploads, "stage_to_disk", bad_stage)
+
+ with p:
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert result is None
+ client.post.assert_not_called()
+
+
+def test_fetch_and_stage_httpx_missing_returns_none(monkeypatch):
+ row = _row()
+ # Simulate httpx not installed by making the import fail.
+ import sys
+ real_httpx = sys.modules.pop("httpx", None)
+ monkeypatch.setitem(sys.modules, "httpx", None)
+ try:
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ finally:
+ if real_httpx is not None:
+ sys.modules["httpx"] = real_httpx
+ else:
+ sys.modules.pop("httpx", None)
+ assert result is None
+
+
+def test_fetch_and_stage_falls_back_to_extension_mime(monkeypatch):
+ row = _row(name="snap.png", body_extra={"mimeType": ""}) # no mimeType in body
+ # Response also has no content-type so it falls through to mimetypes.guess_type.
+ get_resp = _make_resp(200, content=b"PNG", content_type="")
+ p, _ = _patch_httpx_for_fetch(get_resp)
+ with p:
+ result = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert result is not None
+
+
+# ---------------------------------------------------------------------------
+# rewrite_request_body — URI swap in chat-message bodies
+# ---------------------------------------------------------------------------
+
+
+def test_rewrite_request_body_swaps_pending_uri_in_message_parts():
+ inbox_uploads.get_cache().set("platform-pending:ws/1", "workspace:/local/1")
+ body = {
+ "method": "message/send",
+ "params": {
+ "message": {
+ "parts": [
+ {"kind": "text", "text": "see this"},
+ {"kind": "file", "file": {"uri": "platform-pending:ws/1", "name": "a.pdf"}},
+ ]
+ }
+ },
+ }
+ inbox_uploads.rewrite_request_body(body)
+ assert body["params"]["message"]["parts"][1]["file"]["uri"] == "workspace:/local/1"
+
+
+def test_rewrite_request_body_swaps_in_params_parts():
+ inbox_uploads.get_cache().set("platform-pending:ws/2", "workspace:/local/2")
+ body = {
+ "params": {
+ "parts": [
+ {"kind": "file", "file": {"uri": "platform-pending:ws/2"}},
+ ]
+ }
+ }
+ inbox_uploads.rewrite_request_body(body)
+ assert body["params"]["parts"][0]["file"]["uri"] == "workspace:/local/2"
+
+
+def test_rewrite_request_body_swaps_in_top_level_parts():
+ inbox_uploads.get_cache().set("platform-pending:ws/3", "workspace:/local/3")
+ body = {
+ "parts": [{"kind": "file", "file": {"uri": "platform-pending:ws/3"}}]
+ }
+ inbox_uploads.rewrite_request_body(body)
+ assert body["parts"][0]["file"]["uri"] == "workspace:/local/3"
+
+
+def test_rewrite_request_body_leaves_unmatched_uri_unchanged():
+ # No cache entry → URI stays as-is. Agent surfaces the unresolvable
+ # URI rather than the inbox silently dropping the part.
+ body = {
+ "parts": [{"kind": "file", "file": {"uri": "platform-pending:ws/missing"}}]
+ }
+ inbox_uploads.rewrite_request_body(body)
+ assert body["parts"][0]["file"]["uri"] == "platform-pending:ws/missing"
+
+
+def test_rewrite_request_body_leaves_non_pending_uri_unchanged():
+ inbox_uploads.get_cache().set("platform-pending:ws/3", "workspace:/local/3")
+ body = {
+ "parts": [
+ {"kind": "file", "file": {"uri": "workspace:/already-local.pdf"}},
+ {"kind": "file", "file": {"uri": "https://example.com/x.pdf"}},
+ ]
+ }
+ inbox_uploads.rewrite_request_body(body)
+ assert body["parts"][0]["file"]["uri"] == "workspace:/already-local.pdf"
+ assert body["parts"][1]["file"]["uri"] == "https://example.com/x.pdf"
+
+
+def test_rewrite_request_body_skips_non_dict_parts():
+ body = {"parts": ["not a dict", 42, None]}
+ inbox_uploads.rewrite_request_body(body) # must not raise
+ assert body["parts"] == ["not a dict", 42, None]
+
+
+def test_rewrite_request_body_skips_text_parts():
+ body = {
+ "parts": [{"kind": "text", "text": "platform-pending:ws/should-not-rewrite"}]
+ }
+ inbox_uploads.rewrite_request_body(body)
+ # Text content not touched — only file.uri fields are URIs.
+ assert body["parts"][0]["text"] == "platform-pending:ws/should-not-rewrite"
+
+
+def test_rewrite_request_body_skips_part_without_file_dict():
+ body = {"parts": [{"kind": "file"}]} # no file key
+ inbox_uploads.rewrite_request_body(body)
+ assert body["parts"] == [{"kind": "file"}]
+
+
+def test_rewrite_request_body_skips_file_without_uri():
+ body = {"parts": [{"kind": "file", "file": {"name": "x.pdf"}}]}
+ inbox_uploads.rewrite_request_body(body)
+ assert body["parts"][0]["file"] == {"name": "x.pdf"}
+
+
+def test_rewrite_request_body_skips_nonstring_uri():
+ body = {"parts": [{"kind": "file", "file": {"uri": None}}]}
+ inbox_uploads.rewrite_request_body(body) # must not raise
+
+
+def test_rewrite_request_body_handles_non_dict_body():
+ inbox_uploads.rewrite_request_body(None) # no-op
+ inbox_uploads.rewrite_request_body("string body") # no-op
+ inbox_uploads.rewrite_request_body([1, 2, 3]) # no-op
+
+
+def test_rewrite_request_body_handles_non_dict_params():
+ body = {"params": "not a dict", "parts": []}
+ inbox_uploads.rewrite_request_body(body) # must not raise
+
+
+def test_rewrite_request_body_handles_non_dict_message():
+ body = {"params": {"message": "not a dict"}}
+ inbox_uploads.rewrite_request_body(body) # must not raise
+
+
+def test_rewrite_request_body_handles_non_list_parts():
+ body = {"parts": "not a list"}
+ inbox_uploads.rewrite_request_body(body) # must not raise
+
+
+def test_rewrite_request_body_handles_non_dict_file():
+ body = {"parts": [{"kind": "file", "file": "not a dict"}]}
+ inbox_uploads.rewrite_request_body(body) # must not raise
+
+
+# ---------------------------------------------------------------------------
+# fetch_and_stage with shared client — Phase 5b client-reuse contract
+# ---------------------------------------------------------------------------
+#
+# When a caller passes ``client=`` to fetch_and_stage, that client must be
+# used for BOTH the GET /content and the POST /ack — no fresh
+# ``httpx.Client(...)`` constructions should happen. The pre-Phase-5b
+# implementation made one new client for GET and another for ack; the new
+# shape lets BatchFetcher share one connection pool across an entire batch.
+
+
+def test_fetch_and_stage_with_supplied_client_does_not_construct_new_client(monkeypatch):
+ row = _row(uri="platform-pending:ws-1/file-1")
+ get_resp = _make_resp(200, content=b"PDF", content_type="application/pdf")
+ ack_resp = _make_resp(200)
+ supplied = MagicMock()
+ supplied.get = MagicMock(return_value=get_resp)
+ supplied.post = MagicMock(return_value=ack_resp)
+ # Sentinel: any code path that constructs httpx.Client when one was
+ # already supplied is a regression — count constructions.
+ constructed: list[Any] = []
+
+ class _ShouldNotBeCalled:
+ def __init__(self, *a, **kw):
+ constructed.append((a, kw))
+
+ monkeypatch.setattr("httpx.Client", _ShouldNotBeCalled)
+
+ local_uri = inbox_uploads.fetch_and_stage(
+ row,
+ platform_url="http://plat",
+ workspace_id="ws-1",
+ headers={"Authorization": "Bearer t"},
+ client=supplied,
+ )
+ assert local_uri is not None
+ assert constructed == [], "supplied client must be reused; no new Client should be constructed"
+ # GET + POST ack both went through the supplied client.
+ supplied.get.assert_called_once()
+ supplied.post.assert_called_once()
+ # Caller-owned client must NOT be closed by fetch_and_stage; the
+ # batch fetcher (or test) closes it once the whole batch is done.
+ supplied.close.assert_not_called()
+
+
+def test_fetch_and_stage_without_supplied_client_constructs_and_closes_one(monkeypatch):
+ row = _row(uri="platform-pending:ws-1/file-1")
+ get_resp = _make_resp(200, content=b"PDF", content_type="application/pdf")
+ ack_resp = _make_resp(200)
+ built: list[MagicMock] = []
+
+ def _factory(*args, **kwargs):
+ c = MagicMock()
+ c.get = MagicMock(return_value=get_resp)
+ c.post = MagicMock(return_value=ack_resp)
+ built.append(c)
+ return c
+
+ monkeypatch.setattr("httpx.Client", _factory)
+
+ local_uri = inbox_uploads.fetch_and_stage(
+ row, platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ assert local_uri is not None
+ # Pre-Phase-5b built TWO clients (one for GET, one for ack); now exactly one.
+ assert len(built) == 1, f"expected 1 httpx.Client construction, got {len(built)}"
+ # Same client must serve BOTH calls.
+ built[0].get.assert_called_once()
+ built[0].post.assert_called_once()
+ # Owned client must be closed by fetch_and_stage on the way out.
+ built[0].close.assert_called_once()
+
+
+def test_fetch_and_stage_with_supplied_client_does_not_close_caller_client():
+ # Even on failure the supplied client must not be closed — the
+ # BatchFetcher owns the lifecycle for the whole batch.
+ row = _row(uri="platform-pending:ws-1/file-1")
+ supplied = MagicMock()
+ supplied.get = MagicMock(side_effect=RuntimeError("network down"))
+ supplied.post = MagicMock() # should not be reached on GET failure
+ inbox_uploads.fetch_and_stage(
+ row,
+ platform_url="http://plat",
+ workspace_id="ws-1",
+ headers={},
+ client=supplied,
+ )
+ supplied.close.assert_not_called()
+ supplied.post.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# BatchFetcher — concurrent fetch + URI cache barrier
+# ---------------------------------------------------------------------------
+
+
+def _row_with_id(act_id: str, file_id: str) -> dict:
+ """Helper: an upload-receive row with a distinct activity id + file id."""
+ return {
+ "id": act_id,
+ "method": "chat_upload_receive",
+ "request_body": {
+ "file_id": file_id,
+ "name": f"{file_id}.pdf",
+ "uri": f"platform-pending:ws-1/{file_id}",
+ "mimeType": "application/pdf",
+ "size": 1,
+ },
+ }
+
+
+def _stub_client_for_batch(get_responses: dict[str, MagicMock]) -> MagicMock:
+ """Build one MagicMock client that returns per-file_id responses
+ based on the file_id segment of the URL.
+ """
+ client = MagicMock()
+
+ def _get(url: str, headers: dict[str, str] | None = None) -> MagicMock:
+ for fid, resp in get_responses.items():
+ if f"/pending-uploads/{fid}/content" in url:
+ return resp
+ return _make_resp(404)
+
+ def _post(url: str, headers: dict[str, str] | None = None) -> MagicMock:
+ return _make_resp(200)
+
+ client.get = MagicMock(side_effect=_get)
+ client.post = MagicMock(side_effect=_post)
+ return client
+
+
+def test_batch_fetcher_runs_submitted_rows_concurrently():
+ # Three rows whose .get() blocks for ~120ms each. With 4 workers the
+ # batch should complete in ~120ms (parallel), not ~360ms (serial).
+ # The 250ms ceiling accommodates CI scheduler jitter while still
+ # discriminating concurrent (~120ms) from serial (~360ms).
+ import time
+
+ barrier_start = [0.0]
+
+ def _slow_get(url: str, headers: dict[str, str] | None = None) -> MagicMock:
+ time.sleep(0.12)
+ for fid in ("a", "b", "c"):
+ if f"/pending-uploads/{fid}/content" in url:
+ return _make_resp(200, content=b"X", content_type="text/plain")
+ return _make_resp(404)
+
+ client = MagicMock()
+ client.get = MagicMock(side_effect=_slow_get)
+ client.post = MagicMock(return_value=_make_resp(200))
+
+ bf = inbox_uploads.BatchFetcher(
+ platform_url="http://plat",
+ workspace_id="ws-1",
+ headers={},
+ client=client,
+ max_workers=4,
+ )
+ barrier_start[0] = time.time()
+ for fid in ("a", "b", "c"):
+ bf.submit(_row_with_id(f"act-{fid}", fid))
+ bf.wait_all()
+ elapsed = time.time() - barrier_start[0]
+ bf.close()
+
+ assert elapsed < 0.25, (
+ f"3 rows × 120ms with 4 workers should finish in <250ms; got {elapsed:.3f}s "
+ "(suggests serial execution — Phase 5b regression)"
+ )
+ assert client.get.call_count == 3
+ assert client.post.call_count == 3
+
+
+def test_batch_fetcher_wait_all_blocks_until_uri_cache_populated():
+ """Pin the correctness invariant: when wait_all returns, the URI
+ cache is hot for every submitted row. Without this barrier the
+ inbox loop would process the chat-message row before its uploads
+ were staged, and rewrite_request_body would surface the un-rewritten
+ platform-pending: URI to the agent.
+ """
+ import time
+
+ def _slow_get(url: str, headers: dict[str, str] | None = None) -> MagicMock:
+ time.sleep(0.05)
+ return _make_resp(200, content=b"data", content_type="text/plain")
+
+ client = MagicMock()
+ client.get = MagicMock(side_effect=_slow_get)
+ client.post = MagicMock(return_value=_make_resp(200))
+
+ inbox_uploads.get_cache().clear()
+ with inbox_uploads.BatchFetcher(
+ platform_url="http://plat", workspace_id="ws-1", headers={}, client=client
+ ) as bf:
+ bf.submit(_row_with_id("act-a", "a"))
+ bf.submit(_row_with_id("act-b", "b"))
+ bf.wait_all()
+ # Cache must be hot for BOTH rows by the time wait_all returns.
+ assert inbox_uploads.get_cache().get("platform-pending:ws-1/a") is not None
+ assert inbox_uploads.get_cache().get("platform-pending:ws-1/b") is not None
+
+
+def test_batch_fetcher_isolates_per_row_failure():
+ """One failing fetch must not abort siblings. Sibling rows complete,
+ URI cache populates for them; the bad row's cache entry stays absent.
+ """
+ def _get(url: str, headers: dict[str, str] | None = None) -> MagicMock:
+ if "/pending-uploads/bad/content" in url:
+ return _make_resp(500, text="upstream broken")
+ return _make_resp(200, content=b"ok", content_type="text/plain")
+
+ client = MagicMock()
+ client.get = MagicMock(side_effect=_get)
+ client.post = MagicMock(return_value=_make_resp(200))
+
+ inbox_uploads.get_cache().clear()
+ with inbox_uploads.BatchFetcher(
+ platform_url="http://plat", workspace_id="ws-1", headers={}, client=client
+ ) as bf:
+ bf.submit(_row_with_id("act-1", "good1"))
+ bf.submit(_row_with_id("act-2", "bad"))
+ bf.submit(_row_with_id("act-3", "good2"))
+ bf.wait_all()
+
+ cache = inbox_uploads.get_cache()
+ assert cache.get("platform-pending:ws-1/good1") is not None
+ assert cache.get("platform-pending:ws-1/good2") is not None
+ assert cache.get("platform-pending:ws-1/bad") is None
+
+
+def test_batch_fetcher_reuses_one_client_across_all_submits():
+ """Every row in the batch must share the same client instance. This
+ is the connection-pool-reuse leg of the perf win: a second fetch
+ to the same host reuses the TCP+TLS handshake from the first.
+ """
+ client = MagicMock()
+ client.get = MagicMock(return_value=_make_resp(200, content=b"x", content_type="text/plain"))
+ client.post = MagicMock(return_value=_make_resp(200))
+
+ with inbox_uploads.BatchFetcher(
+ platform_url="http://plat", workspace_id="ws-1", headers={}, client=client
+ ) as bf:
+ for fid in ("a", "b", "c"):
+ bf.submit(_row_with_id(f"act-{fid}", fid))
+ bf.wait_all()
+
+ # 3 GETs + 3 POST acks all on the same client — no per-row Client
+ # construction.
+ assert client.get.call_count == 3
+ assert client.post.call_count == 3
+
+
+def test_batch_fetcher_close_idempotent():
+ client = MagicMock()
+ bf = inbox_uploads.BatchFetcher(
+ platform_url="http://plat", workspace_id="ws-1", headers={}, client=client
+ )
+ bf.close()
+ bf.close() # second call must not raise
+
+
+def test_batch_fetcher_submit_after_close_raises():
+ client = MagicMock()
+ bf = inbox_uploads.BatchFetcher(
+ platform_url="http://plat", workspace_id="ws-1", headers={}, client=client
+ )
+ bf.close()
+ with pytest.raises(RuntimeError, match="submit after close"):
+ bf.submit(_row_with_id("act-x", "x"))
+
+
+def test_batch_fetcher_owns_client_when_not_supplied(monkeypatch):
+ built: list[MagicMock] = []
+
+ def _factory(*args, **kwargs):
+ c = MagicMock()
+ c.get = MagicMock(return_value=_make_resp(200, content=b"x", content_type="text/plain"))
+ c.post = MagicMock(return_value=_make_resp(200))
+ built.append(c)
+ return c
+
+ monkeypatch.setattr("httpx.Client", _factory)
+
+ bf = inbox_uploads.BatchFetcher(
+ platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ bf.submit(_row_with_id("act-a", "a"))
+ bf.wait_all()
+ bf.close()
+
+ assert len(built) == 1, "expected one owned client per BatchFetcher"
+ built[0].close.assert_called_once()
+
+
+def test_batch_fetcher_does_not_close_supplied_client():
+ client = MagicMock()
+ client.get = MagicMock(return_value=_make_resp(200, content=b"x", content_type="text/plain"))
+ client.post = MagicMock(return_value=_make_resp(200))
+ with inbox_uploads.BatchFetcher(
+ platform_url="http://plat", workspace_id="ws-1", headers={}, client=client
+ ) as bf:
+ bf.submit(_row_with_id("act-a", "a"))
+ bf.wait_all()
+ # Supplied client survives the BatchFetcher's close — caller's lifecycle.
+ client.close.assert_not_called()
+
+
+def test_batch_fetcher_wait_all_no_op_on_empty_batch():
+ client = MagicMock()
+ with inbox_uploads.BatchFetcher(
+ platform_url="http://plat", workspace_id="ws-1", headers={}, client=client
+ ) as bf:
+ bf.wait_all() # nothing submitted; must not block, must not raise
+ client.get.assert_not_called()
+ client.post.assert_not_called()
+
+
+def test_batch_fetcher_httpx_missing_makes_submit_a_noop(monkeypatch):
+ # No client supplied + httpx import fails → BatchFetcher degrades
+ # gracefully: submit() returns None and the row is silently skipped.
+ import sys
+
+ real_httpx = sys.modules.pop("httpx", None)
+ monkeypatch.setitem(sys.modules, "httpx", None)
+ try:
+ bf = inbox_uploads.BatchFetcher(
+ platform_url="http://plat", workspace_id="ws-1", headers={}
+ )
+ result = bf.submit(_row_with_id("act-a", "a"))
+ bf.wait_all()
+ bf.close()
+ finally:
+ if real_httpx is not None:
+ sys.modules["httpx"] = real_httpx
+ else:
+ sys.modules.pop("httpx", None)
+ assert result is None
+
+
+def test_batch_fetcher_close_after_timeout_does_not_block_on_running_workers():
+ """The deadline contract: when wait_all times out, close() must NOT
+ block waiting for the leaked worker threads. Otherwise the inbox
+ poll loop stalls indefinitely on a hung /content fetch — undoing
+ the user-facing timeout.
+
+ Strategy: build a client whose .get() blocks on a threading.Event
+ that the test never sets. Submit a row, wait_all with a tiny
+ timeout, then time close(). If close() drained-and-waited it would
+ block until we set the event (i.e., forever in this test).
+ """
+ import threading
+ import time
+
+ blocker = threading.Event() # never set — workers stay running
+
+ def _hang_get(url, headers=None):
+ # Wait at most ~5s so a buggy implementation eventually unblocks
+ # the test instead of timing out the whole pytest run, but
+ # nothing legitimate should reach this fallback.
+ blocker.wait(timeout=5.0)
+ return _make_resp(200, content=b"x", content_type="text/plain")
+
+ client = MagicMock()
+ client.get = MagicMock(side_effect=_hang_get)
+ client.post = MagicMock(return_value=_make_resp(200))
+
+ bf = inbox_uploads.BatchFetcher(
+ platform_url="http://plat",
+ workspace_id="ws-1",
+ headers={},
+ client=client,
+ max_workers=1, # serialize so submitting 1 keeps the worker busy
+ )
+ bf.submit(_row_with_id("act-a", "a"))
+ # Tiny timeout — wait_all must report the future as not_done.
+ bf.wait_all(timeout=0.05)
+ t0 = time.time()
+ bf.close()
+ elapsed = time.time() - t0
+ # Unblock the lingering worker so it doesn't pollute later tests.
+ blocker.set()
+
+ # Without the cancel-on-timeout fix, close() would block until
+ # blocker.set() — i.e., the full ~5s. With the fix it returns
+ # immediately because shutdown(wait=False) doesn't drain.
+ assert elapsed < 1.0, (
+ f"close() blocked for {elapsed:.2f}s after wait_all timeout — "
+ "cancel-on-timeout regression: close() is draining instead of bailing"
+ )
+
+
+def test_batch_fetcher_close_without_timeout_still_drains():
+ """Negative leg of the timeout contract: when wait_all completes
+ cleanly (no timeout), close() must KEEP its drain-and-wait
+ behavior so a still-queued ack POST isn't dropped mid-write.
+ """
+ import time
+
+ def _slow_get(url, headers=None):
+ time.sleep(0.05)
+ return _make_resp(200, content=b"x", content_type="text/plain")
+
+ client = MagicMock()
+ client.get = MagicMock(side_effect=_slow_get)
+ client.post = MagicMock(return_value=_make_resp(200))
+
+ bf = inbox_uploads.BatchFetcher(
+ platform_url="http://plat",
+ workspace_id="ws-1",
+ headers={},
+ client=client,
+ max_workers=2,
+ )
+ bf.submit(_row_with_id("act-a", "a"))
+ bf.submit(_row_with_id("act-b", "b"))
+ bf.wait_all() # generous default timeout — should not fire
+ bf.close()
+
+ # All 2 GETs + 2 ACK POSTs ran to completion via drain-and-wait.
+ assert client.get.call_count == 2
+ assert client.post.call_count == 2
diff --git a/workspace/tests/test_mcp_cli.py b/workspace/tests/test_mcp_cli.py
index 608d1e7c..a1061394 100644
--- a/workspace/tests/test_mcp_cli.py
+++ b/workspace/tests/test_mcp_cli.py
@@ -13,6 +13,7 @@ from pathlib import Path
import pytest
import mcp_cli
+import mcp_heartbeat
@pytest.fixture(autouse=True)
@@ -739,8 +740,13 @@ def test_heartbeat_loop_calls_persist_on_success(monkeypatch):
def fake_persist(resp):
saw.append(resp)
+ # Patch on mcp_heartbeat — that's where heartbeat_loop's internal
+ # name resolution looks up persist_inbound_secret_from_heartbeat
+ # after the RFC #2873 iter 3 split. The mcp_cli._persist_…_from_heartbeat
+ # back-compat re-export still exists, but patching it here would not
+ # affect the loop body.
monkeypatch.setattr(
- mcp_cli, "_persist_inbound_secret_from_heartbeat", fake_persist
+ mcp_heartbeat, "persist_inbound_secret_from_heartbeat", fake_persist
)
class FakeResp:
@@ -786,8 +792,8 @@ def test_heartbeat_loop_skips_persist_on_4xx(monkeypatch):
"""Heartbeat 4xx error path must NOT invoke persist (no body to trust)."""
saw: list[object] = []
monkeypatch.setattr(
- mcp_cli,
- "_persist_inbound_secret_from_heartbeat",
+ mcp_heartbeat,
+ "persist_inbound_secret_from_heartbeat",
lambda r: saw.append(r),
)
@@ -899,7 +905,7 @@ def test_heartbeat_single_401_logs_warning_not_error(monkeypatch, caplog):
transient platform blip. Log at WARNING; don't shout."""
import logging
- caplog.set_level(logging.WARNING, logger="mcp_cli")
+ caplog.set_level(logging.WARNING, logger="mcp_heartbeat")
_multi_iter_runner(monkeypatch, [401])
@@ -923,7 +929,7 @@ def test_heartbeat_three_consecutive_401s_escalates_to_error(monkeypatch, caplog
LOUD ERROR with re-onboard guidance — not buried at WARNING."""
import logging
- caplog.set_level(logging.WARNING, logger="mcp_cli")
+ caplog.set_level(logging.WARNING, logger="mcp_heartbeat")
_multi_iter_runner(monkeypatch, [401, 401, 401])
@@ -949,7 +955,7 @@ def test_heartbeat_403_treated_same_as_401(monkeypatch, caplog):
not authorized for this workspace). Same escalation path."""
import logging
- caplog.set_level(logging.WARNING, logger="mcp_cli")
+ caplog.set_level(logging.WARNING, logger="mcp_heartbeat")
_multi_iter_runner(monkeypatch, [403, 403, 403])
@@ -963,7 +969,7 @@ def test_heartbeat_recovery_resets_consecutive_counter(monkeypatch, caplog):
later should NOT immediately escalate."""
import logging
- caplog.set_level(logging.WARNING, logger="mcp_cli")
+ caplog.set_level(logging.WARNING, logger="mcp_heartbeat")
# Two 401s, then 200, then one 401. If counter resets correctly,
# the final 401 is "1 consecutive" and should NOT escalate.
@@ -982,7 +988,7 @@ def test_heartbeat_500_does_not_increment_auth_counter(monkeypatch, caplog):
misleading the operator."""
import logging
- caplog.set_level(logging.WARNING, logger="mcp_cli")
+ caplog.set_level(logging.WARNING, logger="mcp_heartbeat")
_multi_iter_runner(monkeypatch, [500, 500, 500])
diff --git a/workspace/tests/test_mcp_cli_split.py b/workspace/tests/test_mcp_cli_split.py
new file mode 100644
index 00000000..e8a39817
--- /dev/null
+++ b/workspace/tests/test_mcp_cli_split.py
@@ -0,0 +1,231 @@
+"""RFC #2873 iter 3 — drift gate + behavior tests for the post-split surface.
+
+The bulk of the heartbeat / resolver behavior is exercised by
+``test_mcp_cli.py`` and ``test_mcp_cli_multi_workspace.py`` through the
+``mcp_cli._symbol`` back-compat aliases. This file pins:
+
+ 1. The split is **behavior-neutral via aliasing** — every previously-
+ exposed ``mcp_cli._foo`` symbol is the SAME callable as the new
+ module's authoritative function. If a refactor accidentally drops
+ an alias or points it at a stale copy, this fails.
+
+ 2. ``mcp_inbox_pollers.start_inbox_pollers`` works for both single-
+ workspace (legacy back-compat) and multi-workspace shapes.
+ ``mcp_cli`` had no direct test for this branch before the split.
+"""
+from __future__ import annotations
+
+import sys
+import types
+
+import pytest
+
+import mcp_cli
+import mcp_heartbeat
+import mcp_inbox_pollers
+import mcp_workspace_resolver
+
+
+# ============== Drift gate: back-compat aliases point at the real fn ==============
+
+class TestBackCompatAliases:
+ """Pin that ``mcp_cli._foo is real_fn``. A test that re-implements
+ the alias would still pass — the ``is`` check guarantees we didn't
+ create a wrapper that drifts."""
+
+ def test_heartbeat_aliases(self):
+ assert mcp_cli._build_agent_card is mcp_heartbeat.build_agent_card
+ assert mcp_cli._platform_register is mcp_heartbeat.platform_register
+ assert mcp_cli._heartbeat_loop is mcp_heartbeat.heartbeat_loop
+ assert mcp_cli._log_heartbeat_auth_failure is mcp_heartbeat.log_heartbeat_auth_failure
+ assert (
+ mcp_cli._persist_inbound_secret_from_heartbeat
+ is mcp_heartbeat.persist_inbound_secret_from_heartbeat
+ )
+ assert mcp_cli._start_heartbeat_thread is mcp_heartbeat.start_heartbeat_thread
+
+ def test_resolver_aliases(self):
+ assert mcp_cli._resolve_workspaces is mcp_workspace_resolver.resolve_workspaces
+ assert mcp_cli._print_missing_env_help is mcp_workspace_resolver.print_missing_env_help
+ assert mcp_cli._read_token_file is mcp_workspace_resolver.read_token_file
+
+ def test_inbox_pollers_alias(self):
+ assert mcp_cli._start_inbox_pollers is mcp_inbox_pollers.start_inbox_pollers
+
+ def test_constants_match(self):
+ assert (
+ mcp_cli.HEARTBEAT_INTERVAL_SECONDS
+ == mcp_heartbeat.HEARTBEAT_INTERVAL_SECONDS
+ )
+ assert (
+ mcp_cli._HEARTBEAT_AUTH_LOUD_THRESHOLD
+ == mcp_heartbeat.HEARTBEAT_AUTH_LOUD_THRESHOLD
+ )
+ assert (
+ mcp_cli._HEARTBEAT_AUTH_RELOG_INTERVAL
+ == mcp_heartbeat.HEARTBEAT_AUTH_RELOG_INTERVAL
+ )
+
+
+# ============== mcp_inbox_pollers — both shapes + degraded import ==============
+
+class _FakeInboxState:
+ def __init__(self, **kwargs):
+ self.kwargs = kwargs
+
+
+def _install_fake_inbox(monkeypatch):
+ """Inject a fake ``inbox`` module so we observe the spawn calls
+ without pulling in the real platform_auth dependency tree."""
+ activations: list[_FakeInboxState] = []
+ spawned: list[tuple[_FakeInboxState, str, str]] = []
+ cursor_paths: list[str] = []
+
+ def default_cursor_path(wsid=None):
+ # Mirror the real signature: optional wsid → distinct path per id,
+ # absent → legacy single path.
+ path = f"/tmp/.mcp_inbox_cursor.{wsid[:8]}" if wsid else "/tmp/.mcp_inbox_cursor"
+ cursor_paths.append(path)
+ return path
+
+ def activate(state):
+ activations.append(state)
+
+ def start_poller_thread(state, platform_url, wsid):
+ spawned.append((state, platform_url, wsid))
+
+ fake = types.ModuleType("inbox")
+ fake.InboxState = _FakeInboxState
+ fake.activate = activate
+ fake.default_cursor_path = default_cursor_path
+ fake.start_poller_thread = start_poller_thread
+ monkeypatch.setitem(sys.modules, "inbox", fake)
+ return activations, spawned, cursor_paths
+
+
+class TestStartInboxPollers:
+ def test_single_workspace_uses_legacy_cursor_path(self, monkeypatch):
+ """Back-compat exact: single-workspace mode reuses the legacy
+ cursor filename so an existing operator's on-disk state isn't
+ invalidated by upgrade."""
+ activations, spawned, cursor_paths = _install_fake_inbox(monkeypatch)
+
+ mcp_inbox_pollers.start_inbox_pollers(
+ "https://test.moleculesai.app", ["ws-only-one"]
+ )
+
+ assert len(activations) == 1, "exactly one inbox.activate call"
+ assert len(spawned) == 1, "exactly one poller thread spawned"
+ # Single-workspace path uses default_cursor_path() with no arg —
+ # the cursor_path captured here must be the legacy filename
+ # (no per-ws suffix).
+ assert cursor_paths == ["/tmp/.mcp_inbox_cursor"]
+ # State carries cursor_path, not cursor_paths
+ state = activations[0]
+ assert state.kwargs == {"cursor_path": "/tmp/.mcp_inbox_cursor"}
+ # Spawned poller is for the right workspace
+ assert spawned[0] == (state, "https://test.moleculesai.app", "ws-only-one")
+
+ def test_multi_workspace_uses_per_workspace_cursor_paths(self, monkeypatch):
+ """Multi-workspace path: per-workspace cursor file, one shared
+ InboxState. N pollers, each pointed at the same state so the
+ agent's inbox_peek/pop sees a merged view."""
+ activations, spawned, _ = _install_fake_inbox(monkeypatch)
+
+ wsids = ["ws-aaaaaaaa", "ws-bbbbbbbb", "ws-cccccccc"]
+ mcp_inbox_pollers.start_inbox_pollers(
+ "https://test.moleculesai.app", wsids
+ )
+
+ # One state, one activate, three pollers
+ assert len(activations) == 1
+ assert len(spawned) == 3
+ state = activations[0]
+ # Multi-workspace state carries cursor_paths (mapping)
+ assert "cursor_paths" in state.kwargs
+ assert set(state.kwargs["cursor_paths"].keys()) == set(wsids)
+ # All pollers share the same state
+ for s, _url, _wsid in spawned:
+ assert s is state
+ # All workspace ids covered
+ assert sorted(t[2] for t in spawned) == sorted(wsids)
+
+ def test_inbox_module_unavailable_logs_and_returns(self, monkeypatch, caplog):
+ """If ``import inbox`` fails (older install or stripped
+ runtime), spawn must NOT raise — log a warning and continue.
+ The MCP server can still serve outbound tools."""
+ import logging
+
+ # Force ImportError by injecting a module sentinel that raises.
+ class _Boom:
+ def __getattr__(self, _name):
+ raise ImportError("inbox stripped from this build")
+
+ # Setting sys.modules["inbox"] to a broken object isn't enough —
+ # the import statement reads sys.modules first; if the entry is
+ # truthy, Python returns it. We need to force the import to raise.
+ # Easiest: pre-poison sys.modules so the `import inbox` line
+ # raises by setting the entry to None (Python special-cases None
+ # as "explicit ImportError").
+ monkeypatch.setitem(sys.modules, "inbox", None)
+
+ caplog.set_level(logging.WARNING, logger="mcp_inbox_pollers")
+ # Should not raise.
+ mcp_inbox_pollers.start_inbox_pollers(
+ "https://test.moleculesai.app", ["ws-1"]
+ )
+ warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
+ assert any("inbox module unavailable" in r.message for r in warnings), (
+ f"expected a 'inbox module unavailable' warning, got: "
+ f"{[r.message for r in warnings]}"
+ )
+
+
+# ============== mcp_heartbeat.build_agent_card — short direct tests ==============
+
+class TestBuildAgentCardDirect:
+ """Spot-check the new module's public surface; the full test matrix
+ lives in ``test_mcp_cli.py`` reaching through ``mcp_cli._build_agent_card``.
+ """
+
+ def test_default_card_shape(self, monkeypatch):
+ for v in ("MOLECULE_AGENT_NAME", "MOLECULE_AGENT_DESCRIPTION", "MOLECULE_AGENT_SKILLS"):
+ monkeypatch.delenv(v, raising=False)
+ card = mcp_heartbeat.build_agent_card("8dad3e29-c32a-4ec7-9ea7-94fe2d2d98ec")
+ assert card == {"name": "molecule-mcp-8dad3e29", "skills": []}
+
+ def test_skills_csv_split_and_trim(self, monkeypatch):
+ monkeypatch.setenv("MOLECULE_AGENT_SKILLS", "research, , code-review,memory-curation, ")
+ card = mcp_heartbeat.build_agent_card("ws-1")
+ assert card["skills"] == [
+ {"name": "research"},
+ {"name": "code-review"},
+ {"name": "memory-curation"},
+ ]
+
+
+# ============== mcp_workspace_resolver — short direct tests ==============
+
+class TestResolveWorkspacesDirect:
+ @pytest.fixture(autouse=True)
+ def _isolate(self, monkeypatch, tmp_path):
+ for v in ("WORKSPACE_ID", "MOLECULE_WORKSPACE_TOKEN", "MOLECULE_WORKSPACES"):
+ monkeypatch.delenv(v, raising=False)
+ monkeypatch.setenv("CONFIGS_DIR", str(tmp_path))
+ yield
+
+ def test_single_workspace_via_env(self, monkeypatch):
+ monkeypatch.setenv("WORKSPACE_ID", "ws-1")
+ monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN", "tok")
+ out, errors = mcp_workspace_resolver.resolve_workspaces()
+ assert out == [("ws-1", "tok")]
+ assert errors == []
+
+ def test_multi_workspace_via_json_env(self, monkeypatch):
+ monkeypatch.setenv(
+ "MOLECULE_WORKSPACES",
+ '[{"id":"ws-a","token":"a"},{"id":"ws-b","token":"b"}]',
+ )
+ out, errors = mcp_workspace_resolver.resolve_workspaces()
+ assert out == [("ws-a", "a"), ("ws-b", "b")]
+ assert errors == []
diff --git a/workspace/tests/test_mcp_memory.py b/workspace/tests/test_mcp_memory.py
index 117e5417..d2a7ac35 100644
--- a/workspace/tests/test_mcp_memory.py
+++ b/workspace/tests/test_mcp_memory.py
@@ -63,7 +63,7 @@ async def test_commit_memory_success(monkeypatch):
mcp = _load_mcp()
client = FakeClient()
- monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client)
+ monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client)
result = await mcp.handle_tool_call("commit_memory", {
"content": "Architecture decision: use Go for backend",
@@ -92,7 +92,7 @@ async def test_commit_memory_default_scope(monkeypatch):
mcp = _load_mcp()
client = FakeClient()
- monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client)
+ monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client)
result = await mcp.handle_tool_call("commit_memory", {
"content": "Some note",
@@ -108,7 +108,7 @@ async def test_recall_memory_success(monkeypatch):
mcp = _load_mcp()
client = FakeClient()
- monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client)
+ monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client)
result = await mcp.handle_tool_call("recall_memory", {"query": "architecture"})
@@ -127,7 +127,7 @@ async def test_recall_memory_empty(monkeypatch):
async def get(self, url, params=None, headers=None, **kwargs):
return FakeResponse(200, [])
- monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: EmptyClient())
+ monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: EmptyClient())
result = await mcp.handle_tool_call("recall_memory", {})
assert "No memories found" in result
@@ -139,7 +139,7 @@ async def test_recall_memory_with_scope_filter(monkeypatch):
mcp = _load_mcp()
client = FakeClient()
- monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client)
+ monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client)
await mcp.handle_tool_call("recall_memory", {"scope": "TEAM"})
diff --git a/workspace/tests/test_secret_redact.py b/workspace/tests/test_secret_redact.py
index d0975969..ecc268e8 100644
--- a/workspace/tests/test_secret_redact.py
+++ b/workspace/tests/test_secret_redact.py
@@ -357,7 +357,7 @@ class TestA2AToolCommitMemoryRedactsSecrets:
fake_client.post = _capture
- with patch("a2a_tools.httpx.AsyncClient", return_value=fake_client):
+ with patch("a2a_tools_memory.httpx.AsyncClient", return_value=fake_client):
await a2a_tools.tool_commit_memory(content_with_secret)
stored = captured.get("content", "")
@@ -385,7 +385,7 @@ class TestA2AToolCommitMemoryRedactsSecrets:
fake_client.post = _capture
- with patch("a2a_tools.httpx.AsyncClient", return_value=fake_client):
+ with patch("a2a_tools_memory.httpx.AsyncClient", return_value=fake_client):
await a2a_tools.tool_commit_memory(f"key={key}")
stored = captured.get("content", "")