diff --git a/.github/workflows/e2e-api.yml b/.github/workflows/e2e-api.yml index bc9e629b..782cbedc 100644 --- a/.github/workflows/e2e-api.yml +++ b/.github/workflows/e2e-api.yml @@ -172,6 +172,9 @@ jobs: - name: Run poll-mode + since_id cursor E2E (#2339) if: needs.detect-changes.outputs.api == 'true' run: bash tests/e2e/test_poll_mode_e2e.sh + - name: Run poll-mode chat upload E2E (RFC #2891) + if: needs.detect-changes.outputs.api == 'true' + run: bash tests/e2e/test_poll_mode_chat_upload_e2e.sh - name: Dump platform log on failure if: failure() && needs.detect-changes.outputs.api == 'true' run: cat workspace-server/platform.log || true diff --git a/canvas/src/app/orgs/page.tsx b/canvas/src/app/orgs/page.tsx index 3c5576ef..a137ac2e 100644 --- a/canvas/src/app/orgs/page.tsx +++ b/canvas/src/app/orgs/page.tsx @@ -18,7 +18,7 @@ // quick bounce between signup and either Checkout or the tenant UI. import { useEffect, useState } from "react"; -import { fetchSession, redirectToLogin, type Session } from "@/lib/auth"; +import { fetchSession, redirectToLogin, signOut, type Session } from "@/lib/auth"; import { PLATFORM_URL } from "@/lib/api"; import { formatCredits, pillTone, bannerKind } from "@/lib/credits"; import { TermsGate } from "@/components/TermsGate"; @@ -129,7 +129,7 @@ export default function OrgsPage() { return : null} />; } return ( - + {justCheckedOut && }
    {orgs.map((o) => ( @@ -160,11 +160,21 @@ function CheckoutBanner() { ); } -function Shell({ children }: { children: React.ReactNode }) { +function Shell({ + children, + session, +}: { + children: React.ReactNode; + // Optional: when present, the header renders the signed-in email + + // a Sign-out button. The empty-state Shell call doesn't have a + // session in scope, so accept null and skip the header chrome there. + session?: Session | null; +}) { return (
+        {session ? <AccountBar session={session} /> : null}

    Your organizations

    Each org is an isolated Molecule workspace. @@ -177,6 +187,40 @@ function Shell({ children }: { children: React.ReactNode }) { ); } +// AccountBar renders the signed-in email + a Sign-out button at the +// top of the page. Without this the user has no way to log out — the +// /cp/auth/signout endpoint exists on the control plane but no UI ever +// called it. Reported externally on 2026-05-05; this is the fix. +// +// Click → calls signOut() which POSTs /cp/auth/signout (clears the +// WorkOS session cookie + revokes at the provider) then bounces to +// /cp/auth/login. The signOut helper is best-effort — even on a 5xx +// or network failure the redirect fires so the user never gets stuck +// on an authed-looking page after they clicked Sign out. +function AccountBar({ session }: { session: Session }) { + const [signingOut, setSigningOut] = useState(false); + return ( +

+    <div>
+      <span>{session.email}</span>
+      <button
+        disabled={signingOut}
+        onClick={() => {
+          setSigningOut(true);
+          void signOut();
+        }}
+      >
+        {signingOut ? "Signing out..." : "Sign out"}
+      </button>
+    </div>
    + ); +} + // DataResidencyNotice surfaces where workspace data lives so EU-based // signups can make an informed choice (GDPR Art. 13 disclosure // requirement). Plain text, no icon — the goal is clarity, not diff --git a/canvas/src/components/EmptyState.tsx b/canvas/src/components/EmptyState.tsx index 2452ef1a..d54f1709 100644 --- a/canvas/src/components/EmptyState.tsx +++ b/canvas/src/components/EmptyState.tsx @@ -48,16 +48,21 @@ export function EmptyState() { }); // "Create blank" bypasses templates entirely — no preflight, no - // modal, just POST /workspaces with a default name and tier. - // Deliberately NOT routed through useTemplateDeploy because it - // has no `template.id` to deploy against. + // modal, just POST /workspaces with a default name. Deliberately + // NOT routed through useTemplateDeploy because it has no + // `template.id` to deploy against. + // + // tier is omitted so the backend picks a SaaS-aware default + // (T4 on SaaS, T3 on self-hosted — see WorkspaceHandler.DefaultTier). + // The previous hardcoded `tier: 2` shipped every fresh-tenant agent + // at Standard regardless of host, which surprised SaaS users whose + // CreateWorkspaceDialog already defaults to T4. const createBlank = async () => { setBlankCreating(true); setBlankError(null); try { const ws = await api.post<{ id: string }>("/workspaces", { name: "My First Agent", - tier: 2, canvas: firstDeployCoords(), }); handleDeployed(ws.id); diff --git a/canvas/src/components/tabs/ChatTab.tsx b/canvas/src/components/tabs/ChatTab.tsx index 7da17b72..2d6ae908 100644 --- a/canvas/src/components/tabs/ChatTab.tsx +++ b/canvas/src/components/tabs/ChatTab.tsx @@ -286,6 +286,14 @@ function MyChatPanel({ workspaceId, data }: Props) { const [error, setError] = useState(null); const [confirmRestart, setConfirmRestart] = useState(false); const bottomRef = useRef(null); + // First-mount scroll-to-bottom needs `behavior: "instant"` — long + // conversations smooth-animate for ~300ms which any concurrent + // re-render can interrupt, leaving the user stuck mid-conversation + // when the chat tab opens. Subsequent appends (new agent messages) + // keep `smooth` for the visual "landing" feel. Flipped the first + // time messages.length goes positive, so a workspace switch (which + // remounts ChatTab) gets a fresh instant jump too. + const hasInitialScrollRef = useRef(false); // Lazy-load older history on scroll-up. // - containerRef = the scrollable messages viewport // - topRef = sentinel above the messages list; IO observes it @@ -545,6 +553,15 @@ function MyChatPanel({ workspaceId, data }: Props) { scrollAnchorRef.current = null; return; } + // Instant on first arrival of messages — smooth-scroll on a long + // conversation gets interrupted by concurrent renders and leaves + // the user stuck in the middle. After the first jump, subsequent + // appends animate as before. 
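+    // ("instant" is part of the CSSOM ScrollBehavior enum; the cast on
+    // the call below is only needed for older lib.dom typings that
+    // declare just "auto" | "smooth".)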
+ if (!hasInitialScrollRef.current && messages.length > 0) { + hasInitialScrollRef.current = true; + bottomRef.current?.scrollIntoView({ behavior: "instant" as ScrollBehavior }); + return; + } bottomRef.current?.scrollIntoView({ behavior: "smooth" }); }, [messages]); diff --git a/canvas/src/components/tabs/chat/AgentCommsPanel.tsx b/canvas/src/components/tabs/chat/AgentCommsPanel.tsx index fc327ea0..074d96fc 100644 --- a/canvas/src/components/tabs/chat/AgentCommsPanel.tsx +++ b/canvas/src/components/tabs/chat/AgentCommsPanel.tsx @@ -1,6 +1,6 @@ "use client"; -import { useState, useEffect, useMemo, useRef } from "react"; +import { useState, useEffect, useLayoutEffect, useMemo, useRef, useCallback } from "react"; import ReactMarkdown from "react-markdown"; import remarkGfm from "remark-gfm"; import { api } from "@/lib/api"; @@ -184,13 +184,23 @@ function unwrapErrorText(raw: string | null): string { export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) { const [messages, setMessages] = useState([]); const [loading, setLoading] = useState(true); + const [loadError, setLoadError] = useState(null); // Dedup by timestamp+type+peer to handle API load + WebSocket race const seenKeys = useRef(new Set()); const bottomRef = useRef(null); + // Mirrors the my-chat scroll behaviour from ChatTab (PR #2903) — + // smooth-scroll on a long history gets interrupted by concurrent + // renders and lands the panel mid-conversation. Switch the first + // arrival to instant; subsequent appends animate. + const hasInitialScrollRef = useRef(false); - // Load history - useEffect(() => { + // Load history. Extracted so the error-state retry button can + // re-invoke without remount. ChatTab uses the same shape + // (loadInitial → loadError state → retry button). + const loadInitial = useCallback(() => { setLoading(true); + setLoadError(null); + seenKeys.current.clear(); api.get(`/workspaces/${workspaceId}/activity?source=agent&limit=50`) .then((entries) => { const filtered = (entries ?? []) @@ -234,10 +244,15 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) { // the .then body) — the panel just sat on the empty state // with zero signal. console.warn("AgentCommsPanel: load activity failed", err); + setLoadError(err instanceof Error ? err.message : String(err)); setLoading(false); }); }, [workspaceId]); + useEffect(() => { + loadInitial(); + }, [loadInitial]); + // Live updates routed through the global ReconnectingSocket. The // previous pattern of `new WebSocket(WS_URL)` per panel had no // onclose / no reconnect, so any drop (idle timeout, browser @@ -358,7 +373,18 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) { } catch { /* ignore */ } }); - useEffect(() => { + // useLayoutEffect (not useEffect) so the scroll runs BEFORE paint — + // otherwise the user sees the panel jump for one frame on every + // append. Mirrors ChatTab's MyChatPanel scroll block. + useLayoutEffect(() => { + if (!hasInitialScrollRef.current && messages.length > 0) { + // Instant on first arrival — smooth-scroll on a long history + // gets interrupted by concurrent renders and lands the panel + // mid-conversation (the chat-opens-in-middle bug class). + hasInitialScrollRef.current = true; + bottomRef.current?.scrollIntoView({ behavior: "instant" as ScrollBehavior }); + return; + } bottomRef.current?.scrollIntoView({ behavior: "smooth" }); }, [messages]); @@ -366,6 +392,27 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) { return
    Loading agent communications...
    ; } + if (loadError !== null && messages.length === 0) { + // Mirrors ChatTab my-chat error UI — surfaces the load failure + // with a retry button instead of silently rendering empty state. + return ( +
+      <div role="alert">
+        <div>Failed to load agent communications: {loadError}</div>
+        <button onClick={loadInitial}>Retry</button>
+      </div>
    + ); + } + if (messages.length === 0) { return (
    diff --git a/canvas/src/components/tabs/chat/__tests__/AgentCommsPanel.render.test.tsx b/canvas/src/components/tabs/chat/__tests__/AgentCommsPanel.render.test.tsx new file mode 100644 index 00000000..80b37982 --- /dev/null +++ b/canvas/src/components/tabs/chat/__tests__/AgentCommsPanel.render.test.tsx @@ -0,0 +1,115 @@ +// @vitest-environment jsdom +import { describe, it, expect, vi, beforeEach, afterEach } from "vitest"; +import { render, screen, fireEvent, waitFor } from "@testing-library/react"; + +// API mock — tests can override per case via apiGetMock.mockImplementationOnce. +const apiGetMock = vi.fn<(url: string) => Promise>(); +vi.mock("@/lib/api", () => ({ + api: { + get: (url: string) => apiGetMock(url), + }, +})); + +// useSocketEvent — no-op for these render tests; live updates aren't +// what we're verifying here. +vi.mock("@/hooks/useSocketEvent", () => ({ + useSocketEvent: () => {}, +})); + +// Canvas store — peer name resolution. +vi.mock("@/store/canvas", () => ({ + useCanvasStore: { + getState: () => ({ + nodes: [ + { id: "ws-self", data: { name: "Self" } }, + { id: "ws-peer", data: { name: "Peer Agent" } }, + ], + }), + }, +})); + +// Toaster shim — AgentCommsPanel imports showToast. +vi.mock("../../Toaster", () => ({ + showToast: vi.fn(), +})); + +import { AgentCommsPanel } from "../AgentCommsPanel"; + +// jsdom doesn't implement scrollIntoView. Tests that observe the call +// install a spy here; tests that don't care still need a no-op stub +// so the component doesn't throw. +const scrollSpy = vi.fn<(opts?: ScrollIntoViewOptions | boolean) => void>(); +beforeEach(() => { + apiGetMock.mockReset(); + scrollSpy.mockReset(); + Element.prototype.scrollIntoView = scrollSpy as unknown as Element["scrollIntoView"]; +}); + +afterEach(() => { + vi.clearAllMocks(); +}); + +describe("AgentCommsPanel — initial-state parity with ChatTab my-chat", () => { + it("shows loading text while history fetch is in flight", () => { + apiGetMock.mockReturnValueOnce(new Promise(() => { /* never resolves */ })); + render(); + expect(screen.getByText("Loading agent communications...")).toBeDefined(); + }); + + it("renders error UI with a Retry button when the history fetch rejects", async () => { + apiGetMock.mockRejectedValueOnce(new Error("network down")); + render(); + + // Wait for the error state to render — loading→error transition is async. + const alert = await waitFor(() => screen.getByRole("alert")); + expect(alert.textContent).toMatch(/Failed to load agent communications/); + expect(alert.textContent).toMatch(/network down/); + + // Retry button must be present and trigger a refetch. + const retry = screen.getByRole("button", { name: "Retry" }); + apiGetMock.mockResolvedValueOnce([]); // success on retry + fireEvent.click(retry); + + // Two calls total: initial load + retry. Pin via mock call count. 
+ await waitFor(() => expect(apiGetMock.mock.calls.length).toBe(2)); + }); + + it("falls back to empty-state copy when load succeeds with zero rows", async () => { + apiGetMock.mockResolvedValueOnce([]); + render(); + await waitFor(() => + expect(screen.getByText("No agent-to-agent communications yet.")).toBeDefined(), + ); + }); + + it("scrollIntoView is called with behavior=instant on the first message arrival", async () => { + apiGetMock.mockResolvedValueOnce([ + { + id: "act-1", + activity_type: "a2a_send", + source_id: "ws-self", + target_id: "ws-peer", + method: "message/send", + summary: "Delegating", + request_body: { message: { parts: [{ text: "hi" }] } }, + response_body: null, + status: "ok", + created_at: "2026-04-25T18:00:00Z", + }, + ]); + render(); + + // useLayoutEffect is what makes the first call instant — wait for + // the panel to render at least one message. + await waitFor(() => expect(scrollSpy.mock.calls.length).toBeGreaterThan(0)); + + // The pinned contract: SOME call uses behavior: "instant" — the + // first-arrival case. Subsequent appends use "smooth", but those + // can't fire here (no live update yet). + const sawInstant = scrollSpy.mock.calls.some((args) => { + const opts = args[0]; + return typeof opts === "object" && opts !== null && "behavior" in opts && opts.behavior === "instant"; + }); + expect(sawInstant).toBe(true); + }); +}); diff --git a/canvas/src/lib/__tests__/auth.test.ts b/canvas/src/lib/__tests__/auth.test.ts index ee74a521..5f9b76b3 100644 --- a/canvas/src/lib/__tests__/auth.test.ts +++ b/canvas/src/lib/__tests__/auth.test.ts @@ -2,7 +2,7 @@ * @vitest-environment jsdom */ import { describe, it, expect, vi, afterEach } from "vitest"; -import { fetchSession, redirectToLogin } from "../auth"; +import { fetchSession, redirectToLogin, signOut } from "../auth"; afterEach(() => { vi.unstubAllGlobals(); @@ -110,3 +110,157 @@ describe("redirectToLogin", () => { expect((window.location as unknown as { href: string }).href).toBe(signupHref); }); }); + +describe("signOut", () => { + // Helper — most tests need the same window.location stub. + function stubLocation(): void { + Object.defineProperty(window, "location", { + writable: true, + value: { + href: "https://acme.moleculesai.app/orgs", + pathname: "/orgs", + hostname: "acme.moleculesai.app", + protocol: "https:", + }, + }); + } + + it("POSTs to /cp/auth/signout with credentials:include", async () => { + stubLocation(); + const fetchMock = vi.fn().mockResolvedValue({ + ok: true, + status: 200, + json: async () => ({ ok: true, logout_url: "" }), + }); + vi.stubGlobal("fetch", fetchMock); + + await signOut(); + + expect(fetchMock).toHaveBeenCalledTimes(1); + expect(fetchMock).toHaveBeenCalledWith( + expect.stringContaining("/cp/auth/signout"), + expect.objectContaining({ method: "POST", credentials: "include" }), + ); + }); + + it("navigates to provider logout_url when the response includes one", async () => { + // The hosted-logout path is what actually breaks the SSO re-auth + // loop reported on PR #2913. Without this, AuthKit's browser + // cookie keeps the user signed in via SSO and any subsequent + // /cp/auth/login silently re-auths. 
+ stubLocation(); + const hostedLogout = + "https://api.workos.com/user_management/sessions/logout?session_id=cookie&return_to=https%3A%2F%2Fapp.moleculesai.app%2Forgs"; + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: true, + status: 200, + json: async () => ({ ok: true, logout_url: hostedLogout }), + }), + ); + + await signOut(); + + const after = (window.location as unknown as { href: string }).href; + expect(after).toBe(hostedLogout); + }); + + it("falls back to /cp/auth/login when logout_url is empty (DisabledProvider / dev)", async () => { + // DisabledProvider returns "" — the local /cp/auth/login redirect + // works in dev/test where there's no SSO session to escape. + stubLocation(); + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: true, + status: 200, + json: async () => ({ ok: true, logout_url: "" }), + }), + ); + + await signOut(); + + const after = (window.location as unknown as { href: string }).href; + // Tenant subdomain (acme.moleculesai.app) → auth origin is app.moleculesai.app. + expect(after).toBe("https://app.moleculesai.app/cp/auth/login"); + }); + + it("redirects even when the POST fails so the user isn't stuck on an authed page", async () => { + // Critical UX invariant: clicking 'Sign out' MUST navigate away from + // the authenticated app, even if the network is down or the cookie + // is already invalid. Anything else looks like the button is + // broken — the precise complaint that triggered this fix. + stubLocation(); + vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("network down"))); + + await signOut(); + + const after = (window.location as unknown as { href: string }).href; + expect(after).toBe("https://app.moleculesai.app/cp/auth/login"); + }); + + it("redirects on 401 (session already invalid) just like 200", async () => { + // A user with an already-invalid cookie should still see the + // logout flow complete — no error, no stuck-on-app dead end. + // Note: 401 means res.ok=false → we don't read .json() at all, + // so a missing body is fine. + stubLocation(); + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: false, + status: 401, + json: async () => ({}), + }), + ); + + await signOut(); + + const after = (window.location as unknown as { href: string }).href; + expect(after).toBe("https://app.moleculesai.app/cp/auth/login"); + }); + + it("falls back to /cp/auth/login when the response body is malformed", async () => { + // Defensive parsing: a body that isn't valid JSON, or doesn't + // have logout_url, or has logout_url as the wrong type — none of + // these should strand the user on the authed page. Fallback path + // takes over. + stubLocation(); + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: true, + status: 200, + json: async () => { + throw new Error("not json"); + }, + }), + ); + + await signOut(); + + const after = (window.location as unknown as { href: string }).href; + expect(after).toBe("https://app.moleculesai.app/cp/auth/login"); + }); + + it("falls back to /cp/auth/login when logout_url is the wrong type", async () => { + // Even valid JSON should be type-checked: a non-string logout_url + // (e.g. server-side bug, version drift) must not crash or open- + // redirect the user. 
+ stubLocation(); + vi.stubGlobal( + "fetch", + vi.fn().mockResolvedValue({ + ok: true, + status: 200, + json: async () => ({ ok: true, logout_url: 42 }), + }), + ); + + await signOut(); + + const after = (window.location as unknown as { href: string }).href; + expect(after).toBe("https://app.moleculesai.app/cp/auth/login"); + }); +}); diff --git a/canvas/src/lib/auth.ts b/canvas/src/lib/auth.ts index fe7c71ab..d091c2cb 100644 --- a/canvas/src/lib/auth.ts +++ b/canvas/src/lib/auth.ts @@ -67,3 +67,80 @@ export function redirectToLogin(screenHint: "sign-up" | "sign-in" = "sign-in"): const dest = `${authOrigin}${AUTH_BASE}/${path}?return_to=${encodeURIComponent(returnTo)}`; window.location.href = dest; } + +/** + * signOut posts to /cp/auth/signout to clear the WorkOS session cookie + * + revoke at the provider, then navigates the browser to the + * provider-supplied hosted logout URL (so the provider's BROWSER-side + * SSO cookie is cleared too — without this, AuthKit silently re-auths + * via SSO on the next /cp/auth/login and the user is "still signed + * in" after pressing Sign out). + * + * Two-layer flow: + * 1. POST /cp/auth/signout → CP clears OUR session cookie + revokes + * session_id at the provider API. Response includes + * `logout_url` — the AuthKit hosted URL the BROWSER must navigate + * to so the provider's own browser cookie is cleared. + * 2. window.location.href = → AuthKit clears its + * session, then redirects the browser to the configured + * return_to (defaults to APP_URL/orgs). + * + * Best-effort by design: a 5xx, network failure, missing logout_url + * (DisabledProvider, dev), or stale cookie still results in the + * browser navigating away — leaving the user on a logged-in-looking + * page after they clicked "Sign out" is the worst possible UX. The + * fallback path navigates to /cp/auth/login on the auth origin, which + * works correctly in environments without a hosted logout flow (dev, + * tests, DisabledProvider). + * + * Throws nothing — callers can disable the button optimistically or + * await this and trust it returns. On a redirect-blocked test + * environment (jsdom under vitest) we still exit cleanly so unit tests + * can spy on the fetch call. + */ +export async function signOut(): Promise { + let logoutURL: string | undefined; + // Fire-and-tolerate the POST. credentials:include is mandatory cross- + // origin so the SaaS canvas (acme.moleculesai.app) can hit + // app.moleculesai.app/cp/auth/signout with the session cookie. + try { + const res = await fetch(`${getAuthOrigin()}${AUTH_BASE}/signout`, { + method: "POST", + credentials: "include", + }); + if (res.ok) { + // Body shape: {"ok": true, "logout_url": "..."}. logout_url is + // empty for DisabledProvider (dev/local) — we fall back to + // /cp/auth/login below. Defensive parsing: a malformed body + // shouldn't strand the user on the authed page. + const body: unknown = await res.json().catch(() => null); + if ( + body && + typeof body === "object" && + "logout_url" in body && + typeof (body as { logout_url: unknown }).logout_url === "string" && + (body as { logout_url: string }).logout_url + ) { + logoutURL = (body as { logout_url: string }).logout_url; + } + } + } catch { + // Ignore — we still redirect below. + } + if (typeof window === "undefined") return; + if (logoutURL) { + // Hosted logout: AuthKit clears its SSO cookie + redirects to + // return_to (configured server-side). This is the path that + // actually breaks the SSO re-auth loop. 
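+    // (logoutURL is guaranteed a non-empty string here by the type
+    // check above, and it only ever comes from our own control plane's
+    // response, which keeps this assignment from becoming an open
+    // redirect.)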
+ window.location.href = logoutURL; + return; + } + // Fallback: no hosted logout (dev, DisabledProvider, network + // failure). Land on the login screen rather than the current URL: + // returning to a tenant URL after signout would just re-redirect + // through /cp/auth/login due to AuthGate. Send the user straight + // there with no return_to so they don't loop back into the org they + // just left. + const authOrigin = getAuthOrigin(); + window.location.href = `${authOrigin}${AUTH_BASE}/login`; +} diff --git a/docs/agent-runtime/team-expansion.md b/docs/agent-runtime/team-expansion.md deleted file mode 100644 index 5785dd13..00000000 --- a/docs/agent-runtime/team-expansion.md +++ /dev/null @@ -1,111 +0,0 @@ -# Team Expansion (Recursive Workspaces) - -When a workspace is expanded into a team, it gains sub-workspaces while its own agent remains as the **team lead** (coordinator). This is recursive — sub-workspaces can themselves be expanded into teams, infinitely deep. - -## How It Works - -When Developer PM is expanded into a team: - -``` -Business Core - | - +-- Developer PM (agent stays, becomes coordinator) - | - +-- Frontend Agent (sub-workspace, private scope) - +-- Backend Agent (sub-workspace, private scope) - +-- QA Agent (sub-workspace, private scope) -``` - -- Developer PM's agent **still exists** and acts as coordinator -- Developer PM receives incoming A2A messages from Business Core -- Developer PM's agent decides how to delegate to sub-workspaces -- Sub-workspaces talk to Developer PM and to each other (same level) -- Sub-workspaces **cannot** talk to Business Core or any workspace outside the team - -## Communication Rules - -| Direction | Allowed? | Example | -|-----------|----------|---------| -| Parent level -> team lead | Yes | Business Core -> Developer PM | -| Team lead -> sub-workspaces | Yes | Developer PM -> Frontend Agent | -| Sub-workspace -> team lead | Yes | Frontend Agent -> Developer PM | -| Sub-workspace <-> sibling | Yes | Frontend Agent <-> Backend Agent | -| Outside -> sub-workspace directly | No (403) | Business Core -> Frontend Agent | -| Sub-workspace -> outside directly | No | Frontend Agent -> Business Core | - -The team lead (Developer PM) is the **only** bridge between the team's internal world and the outside. - -## Scoped Registry - -Sub-workspaces register in the platform registry but with a **private scope**. The registry knows about them but enforces access control. - -``` -Registry: - Business Core :8001 scope: public - Developer PM :8002 scope: public - Frontend Agent :8010 scope: private, parent=Developer PM - Backend Agent :8011 scope: private, parent=Developer PM - QA Agent :8012 scope: private, parent=Developer PM -``` - -- The platform can always discover any workspace (for provisioning, monitoring) -- The parent workspace can discover its sub-workspaces -- Sub-workspaces can discover their siblings (same parent) -- Outside workspaces get a **403 Forbidden** if they try to discover a private sub-workspace - -## How to Expand - -Expansion is triggered via `POST /workspaces/:id/expand`. The platform reads the `sub_workspaces` list from the workspace's config and provisions each one. On the canvas, users right-click a workspace node and select "Expand into team." - -Collapsing is the inverse: `POST /workspaces/:id/collapse`. Sub-workspaces are stopped and removed. - -## What Happens on Expansion - -When Developer PM is expanded into a team, the hierarchy changes but the outside view doesn't. 
Business Core's parent/child relationship to Developer PM is unaffected — Developer PM still responds to the same A2A endpoint. - -The events fired: -- `WORKSPACE_EXPANDED` with the new `sub_workspace_ids` in the payload -- `WORKSPACE_PROVISIONING` for each new sub-workspace -- `WORKSPACE_ONLINE` for each sub-workspace as they come up - -Communication rules are automatically derived from the new hierarchy — no manual wiring needed. - -## Canvas Behavior - -- Children render as embedded mini-cards (`TeamMemberChip`) inside the parent node, not as separate canvas nodes -- Each mini-card shows full status: gradient bar, name, tier badge, skills pills, active tasks, descendant count -- **Recursive rendering** up to 3 levels deep (`MAX_NESTING_DEPTH = 3`) — sub-cards can contain their own "Team" sections -- Parent node dynamically resizes: 210-280px (no children), 320-450px (children), 400-560px (grandchildren) -- Eject button (sky-blue arrow icon) on hover extracts a child from the team -- "Extract from Team" also available in the right-click context menu -- Double-click a team node to zoom/fit to the parent area -- The parent workspace node shows a badge with total descendant count - -## Collapsing a Team - -The inverse of expansion, triggered via `POST /workspaces/:id/collapse`: - -1. Each sub-workspace agent wraps up current work and writes a handoff document to memory -2. Sub-workspaces are stopped and removed -3. The team lead's agent goes back to handling everything directly -4. A `WORKSPACE_COLLAPSED` event fires - -Sub-workspace memory is cleaned up based on backend (see [Memory — Cleanup](../architecture/memory.md#cleanup-on-workspace-deletion)). - -## Deleting a Team Workspace - -When a team workspace is deleted: -1. Platform shows a warning listing all sub-workspaces that will be deleted -2. User can **drag sub-workspaces out** of the team before confirming (promotes them to the parent level) -3. On confirmation, cascade delete removes the parent and all remaining sub-workspaces -4. `WORKSPACE_REMOVED` events fire for each deleted workspace - -## Related Docs - -- [Communication Rules](../api-protocol/communication-rules.md) — Full access control model -- [Core Concepts](../product/core-concepts.md) — Workspace fundamentals -- [System Prompt Structure](./system-prompt-structure.md) — How peer capabilities are injected -- [Provisioner](../architecture/provisioner.md) — How sub-workspaces are deployed -- [Registry & Heartbeat](../api-protocol/registry-and-heartbeat.md) — How registration works -- [Event Log](../architecture/event-log.md) — Events fired during expansion -- [Canvas UI](../frontend/canvas.md) — Visual behavior of teams diff --git a/docs/api-reference.md b/docs/api-reference.md index e1a75668..12e94a3c 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -41,8 +41,6 @@ Full contract: `docs/runbooks/admin-auth.md`. 
 | GET | /admin/workspaces/:id/test-token | admin_test_token.go — mint a fresh bearer token for E2E scripts; returns 404 unless `MOLECULE_ENV != production` or `MOLECULE_ENABLE_TEST_TOKENS=1` |
 | GET/POST/DELETE | /admin/secrets[/:key] | secrets.go — legacy aliases for /settings/secrets |
 | WS | /workspaces/:id/terminal | terminal.go |
-| POST | /workspaces/:id/expand | team.go |
-| POST | /workspaces/:id/collapse | team.go |
 | POST/GET | /workspaces/:id/approvals | approvals.go |
 | POST | /workspaces/:id/approvals/:id/decide | approvals.go |
 | GET | /approvals/pending | approvals.go |
diff --git a/docs/architecture/molecule-technical-doc.md b/docs/architecture/molecule-technical-doc.md
index 0d9c653c..cd3dc957 100644
--- a/docs/architecture/molecule-technical-doc.md
+++ b/docs/architecture/molecule-technical-doc.md
@@ -336,8 +336,6 @@ This same logic governs: A2A delegation, memory scope enforcement, activity visi
 | Method | Endpoint | Purpose |
 |--------|----------|---------|
-| `POST` | `/workspaces/:id/expand` | Expand workspace into team (become coordinator) |
-| `POST` | `/workspaces/:id/collapse` | Collapse team back to single workspace |
 ### Files, Terminal, Templates, Bundles (8 endpoints)
diff --git a/docs/frontend/canvas.md b/docs/frontend/canvas.md
index 8d59c80f..fc103bd6 100644
--- a/docs/frontend/canvas.md
+++ b/docs/frontend/canvas.md
@@ -186,4 +186,3 @@ So the UI now exposes more operational failure state directly instead of silentl
 - [Quickstart](../quickstart.md)
 - [Platform API](../api-protocol/platform-api.md)
 - [Workspace Runtime](../agent-runtime/workspace-runtime.md)
-- [Team Expansion](../agent-runtime/team-expansion.md)
diff --git a/docs/glossary.md b/docs/glossary.md
index f0343a38..b3535ae8 100644
--- a/docs/glossary.md
+++ b/docs/glossary.md
@@ -18,7 +18,7 @@ lands in the watch list with a colliding term, add a row here.
 | **plugin** | A directory under `plugins/` packaging one or more skills or an MCP server wrapper, installable per-workspace via `POST /workspaces/:id/plugins`. Governed by `plugin.yaml`. | **Langflow**: a visual UI node / component in a flowchart. **CrewAI**: a Python-importable callable registered as a capability. |
 | **agent** | A persistent containerized workspace running continuously — an identity with memory, a role, and a schedule. Not a one-shot invocation. | Most frameworks (AutoGPT, LangChain agents, OpenAI Assistants): a stateless function-call loop. No persistence between invocations unless explicitly checkpointed. |
 | **flow** | A task execution within a workspace — a request enters, the agent runs tools, emits a response, logs activity. No explicit graph abstraction. | **Langflow**: a directed graph of nodes you author visually. **LangGraph**: a stateful graph of callable nodes. Our "flow" is an imperative timeline, not a graph. |
-| **team** | A named cluster of workspaces under a PM (org template `expand_team`). Used for role grouping in Canvas. | **CrewAI**: a "crew" is a sequence of agents that pass a task through a declared order. Our "team" is an org-chart abstraction, not an execution order. |
+| **team** | A named cluster of workspaces under a PM. Used for role grouping in Canvas. | **CrewAI**: a "crew" is a sequence of agents that pass a task through a declared order. Our "team" is an org-chart abstraction, not an execution order. |
 | **skill** | A directory with `SKILL.md` that an agent invokes via the `Skill` tool. Skills are documentation + optional scripts that teach an agent a recipe.
| **Anthropic Skills API**: nearly identical. **CrewAI tool**: closer to our plugin's MCP tool, not our skill. | | **channel** | An outbound/inbound social integration (Telegram, Slack, …) per-workspace, wired in `workspace_channels`. | Slack's "channel": the container for messages. We use "channel" for the adapter + credentials, not the conversation itself. | | **runtime** | The execution engine image tag for a workspace: one of `langgraph`, `claude-code`, `openclaw`, `crewai`, `autogen`, `deepagents`, `hermes`. | **LangGraph runtime**: the Python process running the graph. We use "runtime" for the Docker image + adapter pairing, not the inner process. | diff --git a/docs/guides/mcp-server-setup.md b/docs/guides/mcp-server-setup.md index aacc554a..5539ba97 100644 --- a/docs/guides/mcp-server-setup.md +++ b/docs/guides/mcp-server-setup.md @@ -166,8 +166,6 @@ list_workspaces | MCP Tool | API Route | Method | Description | |----------|-----------|--------|-------------| -| `expand_team` | `/workspaces/:id/expand` | POST | Expand team node | -| `collapse_team` | `/workspaces/:id/collapse` | POST | Collapse team node | ### Templates & Bundles diff --git a/scripts/build_runtime_package.py b/scripts/build_runtime_package.py index 84636c2b..e4ced302 100755 --- a/scripts/build_runtime_package.py +++ b/scripts/build_runtime_package.py @@ -73,11 +73,15 @@ TOP_LEVEL_MODULES = { "executor_helpers", "heartbeat", "inbox", + "inbox_uploads", "initial_prompt", "internal_chat_uploads", "internal_file_read", "main", "mcp_cli", + "mcp_heartbeat", + "mcp_inbox_pollers", + "mcp_workspace_resolver", "molecule_ai_status", "not_configured_handler", "platform_auth", diff --git a/tests/e2e/test_poll_mode_chat_upload_e2e.sh b/tests/e2e/test_poll_mode_chat_upload_e2e.sh new file mode 100755 index 00000000..fbed604f --- /dev/null +++ b/tests/e2e/test_poll_mode_chat_upload_e2e.sh @@ -0,0 +1,295 @@ +#!/usr/bin/env bash +# E2E for poll-mode chat upload (RFC #2891 phases 1-5b). +# +# Round-trip: register a workspace as poll-mode (no callback URL) → POST a +# multi-file chat upload → verify each file becomes (a) one +# `chat_upload_receive` activity row and (b) one /pending-uploads row → fetch +# the bytes back via the poll endpoint → ack → verify the row 404s on +# subsequent fetch. Also pins cross-workspace bleed protection: workspace B +# cannot read workspace A's pending uploads even with its own valid bearer. +# +# Why this exists separately from test_chat_upload_e2e.sh: that script +# covers the PUSH path (the workspace's own /internal/chat/uploads/ingest). +# This script covers the POLL path: the same canvas-side request lands on +# the platform's pendinguploads.Storage instead, and the workspace fetches +# it later. The two paths share zero handler code on the platform side, so +# both need their own E2E. +# +# Requires: platform running on localhost:8080 with migrations applied. +# bash workspace-server/scripts/dev-start.sh +# bash workspace-server/scripts/run-migrations.sh +# +# Idempotent: each run uses fresh per-script workspace UUIDs so reruns +# don't collide. Best-effort cleanup on EXIT — does NOT call +# e2e_cleanup_all_workspaces (see +# `feedback_never_run_cluster_cleanup_tests_on_live_platform.md`). 
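+#
+# Invocation (as CI runs it; see .github/workflows/e2e-api.yml):
+#   bash tests/e2e/test_poll_mode_chat_upload_e2e.sh
+# $BASE comes from tests/e2e/_lib.sh (sourced below) or the environment.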
+ +set -euo pipefail + +source "$(dirname "$0")/_lib.sh" + +PASS=0 +FAIL=0 +TIMEOUT="${A2A_TIMEOUT:-30}" + +gen_uuid() { + if command -v uuidgen >/dev/null 2>&1; then + uuidgen | tr '[:upper:]' '[:lower:]' + else + python3 -c 'import uuid; print(uuid.uuid4())' + fi +} +WS_A="$(gen_uuid)" +WS_B="$(gen_uuid)" + +# Per-run scratch dir collected under one trap so every assertion-failure +# path drops the temp files it made (see test_chat_attachments_e2e.sh). +TMPDIR_E2E=$(mktemp -d -t poll-chat-upload-e2e-XXXXXX) + +cleanup() { + local rc=$? + curl -s -X DELETE "$BASE/workspaces/$WS_A?confirm=true" >/dev/null 2>&1 || true + curl -s -X DELETE "$BASE/workspaces/$WS_B?confirm=true" >/dev/null 2>&1 || true + rm -rf "$TMPDIR_E2E" + exit $rc +} +trap cleanup EXIT INT TERM + +check() { + local desc="$1" expected="$2" actual="$3" + if echo "$actual" | grep -qF -- "$expected"; then + echo "PASS: $desc" + PASS=$((PASS + 1)) + else + echo "FAIL: $desc" + echo " expected to contain: $expected" + echo " got: $(echo "$actual" | head -10)" + FAIL=$((FAIL + 1)) + fi +} + +check_eq() { + local desc="$1" expected="$2" actual="$3" + if [ "$actual" = "$expected" ]; then + echo "PASS: $desc" + PASS=$((PASS + 1)) + else + echo "FAIL: $desc" + echo " expected: $expected" + echo " got: $actual" + FAIL=$((FAIL + 1)) + fi +} + +echo "=== Poll-Mode Chat Upload E2E ===" +echo " base: $BASE" +echo " workspace A: $WS_A" +echo " workspace B: $WS_B" +echo "" + +# ---------- Phase 1: register poll-mode workspace ---------- +echo "--- Phase 1: Register poll-mode workspace A ---" + +REG_A=$(curl -s -X POST "$BASE/registry/register" \ + -H "Content-Type: application/json" \ + -d "{ + \"id\": \"$WS_A\", + \"delivery_mode\": \"poll\", + \"agent_card\": {\"name\": \"poll-chat-upload-test-a\"} + }") +check "register accepts poll mode without URL" '"status":"registered"' "$REG_A" +TOK_A=$(echo "$REG_A" | e2e_extract_token || true) +[ -n "$TOK_A" ] || { echo "FAIL: no auth_token in register response (ws A)"; FAIL=$((FAIL + 1)); exit 1; } + +# ---------- Phase 2: multi-file chat upload ---------- +echo "" +echo "--- Phase 2: POST /chat/uploads with two files ---" + +FILE1="$TMPDIR_E2E/alpha.txt" +FILE2="$TMPDIR_E2E/beta.txt" +EXPECTED1="alpha-secret-$(openssl rand -hex 4)" +EXPECTED2="beta-secret-$(openssl rand -hex 4)" +printf '%s' "$EXPECTED1" > "$FILE1" +printf '%s' "$EXPECTED2" > "$FILE2" + +UPLOAD=$(curl -s -X POST "$BASE/workspaces/$WS_A/chat/uploads" \ + -H "Authorization: Bearer $TOK_A" \ + -F "files=@$FILE1;filename=alpha.txt;type=text/plain" \ + -F "files=@$FILE2;filename=beta.txt;type=text/plain" \ + -w "\nHTTP_CODE=%{http_code}\n") +UPLOAD_CODE=$(echo "$UPLOAD" | grep -oE 'HTTP_CODE=[0-9]+' | cut -d= -f2) +UPLOAD_BODY=$(echo "$UPLOAD" | sed '/^HTTP_CODE=/,$d') + +check_eq "upload returns 200" "200" "$UPLOAD_CODE" +check "upload response has files array" '"files":' "$UPLOAD_BODY" + +# Pull file_ids out of the URI in the response. URI shape is +# `platform-pending:/` — proves the response came from the +# poll-mode branch, not the push-mode internal-ingest branch. 
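+# (Concretely the shape is platform-pending:<workspace_id>/<file_id>:
+# the checks below assert the "platform-pending:$WS_A/" prefix, and the
+# file_id is recovered by stripping everything up to the final slash.)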
+URI1=$(echo "$UPLOAD_BODY" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][0]["uri"])') +URI2=$(echo "$UPLOAD_BODY" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][1]["uri"])') +check "URI 1 has platform-pending: scheme" "platform-pending:$WS_A/" "$URI1" +check "URI 2 has platform-pending: scheme" "platform-pending:$WS_A/" "$URI2" + +FID1="${URI1##*/}" +FID2="${URI2##*/}" +[ -n "$FID1" ] && [ -n "$FID2" ] || { echo "FAIL: could not extract file IDs"; FAIL=$((FAIL + 1)); exit 1; } +echo " file_id 1: $FID1" +echo " file_id 2: $FID2" + +# ---------- Phase 3: activity rows visible to the workspace ---------- +echo "" +echo "--- Phase 3: /activity shows two chat_upload_receive rows ---" + +# activity_logs INSERTs run in a goroutine — give them a moment. +sleep 1 +ACT=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \ + "$BASE/workspaces/$WS_A/activity?type=a2a_receive&limit=20") +check "activity feed has the alpha file" "$FID1" "$ACT" +check "activity feed has the beta file" "$FID2" "$ACT" +check "activity rows tagged chat_upload_receive" '"method":"chat_upload_receive"' "$ACT" +check "activity rows record alpha mimetype" '"mimeType":"text/plain"' "$ACT" + +CHAT_UPLOAD_COUNT=$(echo "$ACT" | python3 -c ' +import json, sys +rows = json.load(sys.stdin) +n = sum(1 for r in rows if (r.get("method") or "") == "chat_upload_receive") +print(n) +') +check_eq "exactly two chat_upload_receive rows" "2" "$CHAT_UPLOAD_COUNT" + +# ---------- Phase 4: GET /pending-uploads/:file_id/content ---------- +echo "" +echo "--- Phase 4: Fetch content for each pending upload ---" + +GOT1=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \ + "$BASE/workspaces/$WS_A/pending-uploads/$FID1/content") +check_eq "alpha bytes round-trip" "$EXPECTED1" "$GOT1" + +GOT2=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \ + "$BASE/workspaces/$WS_A/pending-uploads/$FID2/content") +check_eq "beta bytes round-trip" "$EXPECTED2" "$GOT2" + +# Mimetype + Content-Disposition headers should match what was uploaded. +HEAD1=$(curl -s -D - -o /dev/null --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \ + "$BASE/workspaces/$WS_A/pending-uploads/$FID1/content") +check "alpha response carries text/plain Content-Type" "Content-Type: text/plain" "$HEAD1" +check "alpha response carries Content-Disposition with filename" 'filename="alpha.txt"' "$HEAD1" + +# ---------- Phase 5: idempotent re-fetch (until ack) ---------- +echo "" +echo "--- Phase 5: Re-fetch before ack returns the same bytes ---" + +RE_GOT1=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \ + "$BASE/workspaces/$WS_A/pending-uploads/$FID1/content") +check_eq "re-fetch returns same alpha bytes" "$EXPECTED1" "$RE_GOT1" + +# ---------- Phase 6: ack each row ---------- +echo "" +echo "--- Phase 6: Ack each pending upload ---" + +ACK1=$(curl -s -X POST --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \ + "$BASE/workspaces/$WS_A/pending-uploads/$FID1/ack") +check "alpha ack returns acked:true" '"acked":true' "$ACK1" + +ACK2=$(curl -s -X POST --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \ + "$BASE/workspaces/$WS_A/pending-uploads/$FID2/ack") +check "beta ack returns acked:true" '"acked":true' "$ACK2" + +# Re-ack should still 200 (idempotent — the row's gone but the workspace's +# at-least-once intent was already honored, and the second ack hits the +# raced path which also returns 200). 
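+# (The case statement below still accepts 404 as well; which status the
+# second ack sees depends on which side of the delete race it lands on.)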
+RE_ACK1=$(curl -s -w '\n%{http_code}' -X POST --max-time "$TIMEOUT" \ + -H "Authorization: Bearer $TOK_A" \ + "$BASE/workspaces/$WS_A/pending-uploads/$FID1/ack") +RE_ACK1_CODE=$(printf '%s' "$RE_ACK1" | tail -n1) +# Acked rows return 404 on Get-before-Ack (the row's still in the table +# but Get filters acked_at IS NULL); workspace would not normally re-ack +# since it already saw the success. Accept both 200 and 404 here so the +# test pins the contract without being brittle on the inner ordering. +case "$RE_ACK1_CODE" in + 200|404) + echo "PASS: re-ack returns 200 or 404 ($RE_ACK1_CODE)" + PASS=$((PASS + 1)) + ;; + *) + echo "FAIL: re-ack returned unexpected $RE_ACK1_CODE" + FAIL=$((FAIL + 1)) + ;; +esac + +# ---------- Phase 7: GET content after ack returns 404 ---------- +echo "" +echo "--- Phase 7: Acked file 404s on subsequent fetch ---" + +POST_ACK=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \ + "$BASE/workspaces/$WS_A/pending-uploads/$FID1/content") +POST_ACK_CODE=$(printf '%s' "$POST_ACK" | tail -n1) +check_eq "acked alpha returns HTTP 404" "404" "$POST_ACK_CODE" + +# ---------- Phase 8: cross-workspace bleed protection ---------- +echo "" +echo "--- Phase 8: Workspace B cannot read workspace A's pending uploads ---" + +# Stage a fresh upload on workspace A so we have an UN-acked row to probe. +PROBE_FILE="$TMPDIR_E2E/probe.txt" +printf '%s' "probe-bytes-$(openssl rand -hex 4)" > "$PROBE_FILE" +PROBE_UP=$(curl -s -X POST "$BASE/workspaces/$WS_A/chat/uploads" \ + -H "Authorization: Bearer $TOK_A" \ + -F "files=@$PROBE_FILE;filename=probe.txt;type=text/plain") +PROBE_FID=$(echo "$PROBE_UP" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][0]["uri"].split("/")[-1])') +[ -n "$PROBE_FID" ] || { echo "FAIL: probe upload returned no file_id"; FAIL=$((FAIL + 1)); exit 1; } + +# Register a SECOND poll-mode workspace and capture its bearer. +REG_B=$(curl -s -X POST "$BASE/registry/register" \ + -H "Content-Type: application/json" \ + -d "{ + \"id\": \"$WS_B\", + \"delivery_mode\": \"poll\", + \"agent_card\": {\"name\": \"poll-chat-upload-test-b\"} + }") +check "second workspace registers" '"status":"registered"' "$REG_B" +TOK_B=$(echo "$REG_B" | e2e_extract_token || true) +[ -n "$TOK_B" ] || { echo "FAIL: no auth_token (ws B)"; FAIL=$((FAIL + 1)); exit 1; } + +# B's bearer hitting B's URL with A's file_id → 404 (handler checks the row's +# workspace_id matches the URL :id, not the bearer's workspace). +CROSS_RESP=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \ + -H "Authorization: Bearer $TOK_B" \ + "$BASE/workspaces/$WS_B/pending-uploads/$PROBE_FID/content") +CROSS_CODE=$(printf '%s' "$CROSS_RESP" | tail -n1) +check_eq "B's URL with A's file_id returns 404" "404" "$CROSS_CODE" + +# B's bearer hitting A's URL → 401 (wsAuth pins bearer to :id). This is the +# strictest cross-workspace check: a presented-but-wrong bearer is rejected +# in EVERY platform posture (dev-mode fail-open only triggers when no bearer +# is presented at all — invalid tokens always 401). +WRONG_BEARER=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \ + -H "Authorization: Bearer $TOK_B" \ + "$BASE/workspaces/$WS_A/pending-uploads/$PROBE_FID/content") +WRONG_CODE=$(printf '%s' "$WRONG_BEARER" | tail -n1) +check_eq "B's bearer on A's URL returns 401" "401" "$WRONG_CODE" + +# NB: a fully bearerless request to /pending-uploads/:fid/content returns +# 401 ONLY when the platform has MOLECULE_ENV != development (production / +# staging). 
On local-dev with MOLECULE_ENV=development the wsauth middleware +# fail-opens for bearerless requests so the canvas at :3000 can talk to the +# platform at :8080 without per-call token plumbing — see middleware/ +# devmode.go. The strict bearerless-401 contract is covered by the wsauth +# unit + middleware tests; we don't reassert it here because the result +# depends on platform posture, not the poll-mode upload contract. + +# ---------- Phase 9: invalid file_id rejected at the URL parser ---------- +echo "" +echo "--- Phase 9: Invalid file_id returns 400 ---" + +BAD_FID=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \ + -H "Authorization: Bearer $TOK_A" \ + "$BASE/workspaces/$WS_A/pending-uploads/not-a-uuid/content") +BAD_FID_CODE=$(printf '%s' "$BAD_FID" | tail -n1) +check_eq "invalid file_id UUID returns 400" "400" "$BAD_FID_CODE" + +# ---------- Results ---------- +echo "" +echo "=== Results: $PASS passed, $FAIL failed ===" +[ "$FAIL" -eq 0 ] diff --git a/tests/harness/compose.yml b/tests/harness/compose.yml index debbb675..e209287d 100644 --- a/tests/harness/compose.yml +++ b/tests/harness/compose.yml @@ -94,6 +94,13 @@ services: CP_UPSTREAM_URL: "http://cp-stub:9090" RATE_LIMIT: "1000" CANVAS_PROXY_URL: "http://localhost:3000" + # Memory v2 sidecar (PR #2906) bundles the plugin into the + # tenant image and starts it before the main server. The plugin + # runs `CREATE EXTENSION vector` on first boot, which fails on + # the harness's plain postgres:15-alpine (no pgvector). The + # harness doesn't exercise memory features, so disable the + # sidecar via the entrypoint's documented escape hatch. + MEMORY_PLUGIN_DISABLE: "1" networks: [harness-net] healthcheck: test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"] @@ -142,6 +149,13 @@ services: CP_UPSTREAM_URL: "http://cp-stub:9090" RATE_LIMIT: "1000" CANVAS_PROXY_URL: "http://localhost:3000" + # Memory v2 sidecar (PR #2906) bundles the plugin into the + # tenant image and starts it before the main server. The plugin + # runs `CREATE EXTENSION vector` on first boot, which fails on + # the harness's plain postgres:15-alpine (no pgvector). The + # harness doesn't exercise memory features, so disable the + # sidecar via the entrypoint's documented escape hatch. + MEMORY_PLUGIN_DISABLE: "1" networks: [harness-net] healthcheck: test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"] diff --git a/workspace-server/Dockerfile b/workspace-server/Dockerfile index 7065e405..d6754312 100644 --- a/workspace-server/Dockerfile +++ b/workspace-server/Dockerfile @@ -21,6 +21,14 @@ ARG GIT_SHA=dev RUN CGO_ENABLED=0 GOOS=linux go build \ -ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \ -o /platform ./cmd/server +# Bundle the built-in memory-plugin-postgres binary so an operator can +# activate Memory v2 by setting MEMORY_V2_CUTOVER=true + (default) +# MEMORY_PLUGIN_URL=http://localhost:9100. The entrypoint starts this +# binary in the background; main /platform talks to it over loopback. +# Stays inert until the operator flips the cutover env var. 
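+# Illustrative activation (operator-supplied env, not image defaults):
+#   MEMORY_V2_CUTOVER=true
+#   MEMORY_PLUGIN_URL=http://localhost:9100   # this value is the default when unset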
+RUN CGO_ENABLED=0 GOOS=linux go build \ + -ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \ + -o /memory-plugin ./cmd/memory-plugin-postgres # Clone templates + plugins at build time from manifest.json FROM alpine:3.20 AS templates @@ -30,8 +38,9 @@ COPY scripts/clone-manifest.sh /scripts/clone-manifest.sh RUN chmod +x /scripts/clone-manifest.sh && /scripts/clone-manifest.sh /manifest.json /workspace-configs-templates /org-templates /plugins FROM alpine:3.20 -RUN apk add --no-cache ca-certificates git tzdata +RUN apk add --no-cache ca-certificates git tzdata wget COPY --from=builder /platform /platform +COPY --from=builder /memory-plugin /memory-plugin COPY workspace-server/migrations /migrations COPY --from=templates /workspace-configs-templates /workspace-configs-templates COPY --from=templates /org-templates /org-templates @@ -41,6 +50,7 @@ RUN addgroup -g 1000 platform && adduser -u 1000 -G platform -s /bin/sh -D platf EXPOSE 8080 COPY <<'ENTRY' /entrypoint.sh #!/bin/sh +# Set up docker-socket group (unchanged from pre-sidecar entrypoint). if [ -S /var/run/docker.sock ]; then SOCK_GID=$(stat -c '%g' /var/run/docker.sock 2>/dev/null || stat -f '%g' /var/run/docker.sock 2>/dev/null) if [ -n "$SOCK_GID" ] && [ "$SOCK_GID" != "0" ]; then @@ -50,6 +60,61 @@ if [ -S /var/run/docker.sock ]; then addgroup platform root 2>/dev/null || true fi fi + +# Memory v2 sidecar (built-in postgres plugin). Co-located with the +# main server so operators flipping MEMORY_V2_CUTOVER=true don't need +# to provision a separate service. +# +# Spawn-gating: only start the sidecar when the operator has indicated +# they want it — either MEMORY_V2_CUTOVER=true OR MEMORY_PLUGIN_URL set. +# Without that signal, the sidecar adds zero value (the platform's +# wiring.go skips building the client too) but pays a real cost: the +# plugin's first migration runs `CREATE EXTENSION vector`, which fails +# on tenant Postgres without pgvector preinstalled and aborts container +# boot via the 30s health gate. Caught on staging redeploy 2026-05-05. +# +# Env defaults (when sidecar IS spawned): +# MEMORY_PLUGIN_DATABASE_URL = $DATABASE_URL (share existing Postgres; +# plugin's `memory_namespaces` / `memory_records` tables coexist +# with `agent_memories` and the rest of the platform schema — +# no conflicts. Operator can override with a separate URL.) +# MEMORY_PLUGIN_LISTEN_ADDR = 127.0.0.1:9100 +# +# Set MEMORY_PLUGIN_DISABLE=1 to force-skip the sidecar even with +# cutover env set (e.g. running the plugin externally on a separate host). +memory_plugin_wanted="" +if [ "$MEMORY_V2_CUTOVER" = "true" ] || [ -n "$MEMORY_PLUGIN_URL" ]; then + memory_plugin_wanted=1 +fi +if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$memory_plugin_wanted" ] && [ -n "$DATABASE_URL" ]; then + : "${MEMORY_PLUGIN_DATABASE_URL:=$DATABASE_URL}" + : "${MEMORY_PLUGIN_LISTEN_ADDR:=:9100}" + export MEMORY_PLUGIN_DATABASE_URL MEMORY_PLUGIN_LISTEN_ADDR + echo "memory-plugin: starting sidecar on $MEMORY_PLUGIN_LISTEN_ADDR" >&2 + # Drop privs to the platform user — the plugin doesn't need root and + # runs unprivileged elsewhere (tenant image already starts as canvas). + su-exec platform /memory-plugin & + MEMORY_PLUGIN_PID=$! + # Wait up to 30s for the plugin's /v1/health to return 200. Boot + # failure here is fatal — better to crash-loop than to silently + # serve cutover traffic against a dead plugin. 
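+  # (The "#:" strip below assumes the ":<port>" form defaulted above; a
+  # host-qualified override such as 127.0.0.1:9100 would need
+  # ${MEMORY_PLUGIN_LISTEN_ADDR##*:} to recover the bare port for the
+  # health URL.)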
+ health_port=${MEMORY_PLUGIN_LISTEN_ADDR#:} + ready=0 + for _ in $(seq 1 30); do + if wget -qO- --timeout=2 "http://localhost:${health_port}/v1/health" >/dev/null 2>&1; then + ready=1 + break + fi + sleep 1 + done + if [ "$ready" != "1" ]; then + echo "memory-plugin: ❌ /v1/health never returned 200 after 30s — aborting boot. Check that DATABASE_URL is reachable, has the pgvector extension, and the plugin's migrations applied." >&2 + kill "$MEMORY_PLUGIN_PID" 2>/dev/null || true + exit 1 + fi + echo "memory-plugin: ✅ sidecar healthy on :$health_port" >&2 +fi + exec su-exec platform /platform "$@" ENTRY RUN chmod +x /entrypoint.sh && apk add --no-cache su-exec diff --git a/workspace-server/Dockerfile.tenant b/workspace-server/Dockerfile.tenant index 23140a67..6ccc737e 100644 --- a/workspace-server/Dockerfile.tenant +++ b/workspace-server/Dockerfile.tenant @@ -34,6 +34,13 @@ ARG GIT_SHA=dev RUN CGO_ENABLED=0 GOOS=linux go build \ -ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \ -o /platform ./cmd/server +# Memory v2 sidecar binary (Memory v2 #2728). Bundled so an operator +# can activate cutover by flipping MEMORY_V2_CUTOVER=true without +# provisioning a separate service. See entrypoint-tenant.sh for the +# launch logic. +RUN CGO_ENABLED=0 GOOS=linux go build \ + -ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \ + -o /memory-plugin ./cmd/memory-plugin-postgres # ── Stage 2: Canvas Next.js standalone ──────────────────────────────── FROM node:20-alpine AS canvas-builder @@ -74,8 +81,9 @@ RUN deluser --remove-home node 2>/dev/null || true; \ delgroup node 2>/dev/null || true; \ addgroup -g 1000 canvas && adduser -u 1000 -G canvas -s /bin/sh -D canvas -# Go platform binary +# Go platform binary + Memory v2 sidecar COPY --from=go-builder /platform /platform +COPY --from=go-builder /memory-plugin /memory-plugin COPY workspace-server/migrations /migrations # Templates + plugins (cloned from GitHub in stage 3) @@ -91,7 +99,7 @@ COPY --from=canvas-builder /canvas/public ./public COPY workspace-server/entrypoint-tenant.sh /entrypoint.sh RUN chmod +x /entrypoint.sh && \ - chown -R canvas:canvas /canvas /platform /migrations + chown -R canvas:canvas /canvas /platform /memory-plugin /migrations EXPOSE 8080 # entrypoint.sh starts as root to fix volume perms, then drops to diff --git a/workspace-server/cmd/memory-plugin-postgres/config_test.go b/workspace-server/cmd/memory-plugin-postgres/config_test.go new file mode 100644 index 00000000..252f0d1b --- /dev/null +++ b/workspace-server/cmd/memory-plugin-postgres/config_test.go @@ -0,0 +1,50 @@ +package main + +import ( + "strings" + "testing" +) + +// TestLoadConfig_DefaultListenAddrIsLoopback pins the default-bind contract. +// +// Why this matters: with the prior `:9100` default, the plugin listened on +// every interface. Inside the container it didn't matter (no host port +// mapping today), but a future change that publishes 9100 OR a cross-host +// sidecar deploy would have exposed an unauth'd memory store. Loopback by +// default is the least-privilege baseline; operators with a multi-host +// topology override via MEMORY_PLUGIN_LISTEN_ADDR. 
+func TestLoadConfig_DefaultListenAddrIsLoopback(t *testing.T) { + t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "postgres://stub") + t.Setenv("MEMORY_PLUGIN_LISTEN_ADDR", "") + + cfg, err := loadConfig() + if err != nil { + t.Fatalf("loadConfig: %v", err) + } + if !strings.HasPrefix(cfg.ListenAddr, "127.0.0.1:") { + t.Errorf("default ListenAddr must bind loopback-only, got %q "+ + "(security regression — would expose plugin on every interface)", + cfg.ListenAddr) + } +} + +func TestLoadConfig_ListenAddrEnvOverride(t *testing.T) { + t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "postgres://stub") + t.Setenv("MEMORY_PLUGIN_LISTEN_ADDR", ":9100") + + cfg, err := loadConfig() + if err != nil { + t.Fatalf("loadConfig: %v", err) + } + if cfg.ListenAddr != ":9100" { + t.Errorf("env override ignored: want :9100, got %q", cfg.ListenAddr) + } +} + +func TestLoadConfig_MissingDatabaseURL(t *testing.T) { + t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "") + + if _, err := loadConfig(); err == nil { + t.Fatal("loadConfig must error when MEMORY_PLUGIN_DATABASE_URL is empty") + } +} diff --git a/workspace-server/cmd/memory-plugin-postgres/main.go b/workspace-server/cmd/memory-plugin-postgres/main.go index 84e01351..2a1b2dee 100644 --- a/workspace-server/cmd/memory-plugin-postgres/main.go +++ b/workspace-server/cmd/memory-plugin-postgres/main.go @@ -10,6 +10,7 @@ package main import ( "context" "database/sql" + "embed" "errors" "fmt" "log" @@ -17,6 +18,7 @@ import ( "net/http" "os" "os/signal" + "sort" "strings" "syscall" "time" @@ -26,12 +28,28 @@ import ( "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/pgplugin" ) +// migrationsFS bundles the .up.sql files into the binary at build time +// so the prebuilt image doesn't need the source tree at runtime. The +// prior `os.ReadDir("cmd/memory-plugin-postgres/migrations")` path +// only resolved during `go test` from the repo root — in the published +// image the path didn't exist and boot failed after the 30s health gate +// (caught on staging redeploy 2026-05-05 after PR #2906). +// +//go:embed migrations/*.up.sql +var migrationsFS embed.FS + const ( envDatabaseURL = "MEMORY_PLUGIN_DATABASE_URL" envListenAddr = "MEMORY_PLUGIN_LISTEN_ADDR" envSkipMigrate = "MEMORY_PLUGIN_SKIP_MIGRATE" - defaultListenAddr = ":9100" + // Loopback-only by default (defense in depth). The platform talks to + // the plugin over `http://localhost:9100` from the same container, so + // binding to all interfaces would only widen the reachable surface + // without enabling any in-design caller. Operators running the plugin + // on a separate host override via MEMORY_PLUGIN_LISTEN_ADDR=:9100 (or + // some other interface). + defaultListenAddr = "127.0.0.1:9100" ) func main() { @@ -143,32 +161,71 @@ func openDB(databaseURL string) (*sql.DB, error) { return db, nil } -// runMigrations applies the schema migrations bundled at -// cmd/memory-plugin-postgres/migrations/. Idempotent on repeat boot. +// runMigrations applies the schema migrations bundled into the binary +// via go:embed (see migrationsFS at the top of this file). Idempotent +// on repeat boot — every migration file uses CREATE … IF NOT EXISTS. // -// Implementation note: rather than embedding the full migrate engine, -// we read the migration files at boot from a known relative path. The -// down migrations are deliberately NOT applied here — that's a manual -// operator action. This keeps the binary tiny and avoids dragging in -// golang-migrate's drivers. 
+// The down migrations are deliberately NOT applied here — that's a
+// manual operator action. This keeps the binary tiny and avoids
+// dragging in golang-migrate's drivers.
+//
+// MEMORY_PLUGIN_MIGRATIONS_DIR (filesystem path) is honored as an
+// override for operators who need to ship custom migrations alongside
+// the binary without rebuilding. When unset (the common case) we read
+// from the embedded FS.
 func runMigrations(db *sql.DB) error {
-	// Find the migrations directory. In `go run` mode it's relative
-	// to the cmd dir; in the prebuilt binary case it's expected next
-	// to the binary OR via env var override.
-	dir := os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR")
-	if dir == "" {
-		// Best-effort: try the cwd-relative path that works for `go test`.
-		dir = "cmd/memory-plugin-postgres/migrations"
+	if dir := strings.TrimSpace(os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR")); dir != "" {
+		return runMigrationsFromDisk(db, dir)
 	}
-	entries, err := os.ReadDir(dir)
+	return runMigrationsFromEmbed(db)
+}
+
+// runMigrationsFromEmbed applies the *.up.sql files bundled into the
+// binary at build time. Order is alphabetical via the explicit sort —
+// both embed.FS and os.ReadDir document name-sorted listings, but
+// sorting here keeps the ordering guarantee local instead of leaning
+// on that contract.
+func runMigrationsFromEmbed(db *sql.DB) error {
+	entries, err := migrationsFS.ReadDir("migrations")
 	if err != nil {
-		return fmt.Errorf("read migrations dir %q: %w", dir, err)
+		return fmt.Errorf("read embedded migrations: %w", err)
 	}
+	names := make([]string, 0, len(entries))
 	for _, e := range entries {
 		if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
 			continue
 		}
-		path := dir + "/" + e.Name()
+		names = append(names, e.Name())
+	}
+	sort.Strings(names)
+	for _, name := range names {
+		data, err := migrationsFS.ReadFile("migrations/" + name)
+		if err != nil {
+			return fmt.Errorf("read embedded %q: %w", name, err)
+		}
+		if _, err := db.Exec(string(data)); err != nil {
+			return fmt.Errorf("apply %q: %w", name, err)
+		}
+		log.Printf("applied embedded migration %s", name)
+	}
+	return nil
+}
+
+// runMigrationsFromDisk preserves the legacy filesystem-path mode for
+// operator-supplied custom migrations.
+func runMigrationsFromDisk(db *sql.DB, dir string) error {
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		return fmt.Errorf("read migrations dir %q: %w", dir, err)
+	}
+	names := make([]string, 0, len(entries))
+	for _, e := range entries {
+		if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
+			continue
+		}
+		names = append(names, e.Name())
+	}
+	sort.Strings(names)
+	for _, name := range names {
+		path := dir + "/" + name
 		data, err := os.ReadFile(path)
 		if err != nil {
 			return fmt.Errorf("read %q: %w", path, err)
@@ -176,7 +233,7 @@ func runMigrations(db *sql.DB) error {
 		if _, err := db.Exec(string(data)); err != nil {
 			return fmt.Errorf("apply %q: %w", path, err)
 		}
-		log.Printf("applied migration %s", e.Name())
+		log.Printf("applied disk migration %s (from %s)", name, dir)
 	}
 	return nil
 }
diff --git a/workspace-server/cmd/memory-plugin-postgres/migrations_embed_test.go b/workspace-server/cmd/memory-plugin-postgres/migrations_embed_test.go
new file mode 100644
index 00000000..f2f0b785
--- /dev/null
+++ b/workspace-server/cmd/memory-plugin-postgres/migrations_embed_test.go
@@ -0,0 +1,72 @@
+package main
+
+import (
+ "strings"
+ "testing"
+)
+
+// TestMigrationsEmbedded_ContainsCreateTable pins that the migrations
+// are bundled into the binary at build time, NOT loaded from a
+// filesystem path that doesn't exist at runtime in the published image.
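+//
+// (A `//go:embed migrations/*.up.sql` pattern that matches no files at
+// all fails the build outright, so the len(entries)==0 check below is
+// belt-and-suspenders — it guards a future pattern edit that still
+// matches something, just not the schema files runtime needs.)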
+//
+// Pre-fix: PR #2906 shipped the binary without the migrations dir;
+// `os.ReadDir("cmd/memory-plugin-postgres/migrations")` errored on every
+// tenant boot, the 30s health gate aborted the container, and the
+// staging redeploy fleet job marked all tenants as failed. Embedding
+// the migrations into the binary removes the runtime path entirely.
+func TestMigrationsEmbedded_ContainsCreateTable(t *testing.T) {
+	entries, err := migrationsFS.ReadDir("migrations")
+	if err != nil {
+		t.Fatalf("embedded migrations dir unreadable: %v", err)
+	}
+	if len(entries) == 0 {
+		t.Fatal("embedded migrations dir is empty — go:embed pattern matched no files")
+	}
+
+	var seenUp bool
+	for _, e := range entries {
+		if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
+			continue
+		}
+		seenUp = true
+		data, err := migrationsFS.ReadFile("migrations/" + e.Name())
+		if err != nil {
+			t.Errorf("read embedded %q: %v", e.Name(), err)
+			continue
+		}
+		if !strings.Contains(string(data), "CREATE TABLE") {
+			t.Errorf("embedded %q has no CREATE TABLE — wrong file embedded?", e.Name())
+		}
+	}
+	if !seenUp {
+		t.Fatal("no *.up.sql in embedded migrations — runtime would have no schema to apply")
+	}
+}
+
+// TestRunMigrationsFromEmbed_OrderingIsAlphabetic pins that we apply
+// migrations in deterministic alphabetical order rather than trusting
+// whatever order migrationsFS.ReadDir returns (documented as sorted by
+// name — the explicit sort.Strings keeps the guarantee local). With one
+// migration today this is moot, but a future second migration ('002_…')
+// MUST run after '001_…' or the schema is broken.
+//
+// We can't easily exercise db.Exec here (no test DB); instead pin the
+// sort step on the directory listing itself.
+func TestRunMigrationsFromEmbed_OrderingIsAlphabetic(t *testing.T) {
+	entries, err := migrationsFS.ReadDir("migrations")
+	if err != nil {
+		t.Fatalf("embedded migrations dir unreadable: %v", err)
+	}
+	var names []string
+	for _, e := range entries {
+		if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
+			continue
+		}
+		names = append(names, e.Name())
+	}
+	for i := 1; i < len(names); i++ {
+		if names[i-1] > names[i] {
+			t.Errorf("ReadDir returned non-sorted names; runMigrationsFromEmbed must sort. "+
+				"Got %q before %q", names[i-1], names[i])
+		}
+	}
+}
diff --git a/workspace-server/cmd/server/main.go b/workspace-server/cmd/server/main.go
index 3961a842..45597367 100644
--- a/workspace-server/cmd/server/main.go
+++ b/workspace-server/cmd/server/main.go
@@ -19,6 +19,7 @@ import (
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/imagewatch"
 	memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/router"
@@ -265,6 +266,14 @@ func main() {
 		})
 	}
 
+	// Pending-uploads GC sweep — deletes acked rows past their retention
+	// window plus unacked rows past expires_at. Without this the
+	// pending_uploads table grows unbounded — the 24h hard TTL only
+	// makes an expired row un-fetchable; nothing actually deletes it.
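+	//
+	// The 0 interval is assumed to select StartSweeper's built-in
+	// default cadence — the sweeper itself lives in
+	// internal/pendinguploads and is not part of this hunk. The loop
+	// shape being relied on, sketched:
+	//
+	//	ticker := time.NewTicker(interval)
+	//	defer ticker.Stop()
+	//	for {
+	//		select {
+	//		case <-ctx.Done():
+	//			return
+	//		case <-ticker.C:
+	//			storage.Sweep(ctx, retention) // acked-past-retention + expired
+	//		}
+	//	}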
+	go supervised.RunWithRecover(ctx, "pending-uploads-sweeper", func(c context.Context) {
+		pendinguploads.StartSweeper(c, pendinguploads.NewPostgres(db.DB), 0)
+	})
+
 	// Provision-timeout sweep — flips workspaces that have been stuck in
 	// status='provisioning' past the timeout window to 'failed' and emits
 	// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
diff --git a/workspace-server/entrypoint-tenant.sh b/workspace-server/entrypoint-tenant.sh
index 9cfc1437..0f2d6dde 100644
--- a/workspace-server/entrypoint-tenant.sh
+++ b/workspace-server/entrypoint-tenant.sh
@@ -20,6 +20,51 @@ cd /canvas
 PORT=3000 HOSTNAME=0.0.0.0 node server.js &
 CANVAS_PID=$!
 
+# Memory v2 sidecar (built-in postgres plugin). See Dockerfile entrypoint
+# comment for rationale.
+#
+# Spawn-gating: only start the sidecar when the operator has indicated
+# they want it (MEMORY_V2_CUTOVER=true OR MEMORY_PLUGIN_URL set).
+# Without that signal, the sidecar adds zero value and risks aborting
+# tenant boot via the 30s health gate when the tenant Postgres lacks
+# pgvector. Caught on staging redeploy 2026-05-05:
+#   pq: extension "vector" is not available
+#
+# Defaults (when sidecar IS spawned): MEMORY_PLUGIN_DATABASE_URL
+# falls back to the tenant's DATABASE_URL.
+MEMORY_PLUGIN_PID=""
+memory_plugin_wanted=""
+if [ "$MEMORY_V2_CUTOVER" = "true" ] || [ -n "$MEMORY_PLUGIN_URL" ]; then
+ memory_plugin_wanted=1
+fi
+if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$memory_plugin_wanted" ] && [ -n "$DATABASE_URL" ]; then
+ : "${MEMORY_PLUGIN_DATABASE_URL:=$DATABASE_URL}"
+ : "${MEMORY_PLUGIN_LISTEN_ADDR:=:9100}"
+ export MEMORY_PLUGIN_DATABASE_URL MEMORY_PLUGIN_LISTEN_ADDR
+ echo "memory-plugin: starting sidecar on $MEMORY_PLUGIN_LISTEN_ADDR" >&2
+ /memory-plugin &
+ MEMORY_PLUGIN_PID=$!
+ # Wait up to 30s for /v1/health. Boot failure is fatal so a misconfigured
+ # tenant crash-loops instead of silently serving cutover traffic against
+ # a dead plugin. ##*: strips through the LAST colon so a host:port
+ # override still yields a bare port for the health URL.
+ health_port=${MEMORY_PLUGIN_LISTEN_ADDR##*:}
+ ready=0
+ for _ in $(seq 1 30); do
+ if wget -qO- --timeout=2 "http://localhost:${health_port}/v1/health" >/dev/null 2>&1; then
+ ready=1
+ break
+ fi
+ sleep 1
+ done
+ if [ "$ready" != "1" ]; then
+ echo "memory-plugin: ❌ /v1/health never returned 200 after 30s — aborting boot. Check DATABASE_URL reachability + pgvector extension + migrations." >&2
+ kill "$MEMORY_PLUGIN_PID" 2>/dev/null || true
+ kill "$CANVAS_PID" 2>/dev/null || true
+ exit 1
+ fi
+ echo "memory-plugin: ✅ sidecar healthy on :$health_port" >&2
+fi
+
 # Start Go platform in foreground-ish (we trap signals)
 # CANVAS_PROXY_URL tells the platform to proxy unmatched routes to Canvas.
 # CONTAINER_BACKEND: empty = Docker (default for self-hosted/local).
@@ -29,15 +74,20 @@ cd /
 /platform &
 PLATFORM_PID=$!
 
-# If either process exits, kill the other
+# If any process exits, kill the others
 cleanup() {
 kill $CANVAS_PID 2>/dev/null || true
 kill $PLATFORM_PID 2>/dev/null || true
+ [ -n "$MEMORY_PLUGIN_PID" ] && kill $MEMORY_PLUGIN_PID 2>/dev/null || true
 }
 trap cleanup EXIT SIGTERM SIGINT
 
-# Wait for either to exit — whichever exits first triggers cleanup
-wait -n $CANVAS_PID $PLATFORM_PID
+# Wait for any to exit — whichever exits first triggers cleanup
+if [ -n "$MEMORY_PLUGIN_PID" ]; then
+ wait -n $CANVAS_PID $PLATFORM_PID $MEMORY_PLUGIN_PID
+else
+ wait -n $CANVAS_PID $PLATFORM_PID
+fi
 EXIT_CODE=$?
cleanup exit $EXIT_CODE diff --git a/workspace-server/internal/handlers/chat_files.go b/workspace-server/internal/handlers/chat_files.go index ccfa0d4c..f5e980bf 100644 --- a/workspace-server/internal/handlers/chat_files.go +++ b/workspace-server/internal/handlers/chat_files.go @@ -600,14 +600,21 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w return } - out := make([]uploadedFile, 0, len(headers)) + // Phase 1: pre-validate + read every part BEFORE any DB write. + // A multi-file upload must commit all-or-nothing; a per-file + // failure halfway through used to leave rows 1..K-1 in the table + // while the client got a 500 and retried the whole batch — duplicate + // rows, orphan activity rows. Validating up-front + atomic PutBatch + // closes that gap. + type prepped struct { + Sanitized string + Mimetype string + Content []byte + Original string // original (unsanitized) filename for error messages + } + prepReady := make([]prepped, 0, len(headers)) + items := make([]pendinguploads.PutItem, 0, len(headers)) for _, fh := range headers { - // Read full content. Per-file cap enforced post-read so an - // oversized file fails with a clean 413 rather than a torn - // stream. The +1 byte ReadAll trick that the Python side - // uses isn't easy through multipart.FileHeader; instead we - // rely on the multipart layer's ContentLength header and - // short-circuit before opening the part. if fh.Size > pendinguploads.MaxFileBytes { log.Printf("chat_files uploadPollMode: per-file cap exceeded for %s: %s (%d bytes)", workspaceID, fh.Filename, fh.Size) @@ -621,45 +628,67 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w } content, err := readMultipartFile(fh) if err != nil { - log.Printf("chat_files uploadPollMode: read part failed for %s/%s: %v", workspaceID, fh.Filename, err) + log.Printf("chat_files uploadPollMode: read part failed for %s/%s: %v", + workspaceID, fh.Filename, err) c.JSON(http.StatusBadRequest, gin.H{"error": "could not read file part"}) return } - - sanitized := SanitizeFilename(fh.Filename) - mimetype := fh.Header.Get("Content-Type") - - fileID, err := h.pendingUploads.Put(ctx, wsUUID, content, sanitized, mimetype) - if err != nil { - if errors.Is(err, pendinguploads.ErrTooLarge) { - // Belt + suspenders: the size check above already - // caught this, but Storage.Put re-validates so a - // malformed FileHeader can't slip through. 413 with - // the same shape so the client sees one error class. - c.JSON(http.StatusRequestEntityTooLarge, gin.H{ - "error": "file exceeds per-file cap", - "filename": fh.Filename, - "size": len(content), - "max": pendinguploads.MaxFileBytes, - }) - return - } - log.Printf("chat_files uploadPollMode: storage.Put failed for %s/%s: %v", - workspaceID, sanitized, err) - c.JSON(http.StatusInternalServerError, gin.H{"error": "could not stage file"}) + // Belt-and-braces post-read cap (multipart.FileHeader.Size can lie + // on some clients that don't set Content-Length per part). 
+ if len(content) > pendinguploads.MaxFileBytes { + log.Printf("chat_files uploadPollMode: per-file cap exceeded post-read for %s: %s (%d bytes)", + workspaceID, fh.Filename, len(content)) + c.JSON(http.StatusRequestEntityTooLarge, gin.H{ + "error": "file exceeds per-file cap", + "filename": fh.Filename, + "size": len(content), + "max": pendinguploads.MaxFileBytes, + }) return } + sanitized := SanitizeFilename(fh.Filename) + mimetype := safeMimetype(fh.Header.Get("Content-Type")) + prepReady = append(prepReady, prepped{ + Sanitized: sanitized, Mimetype: mimetype, Content: content, Original: fh.Filename, + }) + items = append(items, pendinguploads.PutItem{ + Content: content, Filename: sanitized, Mimetype: mimetype, + }) + } - // Activity row so the workspace's inbox poller picks this up - // on its next cycle. activity_type=a2a_receive (NOT a new - // type) so the existing poll filter - // `?type=a2a_receive` catches it without poll-side changes; - // method=chat_upload_receive is the discriminator the - // workspace's adapter (Phase 2) uses to route to the upload - // fetcher instead of the agent's message handler. Same - // shape as A2A's tasks/send vs message/send method split. + // Phase 2: atomic batch insert. On failure no rows commit. + fileIDs, err := h.pendingUploads.PutBatch(ctx, wsUUID, items) + if err != nil { + if errors.Is(err, pendinguploads.ErrTooLarge) { + // Belt + suspenders: pre-validation above already caught + // this; surface a clean 413 if a malformed FileHeader + // somehow slipped through. + c.JSON(http.StatusRequestEntityTooLarge, gin.H{ + "error": "one or more files exceed per-file cap", + "max": pendinguploads.MaxFileBytes, + }) + return + } + log.Printf("chat_files uploadPollMode: storage.PutBatch failed for %s: %v", + workspaceID, err) + c.JSON(http.StatusInternalServerError, gin.H{"error": "could not stage files"}) + return + } + + // Phase 3: write per-file activity rows and build the response. Activity + // rows are written individually (not part of the same Tx as PutBatch) + // because LogActivity is shared across many handlers and threading the + // Tx through would be a bigger refactor. The trade-off: if an activity + // write fails after the PutBatch commits, the pending_uploads rows + // orphan until the 24h TTL — significantly better than the previous + // "every multi-file upload could orphan" behavior, and the workspace's + // fetcher handles soft-404 cleanly when activity rows reference a row + // the platform later expired. 
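+	// fileIDs is index-aligned with items (and therefore with
+	// prepReady); the zip below depends on PutBatch preserving input
+	// order in its returned IDs.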
+	out := make([]uploadedFile, 0, len(prepReady))
+	for i, p := range prepReady {
+		fileID := fileIDs[i]
 		uri := fmt.Sprintf("platform-pending:%s/%s", workspaceID, fileID)
-		summary := "chat_upload_receive: " + sanitized
+		summary := "chat_upload_receive: " + p.Sanitized
 		method := "chat_upload_receive"
 		LogActivity(ctx, h.broadcaster, ActivityParams{
 			WorkspaceID: workspaceID,
@@ -669,28 +698,65 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
 			Summary: &summary,
 			RequestBody: map[string]interface{}{
 				"file_id": fileID.String(),
-				"name": sanitized,
-				"mimeType": mimetype,
-				"size": len(content),
+				"name": p.Sanitized,
+				"mimeType": p.Mimetype,
+				"size": len(p.Content),
 				"uri": uri,
 			},
 			Status: "ok",
 		})
 		log.Printf("chat_files uploadPollMode: staged %s/%s (file_id=%s size=%d mimetype=%q)",
-			workspaceID, sanitized, fileID, len(content), mimetype)
+			workspaceID, p.Sanitized, fileID, len(p.Content), p.Mimetype)
 		out = append(out, uploadedFile{
 			URI: uri,
-			Name: sanitized,
-			Mimetype: mimetype,
-			Size: int64(len(content)),
+			Name: p.Sanitized,
+			Mimetype: p.Mimetype,
+			Size: int64(len(p.Content)),
 		})
 	}
 
 	c.JSON(http.StatusOK, gin.H{"files": out})
 }
 
+// safeMimetype validates a multipart-supplied Content-Type header and
+// returns a sanitized value safe to store + serve back unmodified.
+//
+// The platform's GET /content handler reflects the stored mimetype as
+// the response Content-Type. An attacker-controlled header that
+// embedded CR/LF could split the response (header injection); a value
+// containing semicolons could carry an unexpected charset parameter
+// that confuses a downstream renderer. So: trim the parameter suffix,
+// then reject any value containing control chars, whitespace, or
+// non-ASCII bytes, plus anything that doesn't parse as a bare
+// `type/subtype` pair, by falling back to the safe default
+// (application/octet-stream — the workspace-side handler does the
+// same fallback). An empty header passes through as-is so callers
+// keep their own missing-mimetype handling.
+func safeMimetype(raw string) string {
+	const fallback = "application/octet-stream"
+	// Trim parameters (`text/html; charset=utf-8` → `text/html`).
+	if i := strings.IndexByte(raw, ';'); i >= 0 {
+		raw = raw[:i]
+	}
+	raw = strings.TrimSpace(raw)
+	if raw == "" {
+		return ""
+	}
+	// Reject if any control char, whitespace, or non-ASCII byte is
+	// present (header injection defense). RFC 7231 mimetype grammar
+	// forbids whitespace.
+	for _, r := range raw {
+		if r < 0x21 || r > 0x7e {
+			return fallback
+		}
+	}
+	// Require exactly one slash separating type and subtype.
+	parts := strings.Split(raw, "/")
+	if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
+		return fallback
+	}
+	return raw
+}
+
 // readMultipartFile reads a multipart part fully into memory. Wraps
 // the open + io.ReadAll + close idiom so the call site stays clean,
 // and so a future change (chunked reads / hashing) has one place to
diff --git a/workspace-server/internal/handlers/chat_files_poll_test.go b/workspace-server/internal/handlers/chat_files_poll_test.go
index c064bd6a..eb23acf1 100644
--- a/workspace-server/internal/handlers/chat_files_poll_test.go
+++ b/workspace-server/internal/handlers/chat_files_poll_test.go
@@ -67,12 +67,59 @@ func (s *inMemStorage) Put(_ context.Context, ws uuid.UUID, content []byte, file
 	return id, nil
 }
 
+// PutBatch mirrors the production atomic-batch contract: any per-item
+// failure leaves the in-memory state unchanged, simulating Tx rollback.
+// Pre-validation matches PostgresStorage.PutBatch; oversized items
+// return ErrTooLarge before any row is added.
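+//
+// For reference, the production shape this fake mirrors (a sketch only —
+// PostgresStorage.PutBatch is not part of this diff; inferred from the
+// handler's use of it):
+//
+//	tx, err := s.db.BeginTx(ctx, nil)   // one Tx per batch
+//	for _, it := range items {
+//		// re-validate size, INSERT INTO pending_uploads …
+//		// any failure → tx.Rollback() → zero rows visible
+//	}
+//	return ids, tx.Commit()             // all-or-nothing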
+func (s *inMemStorage) PutBatch(_ context.Context, ws uuid.UUID, items []pendinguploads.PutItem) ([]uuid.UUID, error) { + s.mu.Lock() + defer s.mu.Unlock() + if s.putErr != nil { + return nil, s.putErr + } + // Pre-validate so an oversized item rejects the whole batch before + // any state mutation — matches the Tx-rollback semantics. + for _, it := range items { + if len(it.Content) > pendinguploads.MaxFileBytes { + return nil, pendinguploads.ErrTooLarge + } + } + ids := make([]uuid.UUID, 0, len(items)) + stagedRows := make(map[uuid.UUID]pendinguploads.Record, len(items)) + stagedPuts := make([]putCall, 0, len(items)) + for _, it := range items { + id := uuid.New() + stagedRows[id] = pendinguploads.Record{ + FileID: id, WorkspaceID: ws, Content: it.Content, + Filename: it.Filename, Mimetype: it.Mimetype, + SizeBytes: int64(len(it.Content)), CreatedAt: time.Now(), + ExpiresAt: time.Now().Add(24 * time.Hour), + } + stagedPuts = append(stagedPuts, putCall{ + WorkspaceID: ws, Filename: it.Filename, Mimetype: it.Mimetype, Size: len(it.Content), + }) + ids = append(ids, id) + } + for id, r := range stagedRows { + s.rows[id] = r + } + s.puts = append(s.puts, stagedPuts...) + return ids, nil +} + func (s *inMemStorage) Get(context.Context, uuid.UUID) (pendinguploads.Record, error) { return pendinguploads.Record{}, pendinguploads.ErrNotFound } func (s *inMemStorage) MarkFetched(context.Context, uuid.UUID) error { return nil } func (s *inMemStorage) Ack(context.Context, uuid.UUID) error { return nil } +// Sweep is required by the Storage interface (Phase 3 GC). Not +// exercised by upload-branch tests — the dedicated sweeper_test.go + +// storage_sweep_test.go cover it. +func (s *inMemStorage) Sweep(context.Context, time.Duration) (pendinguploads.SweepResult, error) { + return pendinguploads.SweepResult{}, nil +} + // expectPollDeliveryMode stubs the SELECT delivery_mode lookup that // uploadPollMode does (separate from the one resolveWorkspaceForwardCreds // does — this is the new helper introduced for the poll branch). @@ -154,7 +201,7 @@ func TestPollUpload_HappyPath_OneFile_StagesAndLogs(t *testing.T) { expectActivityInsert(mock) store := newInMemStorage() - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). WithPendingUploads(store, nil) body, ct := pollUploadFixture(t, map[string][]byte{"report.pdf": []byte("PDF-bytes")}) @@ -212,7 +259,7 @@ func TestPollUpload_MultipleFiles_AllStagedAndLogged(t *testing.T) { expectActivityInsert(mock) store := newInMemStorage() - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). WithPendingUploads(store, nil) body, ct := pollUploadFixture(t, map[string][]byte{ @@ -250,7 +297,7 @@ func TestPollUpload_PushModeFallsThroughToForward(t *testing.T) { // URL empty + mode=push → 503 (no inbound secret check needed). store := newInMemStorage() - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). 
WithPendingUploads(store, nil) body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("data")}) @@ -274,7 +321,7 @@ func TestPollUpload_NotConfigured_FallsThrough(t *testing.T) { wsID := "33333333-2222-3333-4444-555555555555" expectURLAndMode(mock, wsID, "", "poll") // resolveWorkspaceForwardCreds emits 422 - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) // No WithPendingUploads — pendingUploads is nil. body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("data")}) @@ -295,7 +342,7 @@ func TestPollUpload_WorkspaceMissing_404(t *testing.T) { wsID := "44444444-2222-3333-4444-555555555555" expectPollDeliveryModeMissing(mock, wsID) - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). WithPendingUploads(newInMemStorage(), nil) body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("d")}) @@ -315,7 +362,7 @@ func TestPollUpload_DeliveryModeLookupDBError_500(t *testing.T) { mock.ExpectQuery(`SELECT delivery_mode FROM workspaces WHERE id = \$1`). WithArgs(wsID).WillReturnError(errors.New("connection lost")) - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). WithPendingUploads(newInMemStorage(), nil) body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("d")}) @@ -335,7 +382,7 @@ func TestPollUpload_NoFilesField_400(t *testing.T) { expectPollDeliveryMode(mock, wsID, "poll") store := newInMemStorage() - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). WithPendingUploads(store, nil) // Multipart with a non-files field — no actual files. @@ -360,7 +407,7 @@ func TestPollUpload_MalformedMultipart_400(t *testing.T) { expectPollDeliveryMode(mock, wsID, "poll") store := newInMemStorage() - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). WithPendingUploads(store, nil) // Body that doesn't match the boundary in Content-Type. @@ -381,7 +428,7 @@ func TestPollUpload_StorageError_500(t *testing.T) { store := newInMemStorage() store.putErr = errors.New("disk full") - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). WithPendingUploads(store, nil) body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")}) @@ -402,7 +449,7 @@ func TestPollUpload_StorageTooLarge_413(t *testing.T) { store := newInMemStorage() store.putErr = pendinguploads.ErrTooLarge - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). WithPendingUploads(store, nil) body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")}) @@ -422,7 +469,7 @@ func TestPollUpload_TooManyFiles_400(t *testing.T) { expectPollDeliveryMode(mock, wsID, "poll") store := newInMemStorage() - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). WithPendingUploads(store, nil) // 65 files — over the per-batch cap. 
@@ -457,7 +504,7 @@ func TestPollUpload_NullDeliveryMode_TreatedAsPush(t *testing.T) { expectURLAndMode(mock, wsID, "", "") store := newInMemStorage() - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). WithPendingUploads(store, nil) body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")}) @@ -490,7 +537,7 @@ func TestPollUpload_PerFileCapPreStorage_413(t *testing.T) { expectPollDeliveryMode(mock, wsID, "poll") store := newInMemStorage() - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). WithPendingUploads(store, nil) // 25 MB + 1 byte. Single file, large enough to trip the early @@ -525,7 +572,7 @@ func TestPollUpload_SanitizesFilenameInResponse(t *testing.T) { expectActivityInsert(mock) store := newInMemStorage() - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). WithPendingUploads(store, nil) body, ct := pollUploadFixture(t, map[string][]byte{"hello world!.pdf": []byte("data")}) @@ -550,6 +597,120 @@ func TestPollUpload_SanitizesFilenameInResponse(t *testing.T) { } } +// TestPollUpload_AtomicRollbackOnSecondFileTooLarge pins the +// transactional contract introduced in phase 5: when one file in a +// multi-file batch fails pre-validation (oversize), NONE of the files +// in the batch land in storage. Previously a per-file Put loop would +// stage rows 1..K-1 before failing on row K, leaving orphan +// pending_uploads + activity rows the client would re-create on retry. +// +// Pinned via inMemStorage's PutBatch (which mirrors PostgresStorage's +// Tx-rollback behavior on a per-item validation failure) — but the +// real atomicity guarantee is the integration test in +// pending_uploads_integration_test.go. +func TestPollUpload_AtomicRollbackOnSecondFileTooLarge(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + wsID := "aaaaaaaa-3333-3333-4444-555555555555" + expectPollDeliveryMode(mock, wsID, "poll") + + store := newInMemStorage() + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). + WithPendingUploads(store, nil) + + // Two files: first OK, second over the per-file cap. Pre-validation + // in uploadPollMode catches it BEFORE any Put — store.puts must + // stay empty. (If the test ever sees len=1, the regression is + // "first file slipped through into storage on a partial-failure + // batch.") + tooBig := bytes.Repeat([]byte{0x42}, pendinguploads.MaxFileBytes+1) + body, ct := pollUploadFixture(t, map[string][]byte{ + "ok.txt": []byte("small"), + "huge.bin": tooBig, + }) + c, w := makeUploadRequest(t, wsID, body, ct) + h.Upload(c) + + if w.Code != http.StatusRequestEntityTooLarge { + t.Errorf("status=%d body=%s, want 413", w.Code, w.Body.String()) + } + if len(store.puts) != 0 { + t.Errorf("expected zero Puts on rollback, got %d: %+v", len(store.puts), store.puts) + } +} + +// TestPollUpload_AtomicRollbackOnPutBatchError validates that an in- +// flight PutBatch failure (e.g. simulated DB error) leaves zero rows +// — same guarantee as the pre-validation path, but exercises the +// "Tx-Rollback after BEGIN" branch via the fake. 
+func TestPollUpload_AtomicRollbackOnPutBatchError(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + wsID := "bbbbbbbb-3333-3333-4444-555555555555" + expectPollDeliveryMode(mock, wsID, "poll") + + store := newInMemStorage() + store.putErr = errors.New("db down mid-batch") + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). + WithPendingUploads(store, nil) + + body, ct := pollUploadFixture(t, map[string][]byte{ + "a.txt": []byte("aaa"), + "b.txt": []byte("bbb"), + "c.txt": []byte("ccc"), + }) + c, w := makeUploadRequest(t, wsID, body, ct) + h.Upload(c) + + if w.Code != http.StatusInternalServerError { + t.Errorf("status=%d, want 500", w.Code) + } + if len(store.puts) != 0 { + t.Errorf("expected zero Puts after PutBatch error, got %d", len(store.puts)) + } +} + +// TestPollUpload_MimetypeWithCRLFInjectionStripped pins the safeMimetype +// hardening: a multipart-supplied Content-Type header with CR/LF is +// rewritten to application/octet-stream so the eventual /content +// response can't be header-split on the wire. +func TestPollUpload_MimetypeWithCRLFInjectionStripped(t *testing.T) { + got := safeMimetype("text/html\r\nX-Injected: pwn") + if got != "application/octet-stream" { + t.Errorf("CRLF mimetype not stripped, got %q", got) + } + got = safeMimetype("image/png\x00") + if got != "application/octet-stream" { + t.Errorf("NUL byte mimetype not stripped, got %q", got) + } + got = safeMimetype("text/plain; charset=utf-8") + if got != "text/plain" { + t.Errorf("parameter not stripped, got %q", got) + } + got = safeMimetype("application/pdf") + if got != "application/pdf" { + t.Errorf("clean mime modified, got %q", got) + } + got = safeMimetype("") + if got != "" { + t.Errorf("empty input should pass through, got %q", got) + } + got = safeMimetype("notamime") + if got != "application/octet-stream" { + t.Errorf("non-type/subtype not coerced, got %q", got) + } + got = safeMimetype("/empty-type") + if got != "application/octet-stream" { + t.Errorf("missing type half not coerced, got %q", got) + } + got = safeMimetype("type/") + if got != "application/octet-stream" { + t.Errorf("missing subtype half not coerced, got %q", got) + } +} + // TestPollUpload_ActivityRowDiscriminator pins the // activity_type / method shape that the workspace inbox poller depends // on. The poller filters `GET /workspaces/:id/activity?type=a2a_receive` @@ -573,7 +734,7 @@ func TestPollUpload_ActivityRowDiscriminator(t *testing.T) { expectActivityInsertWithTypeAndMethod(mock, wsID, "a2a_receive", "chat_upload_receive") store := newInMemStorage() - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)). + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)). 
WithPendingUploads(store, nil) body, ct := pollUploadFixture(t, map[string][]byte{"x.pdf": []byte("xx")}) diff --git a/workspace-server/internal/handlers/chat_files_test.go b/workspace-server/internal/handlers/chat_files_test.go index e7829f45..6012d3a7 100644 --- a/workspace-server/internal/handlers/chat_files_test.go +++ b/workspace-server/internal/handlers/chat_files_test.go @@ -105,7 +105,7 @@ func TestChatUpload_InvalidWorkspaceID(t *testing.T) { setupTestDB(t) setupTestRedis(t) - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) c, w := makeUploadRequest(t, "not-a-uuid", &bytes.Buffer{}, "") h.Upload(c) @@ -122,7 +122,7 @@ func TestChatUpload_WorkspaceNotInDB(t *testing.T) { wsID := "00000000-0000-0000-0000-000000000099" expectURLMissing(mock, wsID) - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) body, ct := uploadFixture(t) c, w := makeUploadRequest(t, wsID, body, ct) h.Upload(c) @@ -166,7 +166,7 @@ func TestChatUpload_NoInboundSecret_LazyHeal(t *testing.T) { WithArgs(sqlmock.AnyArg(), wsID). WillReturnResult(sqlmock.NewResult(0, 1)) - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) body, ct := uploadFixture(t) c, w := makeUploadRequest(t, wsID, body, ct) h.Upload(c) @@ -203,7 +203,7 @@ func TestChatUpload_NoInboundSecret_LazyHealFailure(t *testing.T) { WithArgs(sqlmock.AnyArg(), wsID). WillReturnError(sql.ErrConnDone) // mint fails - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) body, ct := uploadFixture(t) c, w := makeUploadRequest(t, wsID, body, ct) h.Upload(c) @@ -231,7 +231,7 @@ func TestChatUpload_NoURL(t *testing.T) { wsID := "00000000-0000-0000-0000-000000000042" expectURLAndMode(mock, wsID, "", "push") - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) body, ct := uploadFixture(t) c, w := makeUploadRequest(t, wsID, body, ct) h.Upload(c) @@ -256,7 +256,7 @@ func TestChatUpload_PollModeEmptyURL(t *testing.T) { wsID := "00000000-0000-0000-0000-000000000099" expectURLAndMode(mock, wsID, "", "poll") - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) body, ct := uploadFixture(t) c, w := makeUploadRequest(t, wsID, body, ct) h.Upload(c) @@ -286,7 +286,7 @@ func TestChatUpload_NullModeEmptyURL(t *testing.T) { wsID := "30ba7f0b-b303-4a20-aefe-3a4a675b8aa4" // user's "mac laptop" expectURLNullMode(mock, wsID, "") - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) body, ct := uploadFixture(t) c, w := makeUploadRequest(t, wsID, body, ct) h.Upload(c) @@ -338,7 +338,7 @@ func TestChatUpload_ForwardsToWorkspace_HappyPath(t *testing.T) { expectURL(mock, wsID, srv.URL) expectInboundSecret(mock, wsID, "super-secret-123") - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) body, ct := uploadFixture(t) c, w := makeUploadRequest(t, wsID, body, ct) h.Upload(c) @@ -380,7 +380,7 @@ func TestChatUpload_ForwardsErrorStatusUnchanged(t *testing.T) { expectURL(mock, wsID, srv.URL) expectInboundSecret(mock, wsID, "tok") - h 
:= NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) body, ct := uploadFixture(t) c, w := makeUploadRequest(t, wsID, body, ct) h.Upload(c) @@ -402,7 +402,7 @@ func TestChatUpload_WorkspaceUnreachable(t *testing.T) { expectURL(mock, wsID, "http://127.0.0.1:1") expectInboundSecret(mock, wsID, "tok") - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) body, ct := uploadFixture(t) c, w := makeUploadRequest(t, wsID, body, ct) h.Upload(c) @@ -418,7 +418,7 @@ func TestChatDownload_InvalidPath(t *testing.T) { setupTestDB(t) setupTestRedis(t) - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) cases := []struct { name, path, wantSubstr string @@ -507,7 +507,7 @@ func TestChatDownload_WorkspaceNotInDB(t *testing.T) { WithArgs(wsID). WillReturnError(sql.ErrNoRows) - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt") h.Download(c) @@ -533,7 +533,7 @@ func TestChatDownload_NoInboundSecret_LazyHeal(t *testing.T) { WithArgs(sqlmock.AnyArg(), wsID). WillReturnResult(sqlmock.NewResult(0, 1)) - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt") h.Download(c) @@ -559,7 +559,7 @@ func TestChatDownload_NoInboundSecret_LazyHealFailure(t *testing.T) { WithArgs(sqlmock.AnyArg(), wsID). WillReturnError(sql.ErrConnDone) - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt") h.Download(c) @@ -592,7 +592,7 @@ func TestChatDownload_ForwardsToWorkspace_HappyPath(t *testing.T) { expectURL(mock, wsID, srv.URL) expectInboundSecret(mock, wsID, "the-secret") - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) c, w := makeDownloadRequest(t, wsID, "/workspace/report.txt") h.Download(c) @@ -634,7 +634,7 @@ func TestChatDownload_404FromWorkspacePropagated(t *testing.T) { expectURL(mock, wsID, srv.URL) expectInboundSecret(mock, wsID, "tok") - h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)) c, w := makeDownloadRequest(t, wsID, "/workspace/missing.txt") h.Download(c) diff --git a/workspace-server/internal/handlers/class1_ast_gate_test.go b/workspace-server/internal/handlers/class1_ast_gate_test.go new file mode 100644 index 00000000..bb362364 --- /dev/null +++ b/workspace-server/internal/handlers/class1_ast_gate_test.go @@ -0,0 +1,468 @@ +package handlers + +// class1_ast_gate_test.go — generic Class 1 leak gate per #2867 PR-A. +// +// What this gate prevents: +// The tenant-hongming leak class — a handler iterates a YAML-derived +// slice (ws.Children, sub_workspaces, etc.) and calls +// `INSERT INTO workspaces` inside the loop body without first +// checking whether a workspace with the same (parent_id, name) is +// already there. Each call to such a handler doubles the tree. 
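+//
+// The shape in miniature (hypothetical handler — names illustrative):
+//
+//	for _, child := range ws.Children {
+//		db.ExecContext(ctx, `INSERT INTO workspaces (id, name, parent_id)
+//			VALUES ($1, $2, $3)`, uuid.New(), child.Name, ws.ID)
+//	} // no (parent_id, name) preflight → every call re-creates the subtree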
+// +// Why this is broader than TestCreateWorkspaceTree_CallsLookupBeforeInsert: +// The existing gate is hard-coded to org_import.go's createWorkspaceTree. +// That catches the specific function that triggered the original +// incident — but a future handler written from scratch in a different +// file would not be covered. This gate walks every production handler +// .go file and applies a structural rule that does not depend on +// function or file names. +// +// The rule (verbatim from #2867 PR-A): +// +// "No handler in handlers/ may iterate a slice (any RangeStmt) AND +// call INSERT INTO workspaces inside the loop body without a +// preceding SELECT id FROM workspaces WHERE name=$1 AND parent_id IS +// NOT DISTINCT FROM $2 in the same function (== a lookupExistingChild +// call, OR an ON CONFLICT clause baked into the same INSERT, OR an +// explicit allowlist annotation)." +// +// Allowlist mechanism: a function whose body contains the exact comment +// string `// class1-gate: idempotent-by-design` is treated as safe. +// Use this only after writing a unit test that pins WHY the function +// is safe. The annotation is intentionally awkward to type — it should +// be rare. + +import ( + "go/ast" + "go/parser" + "go/token" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + "testing" +) + +// reINSERTWorkspaces matches the exact statement shape we care about. +// Tightened (vs bytes.Index "INSERT INTO workspaces") so the audit +// table `workspaces_audit` literal — or any other lookalike — does not +// false-positive trigger this gate. The same regex is used in the +// existing createWorkspaceTree gate (workspaces_insert_allowlist_test.go) +// — keep them in sync if either changes. +var reINSERTWorkspaces = regexp.MustCompile(`(?m)^\s*INSERT INTO workspaces\s*\(`) + +// reONCONFLICT matches ON CONFLICT clauses anywhere in the same SQL +// literal. An UPSERT (INSERT ... ON CONFLICT ... DO UPDATE) is +// idempotent by definition, so the gate exempts it. +var reONCONFLICT = regexp.MustCompile(`(?i)\bON CONFLICT\b`) + +// gateAllowlistComment is the magic comment a function author writes +// to opt out of this gate. Forces an explicit decision. +const gateAllowlistComment = "// class1-gate: idempotent-by-design" + +// preflightCallNames are function names whose presence in a function +// body counts as "did a SELECT-by-(parent_id, name) preflight". Add +// new names here as new preflight helpers are introduced. Keep the +// list TIGHT — any sloppy addition weakens the gate. +var preflightCallNames = map[string]bool{ + "lookupExistingChild": true, +} + +// TestClass1_NoUnpreflightedInsertInsideRange walks every production +// .go file in this package, parses the AST, and fails the test if any +// FuncDecl violates the rule above. +// +// Failure message must include: file path, function name, line of +// the offending INSERT, line of the enclosing range, and a hint at +// the three escape hatches (preflight call, ON CONFLICT, allowlist +// comment). 
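+//
+// Escape hatch 1, sketched (signature borrowed from org_import.go;
+// other call sites may differ):
+//
+//	existingID, existing, err := h.lookupExistingChild(ctx, child.Name, parentID)
+//	if err != nil {
+//		return err
+//	}
+//	if existing {
+//		continue // (parent_id, name) already present — skip the INSERT
+//	}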
+func TestClass1_NoUnpreflightedInsertInsideRange(t *testing.T) { + wd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + + entries, err := os.ReadDir(wd) + if err != nil { + t.Fatalf("readdir %s: %v", wd, err) + } + + type violation struct { + file string + fn string + insertLine int + rangeLine int + } + var violations []violation + scanned := 0 + + for _, e := range entries { + name := e.Name() + if e.IsDir() || !strings.HasSuffix(name, ".go") { + continue + } + if strings.HasSuffix(name, "_test.go") { + continue + } + path := filepath.Join(wd, name) + src, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read %s: %v", path, err) + } + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, name, src, parser.ParseComments) + if err != nil { + t.Fatalf("parse %s: %v", path, err) + } + scanned++ + + // Walk every function declaration and apply the rule. + for _, decl := range file.Decls { + fd, ok := decl.(*ast.FuncDecl) + if !ok || fd.Body == nil { + continue + } + + // Allowlist: skip if the function body contains the magic + // comment. We check via the source range of the function + // — comments inside the body are in file.Comments and + // must overlap the function's Pos/End range. + if functionHasAllowlistComment(file, fd) { + continue + } + + // First pass: locate every INSERT INTO workspaces literal + // in this function. We treat each such literal as a + // candidate violation and try to clear it via the rules. + candidates := findInsertWorkspacesLiterals(fd, src, fset) + if len(candidates) == 0 { + continue + } + + // Has the function called a preflight helper? Single + // pass — if any preflight name appears, every INSERT in + // the function is considered preflighted. This is more + // permissive than position-aware (preflight could be + // AFTER the INSERT and still satisfy the gate), but the + // existing org_import.go gate already pins the position + // invariant for createWorkspaceTree, and a function that + // preflights AFTER inserting would fail the position + // gate in a separate test. + hasPreflight := functionCallsAny(fd, preflightCallNames) + + for _, c := range candidates { + if c.hasONCONFLICT { + continue + } + if hasPreflight { + continue + } + if c.enclosingRangeLine == 0 { + // INSERT not inside any RangeStmt — single-shot, + // not the bug pattern. + continue + } + violations = append(violations, violation{ + file: name, + fn: fd.Name.Name, + insertLine: c.insertLine, + rangeLine: c.enclosingRangeLine, + }) + } + } + } + + if scanned == 0 { + t.Fatal("scanned 0 .go files — wrong working directory? gate would always pass") + } + + if len(violations) > 0 { + // Stable sort so the failure message is deterministic across + // reruns. + sort.Slice(violations, func(i, j int) bool { + if violations[i].file != violations[j].file { + return violations[i].file < violations[j].file + } + return violations[i].insertLine < violations[j].insertLine + }) + var b strings.Builder + b.WriteString("Class 1 leak gate (#2867 PR-A) — these handler functions iterate a slice and INSERT INTO workspaces inside the loop body without a (parent_id, name) preflight.\n\n") + b.WriteString("This is the bug shape that triggered the tenant-hongming leak (TeamHandler.Expand re-inserting the entire sub_workspaces tree on every call). To fix any reported violation, choose ONE of:\n") + b.WriteString(" 1. Call h.lookupExistingChild(ctx, name, parentID) before the INSERT and skip the INSERT when it returns existing=true. (preferred)\n") + b.WriteString(" 2. 
Use INSERT ... ON CONFLICT ... DO ... (idempotent UPSERT, like registry.go).\n") + b.WriteString(" 3. Annotate the function with a `// class1-gate: idempotent-by-design` comment AND a unit test that pins why the function is structurally idempotent. (rare; require code review)\n\n") + b.WriteString("Violations:\n") + for _, v := range violations { + b.WriteString(" - ") + b.WriteString(v.file) + b.WriteString(":") + b.WriteString(itoa(v.insertLine)) + b.WriteString(" — function ") + b.WriteString(v.fn) + b.WriteString("() INSERTs inside RangeStmt at line ") + b.WriteString(itoa(v.rangeLine)) + b.WriteString("\n") + } + t.Fatal(b.String()) + } +} + +func itoa(n int) string { + // Avoid strconv import for one call site — keeps the test focused. + if n == 0 { + return "0" + } + neg := n < 0 + if neg { + n = -n + } + var buf [20]byte + i := len(buf) + for n > 0 { + i-- + buf[i] = byte('0' + n%10) + n /= 10 + } + if neg { + i-- + buf[i] = '-' + } + return string(buf[i:]) +} + +// candidateInsert holds the per-INSERT facts needed to decide whether +// the gate fires. +type candidateInsert struct { + insertLine int + hasONCONFLICT bool + enclosingRangeLine int // 0 means not inside any range +} + +// findInsertWorkspacesLiterals walks fd's body and returns one +// candidateInsert per INSERT INTO workspaces string literal. +// +// Position-based detection: collect every RangeStmt's body span first, +// then for each INSERT literal check if its position is inside any +// span. ast.Inspect's nil-call ordering does NOT give per-node pop +// semantics, so a stack-based approach against ast.Inspect would +// silently miscount. Position spans are deterministic and easy to +// reason about. +func findInsertWorkspacesLiterals(fd *ast.FuncDecl, src []byte, fset *token.FileSet) []candidateInsert { + var out []candidateInsert + + type span struct{ start, end token.Pos } + var ranges []span + ast.Inspect(fd.Body, func(n ast.Node) bool { + rs, ok := n.(*ast.RangeStmt) + if !ok || rs.Body == nil { + return true + } + ranges = append(ranges, span{rs.Body.Lbrace, rs.Body.Rbrace}) + return true + }) + + enclosingRangeLineFor := func(p token.Pos) int { + // Pick the innermost enclosing range — i.e., the one with the + // largest start that still covers p. Innermost is the one + // whose body actually contains the INSERT, which is the line + // most useful in a violation message. + bestStart := token.NoPos + bestLine := 0 + for _, s := range ranges { + if p > s.start && p < s.end && s.start > bestStart { + bestStart = s.start + bestLine = fset.Position(s.start).Line + } + } + return bestLine + } + + ast.Inspect(fd.Body, func(n ast.Node) bool { + bl, ok := n.(*ast.BasicLit) + if !ok || bl.Kind != token.STRING { + return true + } + // Strip surrounding backticks/quotes — value includes them. + lit := bl.Value + if len(lit) >= 2 { + lit = lit[1 : len(lit)-1] + } + if !reINSERTWorkspaces.MatchString(lit) { + return true + } + out = append(out, candidateInsert{ + insertLine: fset.Position(bl.Pos()).Line, + hasONCONFLICT: reONCONFLICT.MatchString(lit), + enclosingRangeLine: enclosingRangeLineFor(bl.Pos()), + }) + return true + }) + return out +} + +// functionCallsAny returns true if any CallExpr in fd's body has a +// function name (either a SelectorExpr Sel.Name or an Ident name) +// matching a key in names. 
+func functionCallsAny(fd *ast.FuncDecl, names map[string]bool) bool { + found := false + ast.Inspect(fd.Body, func(n ast.Node) bool { + if found { + return false + } + ce, ok := n.(*ast.CallExpr) + if !ok { + return true + } + switch fun := ce.Fun.(type) { + case *ast.Ident: + if names[fun.Name] { + found = true + return false + } + case *ast.SelectorExpr: + if names[fun.Sel.Name] { + found = true + return false + } + } + return true + }) + return found +} + +// functionHasAllowlistComment returns true if the function body +// (between fd.Body.Lbrace and fd.Body.Rbrace) contains a comment +// equal to gateAllowlistComment. +func functionHasAllowlistComment(file *ast.File, fd *ast.FuncDecl) bool { + if fd.Body == nil { + return false + } + start := fd.Body.Lbrace + end := fd.Body.Rbrace + for _, cg := range file.Comments { + for _, c := range cg.List { + if c.Pos() < start || c.Pos() > end { + continue + } + if strings.TrimSpace(c.Text) == gateAllowlistComment { + return true + } + } + } + return false +} + +// TestClass1_GateFiresOnSyntheticBuggySource — proves the gate actually +// catches the bug shape it's named after. Without this, a regression +// to "always pass" would not be noticed until the leak shipped again. +// Per memory feedback_assert_exact_not_substring.md: tighten the test +// + verify it FAILS on old-shape source before merging. +func TestClass1_GateFiresOnSyntheticBuggySource(t *testing.T) { + const buggySrc = `package handlers + +import "context" + +type fakeDB struct{} +func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {} + +func buggyExpand(db fakeDB, ctx context.Context, children []string) { + for _, child := range children { + // Bug shape: INSERT inside the range body, no preflight. + db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", child) + } +} +` + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, "buggy.go", buggySrc, parser.ParseComments) + if err != nil { + t.Fatalf("parse synthetic source: %v", err) + } + for _, decl := range file.Decls { + fd, ok := decl.(*ast.FuncDecl) + if !ok || fd.Name.Name != "buggyExpand" { + continue + } + candidates := findInsertWorkspacesLiterals(fd, []byte(buggySrc), fset) + if len(candidates) != 1 { + t.Fatalf("expected 1 INSERT literal, got %d", len(candidates)) + } + c := candidates[0] + if c.enclosingRangeLine == 0 { + t.Errorf("synthetic INSERT inside `for _, child := range` should be detected as enclosed by range, got enclosingRangeLine=0 — gate would miss the bug shape") + } + if c.hasONCONFLICT { + t.Errorf("synthetic INSERT has no ON CONFLICT, gate falsely treated it as idempotent") + } + if functionCallsAny(fd, preflightCallNames) { + t.Errorf("synthetic function does not call lookupExistingChild — gate falsely treated it as preflighted") + } + // All three guards say the gate WOULD fire. Pass. + return + } + t.Fatal("buggyExpand FuncDecl not found in synthetic source") +} + +// TestClass1_GateAllowsONCONFLICT — pins that an INSERT with ON +// CONFLICT inside a range body is NOT flagged. registry.go's +// upsert pattern is the prod example. 
+func TestClass1_GateAllowsONCONFLICT(t *testing.T) {
+	const safeSrc = `package handlers
+
+import "context"
+
+type fakeDB struct{}
+func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
+
+func upsertLoop(db fakeDB, ctx context.Context, children []string) {
+	for _, child := range children {
+		db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2) ON CONFLICT (id) DO UPDATE SET name = $2`" + `, "x", child)
+	}
+}
+`
+	fset := token.NewFileSet()
+	file, err := parser.ParseFile(fset, "safe.go", safeSrc, parser.ParseComments)
+	if err != nil {
+		t.Fatalf("parse synthetic source: %v", err)
+	}
+	found := false
+	for _, decl := range file.Decls {
+		fd, ok := decl.(*ast.FuncDecl)
+		if !ok || fd.Name.Name != "upsertLoop" {
+			continue
+		}
+		found = true
+		candidates := findInsertWorkspacesLiterals(fd, []byte(safeSrc), fset)
+		if len(candidates) != 1 {
+			t.Fatalf("expected 1 candidate, got %d", len(candidates))
+		}
+		if !candidates[0].hasONCONFLICT {
+			t.Errorf("ON CONFLICT clause should be detected, was missed — gate would falsely flag idempotent UPSERTs")
+		}
+	}
+	if !found {
+		t.Fatal("upsertLoop FuncDecl not found in synthetic source — test silently checked nothing")
+	}
+}
+
+// TestClass1_GateAllowsAllowlistAnnotation — pins that the escape
+// hatch works. Annotated functions are skipped at the FuncDecl level.
+func TestClass1_GateAllowsAllowlistAnnotation(t *testing.T) {
+	const annotatedSrc = `package handlers
+
+import "context"
+
+type fakeDB struct{}
+func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
+
+func intentionallyUnpreflighted(db fakeDB, ctx context.Context, children []string) {
+	// class1-gate: idempotent-by-design
+	for _, child := range children {
+		db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", child)
+	}
+}
+`
+	fset := token.NewFileSet()
+	file, err := parser.ParseFile(fset, "annotated.go", annotatedSrc, parser.ParseComments)
+	if err != nil {
+		t.Fatalf("parse synthetic source: %v", err)
+	}
+	found := false
+	for _, decl := range file.Decls {
+		fd, ok := decl.(*ast.FuncDecl)
+		if !ok || fd.Name.Name != "intentionallyUnpreflighted" {
+			continue
+		}
+		found = true
+		if !functionHasAllowlistComment(file, fd) {
+			t.Error("allowlist comment should be detected for the intentionallyUnpreflighted function — escape hatch not working")
+		}
+	}
+	if !found {
+		t.Fatal("intentionallyUnpreflighted FuncDecl not found in synthetic source — test silently checked nothing")
+	}
+}
diff --git a/workspace-server/internal/handlers/org_import.go b/workspace-server/internal/handlers/org_import.go
index 70151e09..3dfe2fbd 100644
--- a/workspace-server/internal/handlers/org_import.go
+++ b/workspace-server/internal/handlers/org_import.go
@@ -7,6 +7,7 @@ import (
 	"context"
 	"database/sql"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"log"
 	"os"
@@ -21,6 +22,7 @@ import (
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/scheduler"
 	"github.com/google/uuid"
 )
@@ -61,10 +63,33 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
 		tier = defaults.Tier
 	}
 	if tier == 0 {
-		tier = 2
+		// Resolved via the same DefaultTier helper Create + Templates
+		// use (#2910 PR-E). SaaS → T4 (one container per sibling EC2,
+		// no neighbour to protect from), self-hosted → T3. Pre-#2910
+		// this path returned T2 on self-hosted, asymmetric with
+		// workspace.go's T3 — undocumented drift. Lifting to
+		// DefaultTier collapses both call sites onto one source of
+		// truth so a future tier-default change sweeps every entry
+		// point at once. Templates that want a different floor still
+		// declare `tier:` in config.yaml or `defaults.tier` in org.yaml.
+		if h.workspace != nil {
+			tier = h.workspace.DefaultTier()
+		} else {
+			tier = 3
+		}
 	}
-	ctxLookup := context.Background()
+	// 5s timeout bounds the lookup independently of any HTTP request
+	// context. createWorkspaceTree runs in goroutines spawned from the
+	// /org/import handler, so plumbing the request context here would
+	// cascade-cancel into provisionWorkspaceAuto and abort in-flight
+	// EC2 provisioning if the client disconnected mid-import — that's
+	// the wrong behaviour. A short bounded timeout protects the
+	// per-row SELECT against a wedged DB without taking the
+	// drop-everything-on-disconnect tradeoff.
+	ctxLookup, cancelLookup := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancelLookup()
 	// Idempotency: if a workspace with the same (parent_id, name) already
 	// exists, skip the INSERT + canvas_layouts + broadcast + provisioning.
 	// This is what makes /org/import safe to call multiple times — the
@@ -76,12 +101,31 @@
 	// (parent exists, some children missing) backfill the missing children
 	// instead of either no-op'ing the whole subtree or duplicating the
 	// existing children.
+	//
+	// /org/import is ADDITIVE-ONLY, never destructive. Children present
+	// in the existing tree but absent from the new template are
+	// preserved (no DELETE on diff). Skip-path also does NOT propagate
+	// updates to existing nodes — a re-import that adds an
+	// initial_memory or schedule to an existing workspace is silently
+	// dropped (the function bypasses seedInitialMemories, schedule SQL,
+	// channel config for skipped rows). To force-update an existing
+	// tree, delete and re-import or use a future /org/sync route.
 	existingID, existing, lookupErr := h.lookupExistingChild(ctxLookup, ws.Name, parentID)
 	if lookupErr != nil {
 		return fmt.Errorf("idempotency check for %s: %w", ws.Name, lookupErr)
 	}
 	if existing {
 		log.Printf("Org import: %q already exists (id=%s) — skipping create+provision, recursing into children for partial-match", ws.Name, existingID)
+		parentRef := ""
+		if parentID != nil {
+			parentRef = *parentID
+		}
+		provlog.Event("provision.skip_existing", map[string]any{
+			"name": ws.Name,
+			"existing_id": existingID,
+			"parent_id": parentRef,
+			"tier": tier,
+		})
 		*results = append(*results, map[string]interface{}{
 			"id": existingID,
 			"name": ws.Name,
@@ -580,6 +624,12 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
 //
 // On sql.ErrNoRows: returns ("", false, nil) — caller should INSERT.
 // On a real DB error: returns ("", false, err) — caller propagates.
+//
+// errors.Is (rather than ==) is wrap-safe: database/sql can surface
+// wrapped driver errors in some setups, and a future caller may wrap
+// the result with %w. Either would make a bare `err == sql.ErrNoRows`
+// equality check silently stop matching, sending the no-rows happy
+// path down the "real DB error" branch and aborting the import.
+// errors.Is unwraps.
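+//
+// Concretely (illustrative):
+//
+//	wrapped := fmt.Errorf("lookup: %w", sql.ErrNoRows)
+//	wrapped == sql.ErrNoRows          // false — would abort the import
+//	errors.Is(wrapped, sql.ErrNoRows) // true  — not-found path taken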
func (h *OrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) { var existingID string err := db.DB.QueryRowContext(ctx, ` @@ -589,7 +639,7 @@ func (h *OrgHandler) lookupExistingChild(ctx context.Context, name string, paren AND status != 'removed' LIMIT 1 `, name, parentID).Scan(&existingID) - if err == sql.ErrNoRows { + if errors.Is(err, sql.ErrNoRows) { return "", false, nil } if err != nil { diff --git a/workspace-server/internal/handlers/org_import_idempotency_test.go b/workspace-server/internal/handlers/org_import_idempotency_test.go index 0d7498fb..1f2955cb 100644 --- a/workspace-server/internal/handlers/org_import_idempotency_test.go +++ b/workspace-server/internal/handlers/org_import_idempotency_test.go @@ -1,11 +1,17 @@ package handlers import ( - "bytes" "context" + "database/sql" "errors" + "fmt" + "go/ast" + "go/parser" + "go/token" "os" "path/filepath" + "regexp" + "strconv" "strings" "testing" @@ -119,6 +125,90 @@ func TestLookupExistingChild_DBError_Propagates(t *testing.T) { } } +// TestLookupExistingChild_WrappedNoRows_TreatedAsNotFound — pins the +// wrap-safety of the errors.Is(err, sql.ErrNoRows) check. The previous +// `err == sql.ErrNoRows` equality would fall through to the +// "real DB error" branch on a wrapped no-rows error, aborting the +// import for what is in fact the no-rows happy path. driver/sql +// wrapping is currently a non-issue but a future driver change or a +// caller that wraps the result via fmt.Errorf("…: %w", err) would +// silently break the equality check. errors.Is unwraps. +func TestLookupExistingChild_WrappedNoRows_TreatedAsNotFound(t *testing.T) { + mock := setupTestDB(t) + parent := "parent-1" + wrapped := fmt.Errorf("driver-wrapped: %w", sql.ErrNoRows) + mock.ExpectQuery(`SELECT id FROM workspaces`). + WithArgs("Alpha", &parent). + WillReturnError(wrapped) + + h := &OrgHandler{} + id, found, err := h.lookupExistingChild(context.Background(), "Alpha", &parent) + + if err != nil { + t.Fatalf("expected wrapped no-rows to be treated as not-found (err=nil), got: %v", err) + } + if found { + t.Errorf("expected found=false on wrapped no-rows, got found=true") + } + if id != "" { + t.Errorf("expected empty id on wrapped no-rows, got %q", id) + } +} + +// workspacesInsertRE matches a SQL literal that begins (after optional +// leading whitespace) with `INSERT INTO workspaces` followed by `(` — +// requiring the open-paren rules out lookalikes like +// `INSERT INTO workspaces_audit`, `INSERT INTO workspace_secrets`, +// `INSERT INTO workspace_channels`, `INSERT INTO canvas_layouts`. The +// previous bytes.Index gate accepted `workspaces_audit` as a prefix +// match — see RFC #2872 Important-1 for the silent-false-pass shape. +var workspacesInsertRE = regexp.MustCompile(`(?s)^\s*INSERT\s+INTO\s+workspaces\s*\(`) + +// findLookupAndWorkspacesInsertPos walks the AST of `src` and returns +// the source positions of (a) the first call to `lookupExistingChild` +// and (b) the first CallExpr whose argument list contains a STRING +// BasicLit matching workspacesInsertRE. Either may be token.NoPos if +// not found. +// +// Extracted as a helper so the gate logic can be exercised against +// synthetic source — TestGate_FailsWhenLookupAfterInsert below proves +// the gate actually catches the bug shape, not just the happy path. 
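+//
+// Shape of a call site (mirrors the production gate in
+// TestCreateWorkspaceTree_CallsLookupBeforeInsert below):
+//
+//	lookupPos, insertPos, fset := findLookupAndWorkspacesInsertPos(t, "org_import.go", src)
+//	if lookupPos != token.NoPos && insertPos != token.NoPos && lookupPos > insertPos {
+//		// lookup AFTER the INSERT: the non-idempotent ordering, so fail
+//	}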
+func findLookupAndWorkspacesInsertPos(t *testing.T, fname string, src []byte) (lookupPos, insertPos token.Pos, fset *token.FileSet) { + t.Helper() + fset = token.NewFileSet() + file, err := parser.ParseFile(fset, fname, src, parser.ParseComments) + if err != nil { + t.Fatalf("parse %s: %v", fname, err) + } + lookupPos, insertPos = token.NoPos, token.NoPos + ast.Inspect(file, func(n ast.Node) bool { + call, ok := n.(*ast.CallExpr) + if !ok { + return true + } + if sel, ok := call.Fun.(*ast.SelectorExpr); ok { + if sel.Sel.Name == "lookupExistingChild" && lookupPos == token.NoPos { + lookupPos = call.Pos() + } + } + for _, arg := range call.Args { + lit, ok := arg.(*ast.BasicLit) + if !ok || lit.Kind != token.STRING { + continue + } + raw := lit.Value + if unq, err := strconv.Unquote(raw); err == nil { + raw = unq + } + if workspacesInsertRE.MatchString(raw) && insertPos == token.NoPos { + insertPos = call.Pos() + } + } + return true + }) + return +} + // Source-level guard — pins that org_import.go calls // h.lookupExistingChild BEFORE its INSERT INTO workspaces. // @@ -126,6 +216,11 @@ func TestLookupExistingChild_DBError_Propagates(t *testing.T) { // (idempotency check before INSERT), not just function names. If a // future refactor reintroduces the un-checked INSERT (the original // bug shape that leaked 72 workspaces in 4 days), this test fails. +// +// AST-walk implementation closes the silent-false-pass mode that the +// previous bytes.Index gate had — see workspacesInsertRE comment for +// the failure mode (workspaces_audit / workspace_secrets / etc. +// shadowing the real target via prefix match). func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) { wd, err := os.Getwd() if err != nil { @@ -135,17 +230,189 @@ func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) { if err != nil { t.Fatalf("read org_import.go: %v", err) } + lookupPos, insertPos, fset := findLookupAndWorkspacesInsertPos(t, "org_import.go", src) - lookupAt := bytes.Index(src, []byte("h.lookupExistingChild(")) - insertAt := bytes.Index(src, []byte("INSERT INTO workspaces")) - - if lookupAt < 0 { - t.Fatalf("org_import.go missing call to h.lookupExistingChild — idempotency check removed?") + if lookupPos == token.NoPos { + t.Fatalf("AST: no call to lookupExistingChild in org_import.go — idempotency check removed?") } - if insertAt < 0 { - t.Fatalf("org_import.go missing INSERT INTO workspaces — schema change?") + if insertPos == token.NoPos { + t.Fatalf("AST: no SQL literal matching `^\\s*INSERT INTO workspaces\\s*\\(` in any CallExpr in org_import.go — schema change or rename?") } - if lookupAt > insertAt { - t.Errorf("h.lookupExistingChild must come BEFORE INSERT INTO workspaces in org_import.go (lookup@%d, insert@%d) — non-idempotent ordering would re-leak under repeat /org/import calls", lookupAt, insertAt) + if lookupPos > insertPos { + t.Errorf("lookupExistingChild call at %s must come BEFORE INSERT INTO workspaces at %s — non-idempotent ordering would re-leak under repeat /org/import calls", + fset.Position(lookupPos), fset.Position(insertPos)) + } +} + +// TestGate_FailsWhenLookupAfterInsert proves the gate actually catches +// the bug it's named after — running it against synthetic Go source +// where the lookup call is positioned AFTER the workspaces INSERT must +// produce lookupPos > insertPos, which the production gate flags as +// an ERROR. Without this test the gate could regress to "always pass" +// and we wouldn't notice until the bug shipped again. 
+// +// Per memory feedback_assert_exact_not_substring.md: verify a +// tightened test FAILS on old code before merging. +func TestGate_FailsWhenLookupAfterInsert(t *testing.T) { + const buggySrc = `package handlers + +import "context" + +type fakeDB struct{} + +func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {} + +type fakeOrgHandler struct{} + +func (h *fakeOrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) { + return "", false, nil +} + +func buggyCreate(h *fakeOrgHandler, db fakeDB, ctx context.Context, name string, parentID *string) { + // Bug shape: INSERT runs FIRST, lookup runs AFTER. This is the + // non-idempotent ordering the gate exists to forbid. + db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", name) + h.lookupExistingChild(ctx, name, parentID) +} +` + lookupPos, insertPos, _ := findLookupAndWorkspacesInsertPos(t, "buggy.go", []byte(buggySrc)) + if lookupPos == token.NoPos || insertPos == token.NoPos { + t.Fatalf("synthetic buggy source missing expected nodes (lookupPos=%v insertPos=%v) — helper logic regression", lookupPos, insertPos) + } + if lookupPos < insertPos { + t.Fatalf("synthetic bug shape (lookup AFTER insert) returned lookupPos=%d < insertPos=%d — gate would NOT fire on actual bug, regression!", lookupPos, insertPos) + } + // Implicit: lookupPos > insertPos here, which the production gate + // flags via t.Errorf. This proves the gate is live, not vestigial. +} + +// TestGate_IgnoresAuditTableShadow proves the regex tightening +// actually ignores `INSERT INTO workspaces_audit` literals — the +// specific shape #2872 cited as the silent-false-pass failure mode +// for the previous bytes.Index gate. +func TestGate_IgnoresAuditTableShadow(t *testing.T) { + // Synthetic source with audit-table INSERT at line 1 (would be + // position 0 under prefix-match) and lookup + real INSERT at later + // positions. With the tightened regex, the audit literal is + // ignored: insertPos points at the REAL INSERT, lookup precedes it, + // gate passes correctly. + const src = `package handlers + +import "context" + +type fakeDB struct{} + +func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {} + +type fakeOrgHandler struct{} + +func (h *fakeOrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) { + return "", false, nil +} + +func okCreateWithAudit(h *fakeOrgHandler, db fakeDB, ctx context.Context, name string, parentID *string) { + // Audit-table INSERT — should be IGNORED by the tightened regex. + db.ExecContext(ctx, ` + "`INSERT INTO workspaces_audit (id, action) VALUES ($1, $2)`" + `, "x", "create_attempt") + // Lookup BEFORE real INSERT — correct order. + h.lookupExistingChild(ctx, name, parentID) + // Real INSERT. + db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", name) +} +` + lookupPos, insertPos, fset := findLookupAndWorkspacesInsertPos(t, "shadow.go", []byte(src)) + if lookupPos == token.NoPos || insertPos == token.NoPos { + t.Fatalf("expected to find lookup + real INSERT, got lookupPos=%v insertPos=%v", lookupPos, insertPos) + } + // The audit-table INSERT is at line ~16 (column ~20-ish), the + // lookup is at line 19, the real INSERT is at line 21. If the + // regex regressed to prefix-match, insertPos would point at the + // audit literal at line 16, and the gate would falsely fail + // (lookup at 19 > "insert" at 16). 
With the tightened regex, + // insertPos correctly points at line 21, and the gate passes. + insertLine := fset.Position(insertPos).Line + lookupLine := fset.Position(lookupPos).Line + if insertLine < lookupLine { + t.Errorf("regex regressed: audit shadow at line %d swallowed real INSERT (lookup at line %d). insertPos should point at the real INSERT (line ~21), not the audit literal.", + insertLine, lookupLine) + } + if lookupPos > insertPos { + t.Errorf("synthetic source has lookup at line %d before real INSERT at line %d, gate should pass (lookupPos < insertPos), got lookupPos=%d > insertPos=%d", + lookupLine, insertLine, lookupPos, insertPos) + } +} + +// TestWorkspacesInsertRE_RejectsLookalikes pins the regex that +// discriminates the real workspaces INSERT from prefix-matching +// lookalikes. If this regex regresses to a substring match, the +// AST gate above silently false-passes when a future refactor +// shadows the real INSERT with a workspaces_audit / workspace_secrets +// / canvas_layouts literal placed earlier in source. +func TestWorkspacesInsertRE_RejectsLookalikes(t *testing.T) { + cases := []struct { + sql string + want bool + comment string + }{ + {"INSERT INTO workspaces (id, name) VALUES ($1, $2)", true, "real target"}, + {"\n\t\tINSERT INTO workspaces (id, name)\n\t\tVALUES ($1, $2)", true, "real target with leading whitespace + newlines (raw string literal shape)"}, + {"INSERT INTO workspaces_audit (id) VALUES ($1)", false, "underscore-suffix lookalike (the #2872 specific failure mode)"}, + {"INSERT INTO workspace_secrets (key, value) VALUES ($1, $2)", false, "prefix without trailing 's' (workspace_*)"}, + {"INSERT INTO workspace_channels (id) VALUES ($1)", false, "another workspace_* prefix"}, + {"INSERT INTO canvas_layouts (workspace_id, x, y) VALUES ($1, $2, $3)", false, "unrelated table that contains 'workspace' in a column ref"}, + {"UPDATE workspaces SET status='running' WHERE id=$1", false, "UPDATE shouldn't match"}, + {"SELECT * FROM workspaces WHERE id=$1", false, "SELECT shouldn't match"}, + {"-- comment about INSERT INTO workspaces (\nSELECT 1", false, "comment shouldn't match"}, + } + for _, c := range cases { + got := workspacesInsertRE.MatchString(c.sql) + if got != c.want { + t.Errorf("workspacesInsertRE.MatchString(%q) = %v, want %v (%s)", c.sql, got, c.want, c.comment) + } + } +} + +// Confirm the regex actually matches the literal currently in +// org_import.go. Pins the shape so `gofmt` reflows or trivial edits +// to the SQL string don't silently disable the gate above. +func TestWorkspacesInsertRE_MatchesActualSourceLiteral(t *testing.T) { + wd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + src, err := os.ReadFile(filepath.Join(wd, "org_import.go")) + if err != nil { + t.Fatalf("read org_import.go: %v", err) + } + // Strip backtick strings, find any whose content matches. + // Walk the source via parser.ParseFile to avoid string-search + // drift if the literal is reflowed. 
+
+	fset := token.NewFileSet()
+	file, err := parser.ParseFile(fset, filepath.Join(wd, "org_import.go"), src, parser.ParseComments)
+	if err != nil {
+		t.Fatalf("parse org_import.go: %v", err)
+	}
+	var matched bool
+	ast.Inspect(file, func(n ast.Node) bool {
+		lit, ok := n.(*ast.BasicLit)
+		if !ok || lit.Kind != token.STRING {
+			return true
+		}
+		raw := lit.Value
+		if unq, err := strconv.Unquote(raw); err == nil {
+			raw = unq
+		}
+		if workspacesInsertRE.MatchString(raw) {
+			matched = true
+		}
+		return true
+	})
+	// Check the raw substring before the regex verdict so a failure
+	// discriminates its cause: substring missing entirely → the
+	// source itself changed (schema change or rename); substring
+	// present but regex unmatched → a regex-side regression (see the
+	// regex unit tests above).
+	if !strings.Contains(string(src), "INSERT INTO workspaces") {
+		t.Fatalf("org_import.go has no `INSERT INTO workspaces` substring at all — schema change?")
+	}
+	if !matched {
+		t.Fatalf("no SQL literal in org_import.go matches workspacesInsertRE — gate is dead. Either the INSERT was renamed (update the regex) or the file was restructured (review the gate logic).")
+	}
+}
diff --git a/workspace-server/internal/handlers/pending_uploads_integration_test.go b/workspace-server/internal/handlers/pending_uploads_integration_test.go
new file mode 100644
index 00000000..61c64f86
--- /dev/null
+++ b/workspace-server/internal/handlers/pending_uploads_integration_test.go
@@ -0,0 +1,476 @@
+//go:build integration
+// +build integration
+
+// pending_uploads_integration_test.go — REAL Postgres integration
+// tests for the poll-mode chat upload flow (RFC: phases 1–3).
+//
+// Run with:
+//
+//	docker run --rm -d --name pg-integration \
+//	  -e POSTGRES_PASSWORD=test -e POSTGRES_DB=molecule \
+//	  -p 55432:5432 postgres:15-alpine
+//	sleep 4
+//	psql ... < workspace-server/migrations/20260505100000_pending_uploads.up.sql
+//	cd workspace-server
+//	INTEGRATION_DB_URL="postgres://postgres:test@localhost:55432/molecule?sslmode=disable" \
+//	  go test -tags=integration ./internal/handlers/ -run Integration_PendingUploads
+//
+// CI (.github/workflows/handlers-postgres-integration.yml) runs this on
+// every PR that touches workspace-server/internal/handlers/** OR
+// workspace-server/migrations/**.
+//
+// Why these are NOT plain unit tests
+// ----------------------------------
+// The strict-sqlmock unit tests in storage_test.go pin which SQL
+// statements fire — they are fast and let us iterate without a DB. But
+// sqlmock CANNOT detect bugs that depend on the actual row state after
+// the SQL runs. In particular:
+//
+// - the WITH … DELETE … RETURNING CTE used by Sweep depends on
+//   Postgres' `make_interval` function and the table's CHECK
+//   constraints. sqlmock would happily accept a hand-written SQL
+//   literal that Postgres rejects at runtime.
+// - the partial index `idx_pending_uploads_unacked` (created by the
+//   Phase 1 migration) only catches a wrong WHERE predicate at real-
+//   query-plan time.
+//
+// These tests close those gaps by booting a real Postgres, running the
+// production helpers, and SELECTing the row to verify the observable
+// state matches the expected outcome. 
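+//
+// For orientation, the table shape these tests assume, reconstructed
+// from the assertions below. The authoritative DDL is the Phase 1
+// migration (20260505100000_pending_uploads.up.sql); column types
+// here are a best-effort sketch, not copied from it:
+//
+//	CREATE TABLE pending_uploads (
+//	    file_id      uuid PRIMARY KEY,
+//	    workspace_id uuid        NOT NULL,
+//	    content      bytea       NOT NULL,
+//	    filename     text        NOT NULL,
+//	    mimetype     text        NOT NULL,
+//	    fetched_at   timestamptz,
+//	    acked_at     timestamptz,
+//	    expires_at   timestamptz NOT NULL
+//	);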
+ +package handlers + +import ( + "context" + "database/sql" + "os" + "strings" + "testing" + "time" + + "github.com/google/uuid" + _ "github.com/lib/pq" + + "github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads" +) + +// integrationDB_PendingUploads opens a connection from $INTEGRATION_DB_URL +// (skipping the test if unset), wipes the pending_uploads table for +// isolation, and registers a Cleanup that closes the connection. +// +// NOT SAFE FOR `t.Parallel()` — each test gets the table to itself. +// Mirrors the integrationDB helper in delegation_ledger_integration_test.go +// but kept separate so each table's wipe step is local to its tests. +func integrationDB_PendingUploads(t *testing.T) *sql.DB { + t.Helper() + url := os.Getenv("INTEGRATION_DB_URL") + if url == "" { + t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)") + } + conn, err := sql.Open("postgres", url) + if err != nil { + t.Fatalf("open: %v", err) + } + if err := conn.Ping(); err != nil { + t.Fatalf("ping: %v", err) + } + if _, err := conn.ExecContext(context.Background(), `DELETE FROM pending_uploads`); err != nil { + t.Fatalf("cleanup: %v", err) + } + t.Cleanup(func() { conn.Close() }) + return conn +} + +func TestIntegration_PendingUploads_PutGetAckRoundTrip(t *testing.T) { + conn := integrationDB_PendingUploads(t) + store := pendinguploads.NewPostgres(conn) + ctx := context.Background() + + wsID := uuid.New() + fileID, err := store.Put(ctx, wsID, []byte("hello PDF"), "report.pdf", "application/pdf") + if err != nil { + t.Fatalf("Put: %v", err) + } + + // Get reads back the row. + rec, err := store.Get(ctx, fileID) + if err != nil { + t.Fatalf("Get: %v", err) + } + if rec.WorkspaceID != wsID { + t.Errorf("workspace_id = %s, want %s", rec.WorkspaceID, wsID) + } + if string(rec.Content) != "hello PDF" { + t.Errorf("content = %q, want %q", rec.Content, "hello PDF") + } + if rec.Filename != "report.pdf" { + t.Errorf("filename = %q, want %q", rec.Filename, "report.pdf") + } + if rec.AckedAt != nil { + t.Errorf("AckedAt should be nil before Ack, got %v", rec.AckedAt) + } + + // MarkFetched stamps fetched_at. + if err := store.MarkFetched(ctx, fileID); err != nil { + t.Fatalf("MarkFetched: %v", err) + } + + // Re-read to confirm. + rec2, err := store.Get(ctx, fileID) + if err != nil { + t.Fatalf("Get after MarkFetched: %v", err) + } + if rec2.FetchedAt == nil { + t.Errorf("FetchedAt should be set after MarkFetched") + } + + // Ack flips acked_at; subsequent Gets return ErrNotFound (acked rows + // are filtered out at the SELECT predicate). + if err := store.Ack(ctx, fileID); err != nil { + t.Fatalf("Ack: %v", err) + } + if _, err := store.Get(ctx, fileID); err != pendinguploads.ErrNotFound { + t.Errorf("Get after Ack: got %v, want ErrNotFound", err) + } + + // Idempotent re-ack succeeds. + if err := store.Ack(ctx, fileID); err != nil { + t.Errorf("re-Ack should be idempotent, got %v", err) + } +} + +func TestIntegration_PendingUploads_Sweep_DeletesAckedAfterRetention(t *testing.T) { + conn := integrationDB_PendingUploads(t) + store := pendinguploads.NewPostgres(conn) + ctx := context.Background() + + wsID := uuid.New() + fid, err := store.Put(ctx, wsID, []byte("data"), "x.txt", "text/plain") + if err != nil { + t.Fatalf("Put: %v", err) + } + if err := store.Ack(ctx, fid); err != nil { + t.Fatalf("Ack: %v", err) + } + + // retention=1h, row was acked just now → not yet eligible. 
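+	//
+	// For context, the statement Sweep issues is roughly this shape
+	// (a sketch assembled from the file-header notes on the
+	// WITH … DELETE … RETURNING CTE and make_interval, not the
+	// production SQL verbatim):
+	//
+	//	WITH gone AS (
+	//	    DELETE FROM pending_uploads
+	//	    WHERE (acked_at IS NOT NULL AND acked_at < now() - make_interval(secs => $1))
+	//	       OR (acked_at IS NULL AND expires_at < now())
+	//	    RETURNING acked_at
+	//	)
+	//	SELECT count(*) FILTER (WHERE acked_at IS NOT NULL) AS acked,
+	//	       count(*) FILTER (WHERE acked_at IS NULL) AS expired
+	//	FROM gone;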
+ res, err := store.Sweep(ctx, time.Hour) + if err != nil { + t.Fatalf("Sweep(1h): %v", err) + } + if res.Total() != 0 { + t.Errorf("expected 0 deletions yet, got %+v", res) + } + + // retention=0 → row IS eligible immediately. + res, err = store.Sweep(ctx, 0) + if err != nil { + t.Fatalf("Sweep(0): %v", err) + } + if res.Acked != 1 || res.Expired != 0 { + t.Errorf("expected acked=1 expired=0, got %+v", res) + } + + // Verify row is actually gone — not just un-fetchable. + var n int + if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE file_id = $1`, fid).Scan(&n); err != nil { + t.Fatalf("count: %v", err) + } + if n != 0 { + t.Errorf("row should be DELETEd, found %d rows", n) + } +} + +func TestIntegration_PendingUploads_Sweep_DeletesExpiredUnacked(t *testing.T) { + conn := integrationDB_PendingUploads(t) + store := pendinguploads.NewPostgres(conn) + ctx := context.Background() + + wsID := uuid.New() + fid, err := store.Put(ctx, wsID, []byte("data"), "x.txt", "text/plain") + if err != nil { + t.Fatalf("Put: %v", err) + } + + // Manually backdate expires_at so the row IS expired. We don't ack, + // so this exercises the unacked-and-expired branch of the WHERE + // clause specifically. + if _, err := conn.ExecContext(ctx, + `UPDATE pending_uploads SET expires_at = now() - interval '1 minute' WHERE file_id = $1`, + fid, + ); err != nil { + t.Fatalf("backdate: %v", err) + } + + res, err := store.Sweep(ctx, time.Hour) + if err != nil { + t.Fatalf("Sweep: %v", err) + } + if res.Acked != 0 || res.Expired != 1 { + t.Errorf("expected acked=0 expired=1, got %+v", res) + } +} + +func TestIntegration_PendingUploads_Sweep_DeletesBothCategoriesInOneCycle(t *testing.T) { + conn := integrationDB_PendingUploads(t) + store := pendinguploads.NewPostgres(conn) + ctx := context.Background() + + wsID := uuid.New() + + // Three rows: one acked (eligible at retention=0), one expired + // unacked, one fresh unacked (must NOT be deleted). + ackedFID, err := store.Put(ctx, wsID, []byte("acked"), "a.txt", "text/plain") + if err != nil { + t.Fatalf("Put acked: %v", err) + } + if err := store.Ack(ctx, ackedFID); err != nil { + t.Fatalf("Ack: %v", err) + } + + expiredFID, err := store.Put(ctx, wsID, []byte("expired"), "e.txt", "text/plain") + if err != nil { + t.Fatalf("Put expired: %v", err) + } + if _, err := conn.ExecContext(ctx, + `UPDATE pending_uploads SET expires_at = now() - interval '1 minute' WHERE file_id = $1`, + expiredFID, + ); err != nil { + t.Fatalf("backdate: %v", err) + } + + freshFID, err := store.Put(ctx, wsID, []byte("fresh"), "f.txt", "text/plain") + if err != nil { + t.Fatalf("Put fresh: %v", err) + } + + res, err := store.Sweep(ctx, 0) // retention=0 makes the acked row eligible + if err != nil { + t.Fatalf("Sweep: %v", err) + } + if res.Acked != 1 || res.Expired != 1 { + t.Errorf("expected acked=1 expired=1, got %+v", res) + } + + // Fresh row survives. 
+ rec, err := store.Get(ctx, freshFID) + if err != nil { + t.Errorf("fresh row should still be Get-able, got err=%v", err) + } + if rec.FileID != freshFID { + t.Errorf("fresh row file_id = %s, want %s", rec.FileID, freshFID) + } +} + +func TestIntegration_PendingUploads_PutEnforcesSizeCap(t *testing.T) { + conn := integrationDB_PendingUploads(t) + store := pendinguploads.NewPostgres(conn) + ctx := context.Background() + + wsID := uuid.New() + tooBig := make([]byte, pendinguploads.MaxFileBytes+1) + if _, err := store.Put(ctx, wsID, tooBig, "big.bin", "application/octet-stream"); err != pendinguploads.ErrTooLarge { + t.Errorf("expected ErrTooLarge, got %v", err) + } +} + +// TestIntegration_PendingUploads_PutBatch_HappyPath_AllRowsCommit pins the +// "all rows commit" leg of the PutBatch atomicity contract against a real +// Postgres. sqlmock can't catch a regression where the Go-side Tx machinery +// silently no-ops the inserts (e.g., wrong driver options on BeginTx); only +// COUNT(*) on the real table can. +func TestIntegration_PendingUploads_PutBatch_HappyPath_AllRowsCommit(t *testing.T) { + conn := integrationDB_PendingUploads(t) + store := pendinguploads.NewPostgres(conn) + ctx := context.Background() + + wsID := uuid.New() + + // Pre-existing row so the COUNT(*) baseline is non-zero — proves + // PutBatch adds rows incrementally rather than overwriting. + if _, err := store.Put(ctx, wsID, []byte("seed"), "seed.txt", "text/plain"); err != nil { + t.Fatalf("seed Put: %v", err) + } + + items := []pendinguploads.PutItem{ + {Content: []byte("alpha"), Filename: "alpha.txt", Mimetype: "text/plain"}, + {Content: []byte("beta"), Filename: "beta.bin", Mimetype: "application/octet-stream"}, + {Content: []byte("gamma"), Filename: "gamma.pdf", Mimetype: "application/pdf"}, + } + ids, err := store.PutBatch(ctx, wsID, items) + if err != nil { + t.Fatalf("PutBatch: %v", err) + } + if len(ids) != len(items) { + t.Fatalf("ids length %d, want %d", len(ids), len(items)) + } + + // Each returned id round-trips through Get with the right content. + for i, id := range ids { + rec, err := store.Get(ctx, id) + if err != nil { + t.Fatalf("Get item %d (%s): %v", i, id, err) + } + if string(rec.Content) != string(items[i].Content) { + t.Errorf("item %d content = %q, want %q", i, rec.Content, items[i].Content) + } + if rec.Filename != items[i].Filename { + t.Errorf("item %d filename = %q, want %q", i, rec.Filename, items[i].Filename) + } + } + + var n int + if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&n); err != nil { + t.Fatalf("count: %v", err) + } + if n != 4 { + t.Errorf("workspace row count = %d, want 4 (1 seed + 3 batch)", n) + } +} + +// TestIntegration_PendingUploads_PutBatch_AtomicRollback_NoLeakOnFailure +// proves the all-or-nothing contract end-to-end against real Postgres MVCC. +// +// Strategy: build a 3-item batch where item index 1 carries a filename with +// an embedded NUL byte. lib/pq rejects NULs in TEXT columns at the protocol +// layer (`pq: invalid byte sequence for encoding "UTF8": 0x00`), which +// triggers the per-row INSERT error path in PutBatch. The first item's +// INSERT…RETURNING already wrote a row to the Tx's snapshot, so a buggy +// rollback would leave that row visible after PutBatch returns. 
+
+// Postgres semantics: ROLLBACK is the only way a real DB can guarantee the
+// "no leak" contract; a unit test with sqlmock can prove the Go function
+// CALLED Rollback, but only this integration test proves Postgres actually
+// HONORED it.
+func TestIntegration_PendingUploads_PutBatch_AtomicRollback_NoLeakOnFailure(t *testing.T) {
+	conn := integrationDB_PendingUploads(t)
+	store := pendinguploads.NewPostgres(conn)
+	ctx := context.Background()
+
+	wsID := uuid.New()
+
+	// Baseline COUNT(*) for this workspace — must remain 0 after a failed batch.
+	var before int
+	if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&before); err != nil {
+		t.Fatalf("baseline count: %v", err)
+	}
+	if before != 0 {
+		t.Fatalf("workspace not isolated: baseline = %d, want 0", before)
+	}
+
+	// Item 1 has a NUL byte in the filename — Go-side pre-validation
+	// (which only checks empty/length) lets it through, so the INSERT
+	// reaches lib/pq, which rejects it at the protocol level. That's the
+	// canonical "DB-side error mid-batch" we want to exercise.
+	items := []pendinguploads.PutItem{
+		{Content: []byte("ok"), Filename: "ok.txt", Mimetype: "text/plain"},
+		{Content: []byte("bad"), Filename: "bad\x00name.txt", Mimetype: "text/plain"},
+		{Content: []byte("never"), Filename: "never.txt", Mimetype: "text/plain"},
+	}
+	_, err := store.PutBatch(ctx, wsID, items)
+	if err == nil {
+		t.Fatalf("expected error from NUL-byte filename, got nil")
+	}
+
+	// THE assertion this whole test exists for: even though item 0's
+	// INSERT…RETURNING succeeded inside the Tx, the rollback unwound
+	// it — zero rows for this workspace, not one (let alone three).
+	var after int
+	if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&after); err != nil {
+		t.Fatalf("post-failure count: %v", err)
+	}
+	if after != 0 {
+		t.Errorf("Tx rollback leaked rows: workspace count = %d, want 0", after)
+	}
+}
+
+// TestIntegration_PendingUploads_PutBatch_Oversize_NoTxOpened verifies the
+// pre-validation short-circuit: an oversized item rejects with ErrTooLarge
+// BEFORE any Tx opens, so the table is untouched. The unit test (sqlmock
+// with zero expectations) catches the Go-side path; this test sanity-checks
+// no real DB I/O happens by confirming COUNT(*) doesn't move.
+func TestIntegration_PendingUploads_PutBatch_Oversize_NoTxOpened(t *testing.T) {
+	conn := integrationDB_PendingUploads(t)
+	store := pendinguploads.NewPostgres(conn)
+	ctx := context.Background()
+
+	wsID := uuid.New()
+	tooBig := make([]byte, pendinguploads.MaxFileBytes+1)
+	_, err := store.PutBatch(ctx, wsID, []pendinguploads.PutItem{
+		{Content: []byte("ok"), Filename: "ok.txt"},
+		{Content: tooBig, Filename: "too-big.bin"},
+	})
+	if err != pendinguploads.ErrTooLarge {
+		t.Fatalf("expected ErrTooLarge, got %v", err)
+	}
+	var n int
+	if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&n); err != nil {
+		t.Fatalf("count: %v", err)
+	}
+	if n != 0 {
+		t.Errorf("pre-validation did NOT short-circuit: count = %d, want 0", n)
+	}
+}
+
+// TestIntegration_PendingUploads_AckedIndexExists verifies the Phase 5a
+// migration (20260505200000_pending_uploads_acked_index.up.sql) actually
+// created idx_pending_uploads_acked with the right partial-index predicate. 
+// +// Why pg_indexes and not EXPLAIN: the planner prefers Seq Scan on tiny +// tables regardless of available indexes — a plan-shape check would be +// flaky under real test loads. The contract we care about is "the index +// exists with the predicate we wrote in the migration"; pg_indexes is +// the canonical source for that, robust to row count and planner version. +func TestIntegration_PendingUploads_AckedIndexExists(t *testing.T) { + conn := integrationDB_PendingUploads(t) + ctx := context.Background() + + var indexdef string + err := conn.QueryRowContext(ctx, ` + SELECT indexdef FROM pg_indexes + WHERE schemaname = 'public' + AND tablename = 'pending_uploads' + AND indexname = 'idx_pending_uploads_acked' + `).Scan(&indexdef) + if err == sql.ErrNoRows { + t.Fatal("idx_pending_uploads_acked is missing — migration 20260505200000 not applied") + } + if err != nil { + t.Fatalf("pg_indexes query: %v", err) + } + + // Pin the partial-index predicate. Without "WHERE acked_at IS NOT NULL" + // we'd be indexing the entire table (defeats the point — most rows are + // unacked), and the existing idx_pending_uploads_unacked already covers + // the inverse predicate. + if !strings.Contains(indexdef, "(acked_at)") { + t.Errorf("index missing acked_at column: %s", indexdef) + } + if !strings.Contains(indexdef, "WHERE (acked_at IS NOT NULL)") { + t.Errorf("index missing partial predicate: %s", indexdef) + } +} + +func TestIntegration_PendingUploads_GetIgnoresExpiredAndAcked(t *testing.T) { + conn := integrationDB_PendingUploads(t) + store := pendinguploads.NewPostgres(conn) + ctx := context.Background() + + wsID := uuid.New() + fid, err := store.Put(ctx, wsID, []byte("data"), "x.txt", "text/plain") + if err != nil { + t.Fatalf("Put: %v", err) + } + + // Backdate expires_at — Get must return ErrNotFound, even though the + // row physically exists in the table (Sweep hasn't run). + if _, err := conn.ExecContext(ctx, + `UPDATE pending_uploads SET expires_at = now() - interval '1 minute' WHERE file_id = $1`, + fid, + ); err != nil { + t.Fatalf("backdate: %v", err) + } + if _, err := store.Get(ctx, fid); err != pendinguploads.ErrNotFound { + t.Errorf("Get after expiry: got %v, want ErrNotFound", err) + } +} diff --git a/workspace-server/internal/handlers/pending_uploads_test.go b/workspace-server/internal/handlers/pending_uploads_test.go index 17da24af..778e8170 100644 --- a/workspace-server/internal/handlers/pending_uploads_test.go +++ b/workspace-server/internal/handlers/pending_uploads_test.go @@ -71,6 +71,20 @@ func (f *fakeStorage) Ack(_ context.Context, fileID uuid.UUID) error { return nil } +// Sweep is required by the Storage interface (Phase 3 GC). Not exercised +// by these handler tests — the dedicated sweeper_test.go covers it. +func (f *fakeStorage) Sweep(_ context.Context, _ time.Duration) (pendinguploads.SweepResult, error) { + return pendinguploads.SweepResult{}, nil +} + +// PutBatch is required by the Storage interface; the upload handler +// tests live in chat_files_poll_test.go and use a separate fake +// (inMemStorage). Stubbed here because the Get/Ack tests don't drive +// PutBatch, but the interface must be satisfied. 
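+//
+// Assembled from the methods exercised across these test files, the
+// Storage surface the fake satisfies looks roughly like this (the
+// canonical definition lives in the pendinguploads package; return
+// shapes are approximated from the call sites, so treat this as a
+// sketch):
+//
+//	type Storage interface {
+//	    Put(ctx context.Context, wsID uuid.UUID, content []byte, filename, mimetype string) (uuid.UUID, error)
+//	    PutBatch(ctx context.Context, wsID uuid.UUID, items []PutItem) ([]uuid.UUID, error)
+//	    Get(ctx context.Context, fileID uuid.UUID) (Record, error)
+//	    MarkFetched(ctx context.Context, fileID uuid.UUID) error
+//	    Ack(ctx context.Context, fileID uuid.UUID) error
+//	    Sweep(ctx context.Context, retention time.Duration) (SweepResult, error)
+//	}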
+func (f *fakeStorage) PutBatch(_ context.Context, _ uuid.UUID, _ []pendinguploads.PutItem) ([]uuid.UUID, error) {
+	return nil, nil
+}
+
 func newRouter(handler *handlers.PendingUploadsHandler) *gin.Engine {
 	gin.SetMode(gin.TestMode)
 	r := gin.New()
diff --git a/workspace-server/internal/handlers/provlog_emit_test.go b/workspace-server/internal/handlers/provlog_emit_test.go
new file mode 100644
index 00000000..6681c203
--- /dev/null
+++ b/workspace-server/internal/handlers/provlog_emit_test.go
@@ -0,0 +1,112 @@
+package handlers
+
+// provlog_emit_test.go — pins that the structured-logging emit sites
+// added for #2867 PR-D actually fire when their boundary is crossed.
+//
+// These are call-site contract tests, not provlog package tests (those
+// live next to the helper). The assertion is "this dispatcher path
+// emits this event name" — if a refactor moves the call out of the
+// boundary helper, the gate fails. The full field set is NOT pinned
+// here on purpose: each test checks only the identifying fields it
+// needs (workspace_id, the sync flag, the backend label); the rest of
+// the payload is convenience for ops, not contract for the emit
+// point. Pinning the whole set would block additive evolution of the
+// payload (see also feedback_behavior_based_ast_gates.md).
+
+import (
+	"bytes"
+	"context"
+	"log"
+	"strings"
+	"sync"
+	"testing"
+
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
+)
+
+// captureProvLog redirects the global logger to a buffer for the test
+// duration. provlog.Event uses log.Printf, so this is the only seam.
+// An internal mutex serializes buffer access against the goroutine
+// fired by provisionWorkspaceAuto (the goroutine does no real work in
+// these tests because Start() is stubbed, but it can still write to
+// the logger while the returned read closure runs the assertion).
+func captureProvLog(t *testing.T) (read func() string) {
+	t.Helper()
+	var buf bytes.Buffer
+	var mu sync.Mutex
+	prevWriter := log.Writer()
+	prevFlags := log.Flags()
+	log.SetFlags(0)
+	log.SetOutput(&safeWriter{buf: &buf, mu: &mu})
+	t.Cleanup(func() {
+		log.SetOutput(prevWriter)
+		log.SetFlags(prevFlags)
+	})
+	return func() string {
+		mu.Lock()
+		defer mu.Unlock()
+		return buf.String()
+	}
+}
+
+// TestProvisionWorkspaceAutoSync_EmitsProvisionStart — sync variant is
+// chosen for the assertion path because it returns once the (stubbed)
+// Start() has been called, so we know the emit has flushed. The async
+// variant would race a goroutine.
+func TestProvisionWorkspaceAutoSync_EmitsProvisionStart(t *testing.T) {
+	read := captureProvLog(t)
+	h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
+	// Best-effort: the body will hit DB code under provisionWorkspaceCP
+	// — we only need the emit at the entry, which fires unconditionally
+	// before the dispatch. Recovering from any later panic keeps the
+	// test focused.
+	defer func() { _ = recover() }()
+	h.provisionWorkspaceAutoSync("ws-test-1", "tmpl", nil, models.CreateWorkspacePayload{
+		Name: "n", Tier: 4, Runtime: "claude-code",
+	})
+	got := read()
+	if !strings.Contains(got, "evt: provision.start ") {
+		t.Fatalf("expected provision.start emit, got log:\n%s", got)
+	}
+	if !strings.Contains(got, `"workspace_id":"ws-test-1"`) {
+		t.Errorf("workspace_id not in payload: %s", got)
+	}
+	if !strings.Contains(got, `"sync":true`) {
+		t.Errorf("sync flag not pinned for sync dispatcher: %s", got)
+	}
+}
+
+// TestStopForRestart_EmitsRestartPreStop — emit fires before the actual
+// Stop call, so the trackingCPProv stub doesn't need to be wired for
+// real Stop semantics. 
Backend label "cp" pinned because that's the
+// SaaS path; we don't pin the "docker" branch here (a separate test
+// would only re-test the trivial branch label switch). The "none"
+// branch does get its own test below, because it doubles as the
+// silent-Stop visibility pin.
+func TestStopForRestart_EmitsRestartPreStop(t *testing.T) {
+	read := captureProvLog(t)
+	h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
+	defer func() { _ = recover() }()
+	h.stopForRestart(context.Background(), "ws-restart-1")
+	got := read()
+	if !strings.Contains(got, "evt: restart.pre_stop ") {
+		t.Fatalf("expected restart.pre_stop emit, got log:\n%s", got)
+	}
+	if !strings.Contains(got, `"workspace_id":"ws-restart-1"`) {
+		t.Errorf("workspace_id not in payload: %s", got)
+	}
+	if !strings.Contains(got, `"backend":"cp"`) {
+		t.Errorf("backend label missing or wrong: %s", got)
+	}
+}
+
+// TestStopForRestart_EmitsBackendNoneWhenUnwired — pin the no-backend
+// branch so a future refactor that drops the label switch is caught.
+// This is the silent-Stop case (workspace_dispatchers.go:StopWorkspaceAuto
+// returns nil for unwired backends); the emit ensures the operator can
+// still see the boundary in the log.
+func TestStopForRestart_EmitsBackendNoneWhenUnwired(t *testing.T) {
+	read := captureProvLog(t)
+	h := &WorkspaceHandler{} // both nil
+	h.stopForRestart(context.Background(), "ws-restart-2")
+	got := read()
+	if !strings.Contains(got, `"backend":"none"`) {
+		t.Fatalf("expected backend=none for unwired handler: %s", got)
+	}
+}
diff --git a/workspace-server/internal/handlers/saas_default_tier_test.go b/workspace-server/internal/handlers/saas_default_tier_test.go
new file mode 100644
index 00000000..c4d32a94
--- /dev/null
+++ b/workspace-server/internal/handlers/saas_default_tier_test.go
@@ -0,0 +1,99 @@
+package handlers
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
+)
+
+// Tests for the SaaS-aware default-tier resolution introduced in #2901
+// and hardened in #2910 (multi-model review of #2901 found that the
+// original "all green" claim passed only because no SaaS-mode test
+// existed).
+//
+// These tests pin three invariants:
+//
+// 1. WorkspaceHandler.IsSaaS() returns true when cpProv is wired,
+//    false otherwise.
+// 2. WorkspaceHandler.DefaultTier() returns 4 on SaaS and 3
+//    self-hosted.
+// 3. generateDefaultConfig (TemplatesHandler.Import path) writes the
+//    passed-in tier into the generated config.yaml — pre-#2910 it
+//    was hardcoded to 3 and silently disagreed with the create-
+//    handler default on SaaS.
+
+// stubCPProv is a minimal stand-in for the CP provisioner — it only
+// exists to exercise the IsSaaS / HasProvisioner contract and is
+// never invoked in these tests.
+type stubCPProv struct{}
+
+func (stubCPProv) Start(_ interface{}, _ provisioner.WorkspaceConfig) (string, error) {
+	return "", nil
+}
+func (stubCPProv) Stop(_ interface{}, _ string) error { return nil }
+func (stubCPProv) Restart(_ interface{}, _ provisioner.WorkspaceConfig) (string, error) {
+	return "", nil
+}
+
+func TestIsSaaS_TrueWhenCPProvWired(t *testing.T) {
+	// trackingCPProv lives in workspace_provision_auto_test.go and is
+	// the established stub for these handler-level tests.
+	h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
+	if !h.IsSaaS() {
+		t.Errorf("IsSaaS()=false with cpProv wired; expected true")
+	}
+}
+
+func TestIsSaaS_FalseWhenNoBackendWired(t *testing.T) {
+	// cpProv nil — the self-hosted path. Both backends are left nil
+	// (the strictest variant): IsSaaS must key off cpProv
+	// specifically, not off "any backend present". 
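+	//
+	// For reference, a sketch of the production pair these tests pin,
+	// reconstructed from the assertions in this file (the canonical
+	// implementation lives on WorkspaceHandler, not here; treat this
+	// as an illustration, not the source):
+	//
+	//	func (h *WorkspaceHandler) IsSaaS() bool { return h.cpProv != nil }
+	//
+	//	func (h *WorkspaceHandler) DefaultTier() int {
+	//		if h.IsSaaS() {
+	//			return 4 // SaaS: dedicated per-workspace EC2
+	//		}
+	//		return 3 // self-hosted
+	//	}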
+ h := &WorkspaceHandler{provisioner: nil, cpProv: nil} + if h.IsSaaS() { + t.Errorf("IsSaaS()=true with both backends nil; expected false") + } +} + +func TestDefaultTier_SaaS_IsT4(t *testing.T) { + h := &WorkspaceHandler{cpProv: &trackingCPProv{}} + if got := h.DefaultTier(); got != 4 { + t.Errorf("SaaS DefaultTier()=%d; expected 4", got) + } +} + +func TestDefaultTier_SelfHosted_IsT3(t *testing.T) { + h := &WorkspaceHandler{} + if got := h.DefaultTier(); got != 3 { + t.Errorf("self-hosted DefaultTier()=%d; expected 3", got) + } +} + +// generateDefaultConfig — pin that the tier param flows into the +// emitted config.yaml verbatim. Pre-#2910 this was hardcoded "tier: 3" +// regardless of caller intent. +func TestGenerateDefaultConfig_RespectsTierParam(t *testing.T) { + cfg := generateDefaultConfig("Test Agent", map[string]string{"system-prompt.md": ""}, 4) + if !strings.Contains(cfg, "tier: 4\n") { + t.Errorf("expected `tier: 4` in generated config, got:\n%s", cfg) + } + // The pre-#2910 hardcoded `tier: 3` line must NOT appear. + if strings.Contains(cfg, "tier: 3\n") { + t.Errorf("config should not contain `tier: 3` when caller passed 4, got:\n%s", cfg) + } +} + +func TestGenerateDefaultConfig_SelfHostedTierT3(t *testing.T) { + cfg := generateDefaultConfig("Test Agent", map[string]string{"system-prompt.md": ""}, 3) + if !strings.Contains(cfg, "tier: 3\n") { + t.Errorf("expected `tier: 3` in generated config, got:\n%s", cfg) + } +} + +// Bounds check — caller passes 0 or out-of-range, helper falls back +// to T3 (the safer-of-the-two when deployment mode can't be resolved). +func TestGenerateDefaultConfig_OutOfRangeFallsBackToT3(t *testing.T) { + for _, tier := range []int{0, -1, 99} { + cfg := generateDefaultConfig("X", map[string]string{}, tier) + if !strings.Contains(cfg, "tier: 3\n") { + t.Errorf("invalid tier %d should fall back to T3, got:\n%s", tier, cfg) + } + } +} diff --git a/workspace-server/internal/handlers/security_regression_685_686_687_688_test.go b/workspace-server/internal/handlers/security_regression_685_686_687_688_test.go index f8d4fcb9..aa35a517 100644 --- a/workspace-server/internal/handlers/security_regression_685_686_687_688_test.go +++ b/workspace-server/internal/handlers/security_regression_685_686_687_688_test.go @@ -71,7 +71,7 @@ func TestSecurity_GetTemplates_NoAuth_Returns401(t *testing.T) { authDB, authMock := newEnrolledAuthDB(t) tmpDir := t.TempDir() - tmplh := NewTemplatesHandler(tmpDir, nil) + tmplh := NewTemplatesHandler(tmpDir, nil, nil) r := gin.New() r.GET("/templates", middleware.AdminAuth(authDB), tmplh.List) @@ -98,7 +98,7 @@ func TestSecurity_GetTemplates_FreshInstall_FailsOpen(t *testing.T) { authDB, authMock := newFreshInstallAuthDB(t) tmpDir := t.TempDir() - tmplh := NewTemplatesHandler(tmpDir, nil) + tmplh := NewTemplatesHandler(tmpDir, nil, nil) r := gin.New() r.GET("/templates", middleware.AdminAuth(authDB), tmplh.List) diff --git a/workspace-server/internal/handlers/team.go b/workspace-server/internal/handlers/team.go deleted file mode 100644 index 0c536020..00000000 --- a/workspace-server/internal/handlers/team.go +++ /dev/null @@ -1,132 +0,0 @@ -package handlers - -import ( - "encoding/json" - "log" - "net/http" - "os" - "path/filepath" - - "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/events" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/models" - "github.com/gin-gonic/gin" - "gopkg.in/yaml.v3" -) - -// TeamHandler now hosts only Collapse 
— the visual "expand" action is -// canvas-side and creating children goes through the regular -// WorkspaceHandler.Create path with parent_id set, like any other -// workspace. Every workspace can have children; "team" is just the -// state of having children. The old Expand handler bulk-created -// children by reading sub_workspaces from a parent's config and was -// non-idempotent — calling it N times leaked N×children EC2s, which -// is how tenant-hongming accumulated 72 stale workspaces. -type TeamHandler struct { - wh *WorkspaceHandler - b *events.Broadcaster -} - -// NewTeamHandler constructs a TeamHandler. wh is used by Collapse to -// route StopWorkspaceAuto through the backend dispatcher. -func NewTeamHandler(b *events.Broadcaster, wh *WorkspaceHandler, platformURL, configsDir string) *TeamHandler { - return &TeamHandler{wh: wh, b: b} -} - -// Collapse handles POST /workspaces/:id/collapse -// Stops and removes all child workspaces. -func (h *TeamHandler) Collapse(c *gin.Context) { - parentID := c.Param("id") - ctx := c.Request.Context() - - // Find children - rows, err := db.DB.QueryContext(ctx, - `SELECT id, name FROM workspaces WHERE parent_id = $1 AND status != 'removed'`, parentID) - if err != nil { - c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to query children"}) - return - } - defer rows.Close() - - removed := make([]string, 0) - for rows.Next() { - var childID, childName string - if rows.Scan(&childID, &childName) != nil { - continue - } - - // Stop the workload via the backend dispatcher (CP for SaaS, - // Docker for self-hosted). Pre-2026-05-05 this was - // `if h.provisioner != nil { h.provisioner.Stop(...) }`, which - // silently skipped on every SaaS tenant — child EC2s kept running - // after team-collapse until the orphan sweeper caught them - // (issue #2813). - if err := h.wh.StopWorkspaceAuto(ctx, childID); err != nil { - log.Printf("Team collapse: stop %s failed: %v — orphan sweeper will reconcile", childID, err) - } - - // Mark as removed - if _, err := db.DB.ExecContext(ctx, - `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2`, models.StatusRemoved, childID); err != nil { - log.Printf("Team collapse: failed to remove workspace %s: %v", childID, err) - } - if _, err := db.DB.ExecContext(ctx, - `DELETE FROM canvas_layouts WHERE workspace_id = $1`, childID); err != nil { - log.Printf("Team collapse: failed to delete layout for %s: %v", childID, err) - } - - h.b.RecordAndBroadcast(ctx, "WORKSPACE_REMOVED", childID, map[string]interface{}{}) - - removed = append(removed, childName) - } - - h.b.RecordAndBroadcast(ctx, "WORKSPACE_COLLAPSED", parentID, map[string]interface{}{ - "removed_children": removed, - }) - - c.JSON(http.StatusOK, gin.H{ - "status": "collapsed", - "removed": removed, - }) -} - -// findTemplateDirByName resolves a workspace name to its template -// directory. Kept here because callers outside this package may use -// it, even though the in-package consumer (Expand) is gone. -// -// TODO: relocate alongside the templates handler if no other callers -// surface, or delete entirely after a deprecation cycle. 
-func findTemplateDirByName(configsDir, name string) string { - normalized := normalizeName(name) - - candidate := filepath.Join(configsDir, normalized) - if _, err := os.Stat(filepath.Join(candidate, "config.yaml")); err == nil { - return candidate - } - - // Fall back to scanning all dirs - entries, err := os.ReadDir(configsDir) - if err != nil { - return "" - } - for _, e := range entries { - if !e.IsDir() { - continue - } - cfgPath := filepath.Join(configsDir, e.Name(), "config.yaml") - data, err := os.ReadFile(cfgPath) - if err != nil { - continue - } - var cfg struct { - Name string `yaml:"name"` - } - if json.Unmarshal(data, &cfg) == nil && cfg.Name == name { - return filepath.Join(configsDir, e.Name()) - } - if yaml.Unmarshal(data, &cfg) == nil && cfg.Name == name { - return filepath.Join(configsDir, e.Name()) - } - } - return "" -} diff --git a/workspace-server/internal/handlers/team_test.go b/workspace-server/internal/handlers/team_test.go deleted file mode 100644 index e87a92ae..00000000 --- a/workspace-server/internal/handlers/team_test.go +++ /dev/null @@ -1,130 +0,0 @@ -package handlers - -import ( - "encoding/json" - "net/http" - "net/http/httptest" - "os" - "path/filepath" - "testing" - - "github.com/DATA-DOG/go-sqlmock" - "github.com/gin-gonic/gin" -) - -// ---------- TeamHandler: Collapse ---------- - -func TestTeamCollapse_NoChildren(t *testing.T) { - mock := setupTestDB(t) - setupTestRedis(t) - broadcaster := newTestBroadcaster() - handler := NewTeamHandler(broadcaster, NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", "/tmp/configs") - - // No children - mock.ExpectQuery("SELECT id, name FROM workspaces WHERE parent_id"). - WithArgs("ws-parent"). - WillReturnRows(sqlmock.NewRows([]string{"id", "name"})) - - // WORKSPACE_COLLAPSED broadcast - mock.ExpectExec("INSERT INTO structure_events"). - WillReturnResult(sqlmock.NewResult(0, 1)) - - w := httptest.NewRecorder() - c, _ := gin.CreateTestContext(w) - c.Params = gin.Params{{Key: "id", Value: "ws-parent"}} - c.Request = httptest.NewRequest("POST", "/", nil) - - handler.Collapse(c) - - if w.Code != http.StatusOK { - t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String()) - } - var resp map[string]interface{} - json.Unmarshal(w.Body.Bytes(), &resp) - if resp["status"] != "collapsed" { - t.Errorf("expected status 'collapsed', got %v", resp["status"]) - } -} - -func TestTeamCollapse_WithChildren(t *testing.T) { - mock := setupTestDB(t) - setupTestRedis(t) - broadcaster := newTestBroadcaster() - handler := NewTeamHandler(broadcaster, NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", "/tmp/configs") - - // Two children - mock.ExpectQuery("SELECT id, name FROM workspaces WHERE parent_id"). - WithArgs("ws-parent"). - WillReturnRows(sqlmock.NewRows([]string{"id", "name"}). - AddRow("child-1", "Worker A"). - AddRow("child-2", "Worker B")) - - // UPDATE + DELETE + broadcast for child-1 - mock.ExpectExec("UPDATE workspaces SET status ="). - WithArgs("child-1"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec("DELETE FROM canvas_layouts"). - WithArgs("child-1"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec("INSERT INTO structure_events"). - WillReturnResult(sqlmock.NewResult(0, 1)) - - // UPDATE + DELETE + broadcast for child-2 - mock.ExpectExec("UPDATE workspaces SET status ="). - WithArgs("child-2"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec("DELETE FROM canvas_layouts"). 
- WithArgs("child-2"). - WillReturnResult(sqlmock.NewResult(0, 1)) - mock.ExpectExec("INSERT INTO structure_events"). - WillReturnResult(sqlmock.NewResult(0, 1)) - - // WORKSPACE_COLLAPSED broadcast for parent - mock.ExpectExec("INSERT INTO structure_events"). - WillReturnResult(sqlmock.NewResult(0, 1)) - - w := httptest.NewRecorder() - c, _ := gin.CreateTestContext(w) - c.Params = gin.Params{{Key: "id", Value: "ws-parent"}} - c.Request = httptest.NewRequest("POST", "/", nil) - - handler.Collapse(c) - - if w.Code != http.StatusOK { - t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String()) - } - var resp map[string]interface{} - json.Unmarshal(w.Body.Bytes(), &resp) - removed, ok := resp["removed"].([]interface{}) - if !ok || len(removed) != 2 { - t.Errorf("expected 2 removed children, got %v", resp["removed"]) - } -} -// ---------- findTemplateDirByName helper ---------- - -func TestFindTemplateDirByName_DirectMatch(t *testing.T) { - dir := t.TempDir() - subDir := filepath.Join(dir, "mybot") - os.MkdirAll(subDir, 0755) - os.WriteFile(filepath.Join(subDir, "config.yaml"), []byte("name: MyBot"), 0644) - - result := findTemplateDirByName(dir, "mybot") - if result != subDir { - t.Errorf("expected %s, got %s", subDir, result) - } -} - -func TestFindTemplateDirByName_NotFound(t *testing.T) { - dir := t.TempDir() - result := findTemplateDirByName(dir, "nonexistent") - if result != "" { - t.Errorf("expected empty string, got %s", result) - } -} - -func TestFindTemplateDirByName_InvalidConfigsDir(t *testing.T) { - result := findTemplateDirByName("/nonexistent/path", "anything") - if result != "" { - t.Errorf("expected empty string for invalid dir, got %s", result) - } -} diff --git a/workspace-server/internal/handlers/template_import.go b/workspace-server/internal/handlers/template_import.go index 7d4ab4d1..95b5854f 100644 --- a/workspace-server/internal/handlers/template_import.go +++ b/workspace-server/internal/handlers/template_import.go @@ -36,8 +36,14 @@ func normalizeName(name string) string { return result } -// generateDefaultConfig creates a config.yaml from detected prompt files and skills. -func generateDefaultConfig(name string, files map[string]string) string { +// generateDefaultConfig creates a config.yaml from detected prompt files +// and skills. tier is the deployment-aware default (caller passes +// h.wh.DefaultTier() — T4 on SaaS, T3 on self-hosted) so the generated +// file matches what POST /workspaces would default to. Pre-#2910 this +// was hardcoded to 3, which split-brained with the create-handler +// default on SaaS (T4) and pinned newly-imported templates at T3 even +// when downstream Create paths picked T4. +func generateDefaultConfig(name string, files map[string]string, tier int) string { promptFiles := []string{} skillSet := map[string]bool{} @@ -74,9 +80,15 @@ func generateDefaultConfig(name string, files map[string]string) string { var cfg strings.Builder cfg.WriteString(`name: "` + escaped + `"` + "\n") cfg.WriteString("description: Imported agent\n") - // Default to tier 3 ("Privileged") — matches the workspace.go - // create handler default. See its comment for rationale. - cfg.WriteString("version: 1.0.0\ntier: 3\n") + // Tier is SaaS-aware via the caller's DefaultTier (#2910 PR-B). + // Bounds-checked: invalid input falls back to T3 (the historical + // default + the safer-of-the-two when the deployment mode can't + // be resolved). 
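+	//
+	// Illustrative head of the emitted file for tier=4 (a sketch; the
+	// model line is the current default and may drift):
+	//
+	//	name: "Test Agent"
+	//	description: Imported agent
+	//	version: 1.0.0
+	//	tier: 4
+	//	model: anthropic:claude-haiku-4-5-20251001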
+ if tier < 1 || tier > 4 { + tier = 3 + } + cfg.WriteString("version: 1.0.0\n") + cfg.WriteString(fmt.Sprintf("tier: %d\n", tier)) cfg.WriteString("model: anthropic:claude-haiku-4-5-20251001\n") cfg.WriteString("\nprompt_files:\n") if len(promptFiles) > 0 { @@ -148,7 +160,11 @@ func (h *TemplatesHandler) Import(c *gin.Context) { // Auto-generate config.yaml if not provided if _, exists := body.Files["config.yaml"]; !exists { - cfg := generateDefaultConfig(body.Name, body.Files) + tier := 3 + if h.wh != nil { + tier = h.wh.DefaultTier() + } + cfg := generateDefaultConfig(body.Name, body.Files, tier) if err := os.WriteFile(filepath.Join(destDir, "config.yaml"), []byte(cfg), 0600); err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to write config.yaml"}) return @@ -227,7 +243,11 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) { if _, exists := body.Files["config.yaml"]; !exists { // Check if config.yaml exists in container if _, err := h.execInContainer(ctx, containerName, []string{"test", "-f", "/configs/config.yaml"}); err != nil { - cfg := generateDefaultConfig(wsName, body.Files) + tier := 3 + if h.wh != nil { + tier = h.wh.DefaultTier() + } + cfg := generateDefaultConfig(wsName, body.Files, tier) singleFile := map[string]string{"config.yaml": cfg} h.copyFilesToContainer(ctx, containerName, "/configs", singleFile) } diff --git a/workspace-server/internal/handlers/template_import_test.go b/workspace-server/internal/handlers/template_import_test.go index 42336844..c496f9c5 100644 --- a/workspace-server/internal/handlers/template_import_test.go +++ b/workspace-server/internal/handlers/template_import_test.go @@ -55,7 +55,7 @@ func TestGenerateDefaultConfig_WithFiles(t *testing.T) { "skills/review/templates.md": "Templates", } - cfg := generateDefaultConfig("Test Agent", files) + cfg := generateDefaultConfig("Test Agent", files, 3) // Name is emitted as a double-quoted scalar (#221 sanitizer). 
if !strings.Contains(cfg, `name: "Test Agent"`) { @@ -85,7 +85,7 @@ func TestGenerateDefaultConfig_Empty(t *testing.T) { "data/something.json": `{"key": "value"}`, } - cfg := generateDefaultConfig("Empty Agent", files) + cfg := generateDefaultConfig("Empty Agent", files, 3) if !strings.Contains(cfg, `name: "Empty Agent"`) { t.Errorf("config should contain quoted agent name, got:\n%s", cfg) @@ -134,7 +134,7 @@ func TestGenerateDefaultConfig_YAMLInjection(t *testing.T) { for _, tc := range adversarialCases { t.Run(tc.desc, func(t *testing.T) { - cfg := generateDefaultConfig(tc.name, map[string]string{}) + cfg := generateDefaultConfig(tc.name, map[string]string{}, 3) var parsed map[string]interface{} if err := yaml.Unmarshal([]byte(cfg), &parsed); err != nil { t.Fatalf("sanitized config does not parse as YAML: %v\n--- config ---\n%s", err, cfg) @@ -205,7 +205,7 @@ func TestImport_Success(t *testing.T) { setupTestRedis(t) tmpDir := t.TempDir() - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) body := `{ "name": "New Agent", @@ -245,7 +245,7 @@ func TestImport_MissingName(t *testing.T) { setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) body := `{"files": {"test.md": "content"}}` @@ -265,7 +265,7 @@ func TestImport_TooManyFiles(t *testing.T) { setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) files := make(map[string]string) for i := 0; i <= maxUploadFiles; i++ { @@ -296,7 +296,7 @@ func TestImport_AlreadyExists(t *testing.T) { tmpDir := t.TempDir() os.MkdirAll(filepath.Join(tmpDir, "existing-agent"), 0755) - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) body := `{"name": "Existing Agent", "files": {"test.md": "content"}}` @@ -317,7 +317,7 @@ func TestImport_WithConfigYaml(t *testing.T) { setupTestRedis(t) tmpDir := t.TempDir() - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) body := `{ "name": "Custom Agent", @@ -354,7 +354,7 @@ func TestReplaceFiles_MissingBody(t *testing.T) { setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -373,7 +373,7 @@ func TestReplaceFiles_TooManyFiles(t *testing.T) { setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) files := make(map[string]string) for i := 0; i <= maxUploadFiles; i++ { @@ -398,7 +398,7 @@ func TestReplaceFiles_WorkspaceNotFound(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) // ReplaceFiles now selects (name, instance_id, runtime) for the // restart-cascade. Match the full column list rather than just the @@ -429,7 +429,7 @@ func TestReplaceFiles_PathTraversal(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`). WithArgs("ws-rf-pt"). 
diff --git a/workspace-server/internal/handlers/templates.go b/workspace-server/internal/handlers/templates.go index d51dabcd..03776a5d 100644 --- a/workspace-server/internal/handlers/templates.go +++ b/workspace-server/internal/handlers/templates.go @@ -31,10 +31,20 @@ const maxUploadFiles = 200 type TemplatesHandler struct { configsDir string docker *client.Client + // wh is used by Import and ReplaceFiles to call DefaultTier() so a + // generated config.yaml's tier matches the SaaS-vs-self-hosted + // boundary (#2910 PR-B). nil-tolerant — the field is unused when + // the caller doesn't import templates that need a fresh config + // generated. + wh *WorkspaceHandler } -func NewTemplatesHandler(configsDir string, dockerCli *client.Client) *TemplatesHandler { - return &TemplatesHandler{configsDir: configsDir, docker: dockerCli} +// NewTemplatesHandler constructs a TemplatesHandler. wh may be nil for +// callers that only use the read-only template surfaces (List, +// ReadFile, ListFiles). Import + ReplaceFiles need wh non-nil so the +// generated config.yaml picks the SaaS-aware default tier. +func NewTemplatesHandler(configsDir string, dockerCli *client.Client, wh *WorkspaceHandler) *TemplatesHandler { + return &TemplatesHandler{configsDir: configsDir, docker: dockerCli, wh: wh} } // modelSpec describes a single supported model on a template: its id (sent diff --git a/workspace-server/internal/handlers/templates_test.go b/workspace-server/internal/handlers/templates_test.go index cbae8069..3d75bfd5 100644 --- a/workspace-server/internal/handlers/templates_test.go +++ b/workspace-server/internal/handlers/templates_test.go @@ -53,7 +53,7 @@ func TestTemplatesList_EmptyDir(t *testing.T) { setupTestRedis(t) tmpDir := t.TempDir() - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -99,7 +99,7 @@ skills: // Create a directory without config.yaml (should be skipped) os.MkdirAll(filepath.Join(tmpDir, "no-config"), 0755) - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -160,7 +160,7 @@ skills: [] t.Fatalf("write: %v", err) } - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) c.Request = httptest.NewRequest("GET", "/templates", nil) @@ -237,7 +237,7 @@ skills: [] t.Fatalf("write: %v", err) } - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) c.Request = httptest.NewRequest("GET", "/templates", nil) @@ -315,7 +315,7 @@ skills: [] t.Fatalf("write: %v", err) } - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) c.Request = httptest.NewRequest("GET", "/templates", nil) @@ -434,7 +434,7 @@ skills: [] t.Fatalf("write: %v", err) } - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) c.Request = httptest.NewRequest("GET", "/templates", nil) @@ -512,7 +512,7 @@ skills: [] t.Fatalf("write: %v", err) } - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) c.Request = 
httptest.NewRequest("GET", "/templates", nil) @@ -555,7 +555,7 @@ skills: [] t.Fatalf("write: %v", err) } - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) c.Request = httptest.NewRequest("GET", "/templates", nil) @@ -589,7 +589,7 @@ skills: [] t.Fatalf("write: %v", err) } - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) c.Request = httptest.NewRequest("GET", "/templates", nil) @@ -661,7 +661,7 @@ skills: [] log.SetOutput(&logBuf) defer log.SetOutput(prevOutput) - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) c.Request = httptest.NewRequest("GET", "/templates", nil) @@ -698,7 +698,7 @@ func TestTemplatesList_NonexistentDir(t *testing.T) { setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler("/nonexistent/path/to/templates", nil) + handler := NewTemplatesHandler("/nonexistent/path/to/templates", nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -723,7 +723,7 @@ func TestListFiles_InvalidRoot(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -748,7 +748,7 @@ func TestListFiles_WorkspaceNotFound(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) mock.ExpectQuery("SELECT name FROM workspaces WHERE id ="). WithArgs("ws-nonexist"). @@ -775,7 +775,7 @@ func TestListFiles_FallbackToHost_NoTemplate(t *testing.T) { setupTestRedis(t) tmpDir := t.TempDir() - handler := NewTemplatesHandler(tmpDir, nil) // nil docker = no container + handler := NewTemplatesHandler(tmpDir, nil, nil) // nil docker = no container mock.ExpectQuery("SELECT name FROM workspaces WHERE id ="). WithArgs("ws-fallback"). @@ -815,7 +815,7 @@ func TestListFiles_FallbackToHost_WithTemplate(t *testing.T) { os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte("name: Test Agent\n"), 0644) os.WriteFile(filepath.Join(tmplDir, "system-prompt.md"), []byte("# prompt"), 0644) - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) mock.ExpectQuery("SELECT name FROM workspaces WHERE id ="). WithArgs("ws-tmpl"). @@ -849,7 +849,7 @@ func TestReadFile_PathTraversal(t *testing.T) { setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -870,7 +870,7 @@ func TestReadFile_InvalidRoot(t *testing.T) { setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -892,7 +892,7 @@ func TestReadFile_WorkspaceNotFound(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`). WithArgs("ws-nf"). 
@@ -926,7 +926,7 @@ func TestReadFile_FallbackToHost_Success(t *testing.T) { os.MkdirAll(tmplDir, 0755) os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte("name: Reader Agent\ntier: 1\n"), 0644) - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) // instance_id="" → SaaS branch skipped → falls through to local // Docker / template-dir host fallback (the only path the test @@ -967,7 +967,7 @@ func TestReadFile_FallbackToHost_NotFound(t *testing.T) { setupTestRedis(t) tmpDir := t.TempDir() - handler := NewTemplatesHandler(tmpDir, nil) + handler := NewTemplatesHandler(tmpDir, nil, nil) mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`). WithArgs("ws-nofile"). @@ -999,7 +999,7 @@ func TestWriteFile_PathTraversal(t *testing.T) { setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -1023,7 +1023,7 @@ func TestWriteFile_InvalidBody(t *testing.T) { setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -1046,7 +1046,7 @@ func TestWriteFile_WorkspaceNotFound(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`). WithArgs("ws-wf-nf"). @@ -1080,7 +1080,7 @@ func TestDeleteFile_PathTraversal(t *testing.T) { setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -1101,7 +1101,7 @@ func TestDeleteFile_WorkspaceNotFound(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - handler := NewTemplatesHandler(t.TempDir(), nil) + handler := NewTemplatesHandler(t.TempDir(), nil, nil) mock.ExpectQuery("SELECT name FROM workspaces WHERE id ="). WithArgs("ws-del-nf"). 
@@ -1133,7 +1133,7 @@ func TestResolveTemplateDir_ByNormalizedName(t *testing.T) {
 	tmplDir := filepath.Join(tmpDir, "my-agent")
 	os.MkdirAll(tmplDir, 0755)
 
-	handler := NewTemplatesHandler(tmpDir, nil)
+	handler := NewTemplatesHandler(tmpDir, nil, nil)
 
 	result := handler.resolveTemplateDir("My Agent")
 	if result != tmplDir {
@@ -1143,7 +1143,7 @@ func TestResolveTemplateDir_NotFound(t *testing.T) {
 	tmpDir := t.TempDir()
 
-	handler := NewTemplatesHandler(tmpDir, nil)
+	handler := NewTemplatesHandler(tmpDir, nil, nil)
 
 	result := handler.resolveTemplateDir("Nonexistent Agent")
 	if result != "" {
@@ -1177,7 +1177,7 @@ func TestCWE78_DeleteFile_TraversalVariants(t *testing.T) {
 	setupTestDB(t)
 	setupTestRedis(t)
 
-	handler := NewTemplatesHandler(t.TempDir(), nil)
+	handler := NewTemplatesHandler(t.TempDir(), nil, nil)
 
 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go
index 3b5b4c02..cf210342 100644
--- a/workspace-server/internal/handlers/workspace.go
+++ b/workspace-server/internal/handlers/workspace.go
@@ -148,15 +148,15 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
 	id := uuid.New().String()
 	awarenessNamespace := workspaceAwarenessNamespace(id)
 	if payload.Tier == 0 {
-		// Default to T3 ("Privileged"). T3 gives agents a read_write
-		// workspace mount + Docker daemon access — the level most
-		// templates need to do real work. Lower tiers (T1 sandboxed,
-		// T2 standard) stay available as explicit opt-ins for
-		// low-trust agents. Matches the Canvas CreateWorkspaceDialog
-		// default for self-hosted hosts (SaaS defaults to T4 via
-		// CreateWorkspaceDialog because each SaaS workspace runs on
-		// its own sibling EC2).
-		payload.Tier = 3
+		// SaaS-aware default. SaaS → T4 (full host access; each
+		// workspace runs on its own sibling EC2 so the tier boundary
+		// is a Docker resource limit on the only container present —
+		// no neighbour to protect from). Self-hosted → T3 (read-write
+		// workspace mount + Docker daemon access, most templates'
+		// baseline). Lower tiers (T1 sandboxed, T2 standard) remain
+		// explicit opt-ins for low-trust agents. Matches the canvas
+		// CreateWorkspaceDialog defaults so the API and the UI agree.
+		payload.Tier = h.DefaultTier()
 	}
 
 	// Detect runtime + default model from template config.yaml when the
diff --git a/workspace-server/internal/handlers/workspace_dispatchers.go b/workspace-server/internal/handlers/workspace_dispatchers.go
index 23237d00..3df25877 100644
--- a/workspace-server/internal/handlers/workspace_dispatchers.go
+++ b/workspace-server/internal/handlers/workspace_dispatchers.go
@@ -35,6 +35,7 @@ import (
 	"time"
 
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
 )
 
 // HasProvisioner reports whether either backend (CP or local Docker) is
@@ -49,6 +50,32 @@ func (h *WorkspaceHandler) HasProvisioner() bool {
 	return h.cpProv != nil || h.provisioner != nil
 }
 
+// IsSaaS reports whether the CP (EC2) provisioner is wired. Each SaaS
+// workspace runs on its own sibling EC2, so the per-workspace tier
+// boundary is a Docker resource limit applied to the only container
+// on that EC2 — there's no neighbour to protect from. Self-hosted
+// runs many workspaces in one Docker daemon on a single host, so
+// the tier-3-by-default safe-neighbour-share posture stays.
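+//
+// Editorial summary of the resulting default matrix (values taken
+// from DefaultTier below):
+//
+//	host                   IsSaaS()  DefaultTier()
+//	SaaS (cpProv wired)    true      4 (full host access)
+//	self-hosted (Docker)   false     3 (rw mount + daemon)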
+// +// Tier defaults across Create / OrgImport / canvas EmptyState branch +// on IsSaaS so SaaS users get T4 (full host access) by default and +// self-hosted users keep the lower-trust caps. +func (h *WorkspaceHandler) IsSaaS() bool { + return h.cpProv != nil +} + +// DefaultTier is the SaaS-aware default tier. T4 on SaaS (single +// container per EC2 — full host access matches the boundary), T3 on +// self-hosted (read-write workspace mount + Docker daemon access, +// most templates' baseline). Callers default to this when the user +// hasn't explicitly picked a tier. +func (h *WorkspaceHandler) DefaultTier() int { + if h.IsSaaS() { + return 4 + } + return 3 +} + // provisionWorkspaceAuto picks the backend (CP for SaaS, local Docker // for self-hosted) and starts provisioning in a goroutine. Returns true // when a backend was kicked off, false when neither is wired. @@ -75,6 +102,14 @@ func (h *WorkspaceHandler) HasProvisioner() bool { // lives in prepareProvisionContext (shared by both per-backend // goroutines). func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool { + provlog.Event("provision.start", map[string]any{ + "workspace_id": workspaceID, + "name": payload.Name, + "tier": payload.Tier, + "runtime": payload.Runtime, + "template": payload.Template, + "sync": false, + }) if h.cpProv != nil { go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload) return true @@ -110,6 +145,14 @@ func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath stri // Keep these two helpers in sync — when one grows a new arm (third // backend, retry semantics), the other should too. func (h *WorkspaceHandler) provisionWorkspaceAutoSync(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool { + provlog.Event("provision.start", map[string]any{ + "workspace_id": workspaceID, + "name": payload.Name, + "tier": payload.Tier, + "runtime": payload.Runtime, + "template": payload.Template, + "sync": true, + }) if h.cpProv != nil { h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload) return true diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go index 3b3097c4..c5712be5 100644 --- a/workspace-server/internal/handlers/workspace_restart.go +++ b/workspace-server/internal/handlers/workspace_restart.go @@ -12,6 +12,7 @@ import ( "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" "github.com/Molecule-AI/molecule-monorepo/platform/internal/models" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog" "github.com/gin-gonic/gin" ) @@ -431,6 +432,16 @@ func coalesceRestart(workspaceID string, cycle func()) { // NPE'd before reaching the reprovision step — which is why every SaaS dead- // agent incident pre-this-fix required manual restart from canvas. 
 func (h *WorkspaceHandler) stopForRestart(ctx context.Context, workspaceID string) {
+	backend := "none"
+	if h.provisioner != nil {
+		backend = "docker"
+	} else if h.cpProv != nil {
+		backend = "cp"
+	}
+	provlog.Event("restart.pre_stop", map[string]any{
+		"workspace_id": workspaceID,
+		"backend":      backend,
+	})
 	if h.provisioner != nil {
 		h.provisioner.Stop(ctx, workspaceID)
 		return
diff --git a/workspace-server/internal/handlers/workspaces_insert_allowlist_test.go b/workspace-server/internal/handlers/workspaces_insert_allowlist_test.go
new file mode 100644
index 00000000..066c6576
--- /dev/null
+++ b/workspace-server/internal/handlers/workspaces_insert_allowlist_test.go
@@ -0,0 +1,159 @@
+package handlers
+
+import (
+	"go/ast"
+	"go/parser"
+	"go/token"
+	"os"
+	"path/filepath"
+	"regexp"
+	"sort"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// workspacesInsertRE pins the `INSERT INTO workspaces (` literal the
+// AST walk below scans for; the \s tolerance is there so multi-line
+// SQL strings with varying indentation still match.
+var workspacesInsertRE = regexp.MustCompile(`INSERT\s+INTO\s+workspaces\s*\(`)
+
+// TestINSERTworkspacesAllowlist enumerates every function in this
+// package that emits an `INSERT INTO workspaces (` SQL literal, and
+// pins the result against an explicit allowlist. New entries fail the
+// test until a reviewer adds them — forcing the question "what
+// makes this INSERT idempotent?" at PR-review time, not after the
+// next bulk-create leak.
+//
+// Pairs with TestCreateWorkspaceTree_CallsLookupBeforeInsert (the
+// behavior pin for the one bulk path). Together they close the
+// regression class: this test catches "did a new function start
+// inserting workspaces?", that test catches "did the existing bulk
+// path drop its idempotency check?". Either fires immediately when
+// drift happens.
+//
+// Why allowlist rather than pure behavior gate (per memory
+// feedback_behavior_based_ast_gates.md): the bulk-create leak class
+// is small + stable (1 path today), and a behavior gate would have
+// to disambiguate "iterating a YAML array of workspaces" from the
+// many other `for ... range` patterns in a Create handler (config
+// lines, secrets map, channels). Type-info-aware AST analysis would
+// catch the YAML-iteration shape but is heavy. Allowlisting is the
+// minimum-viable pin: any PR that adds a new INSERT site is forced
+// to pause, add an entry here, and document the safety mechanism in
+// the comment alongside.
+//
+// RFC #2867 class 1.
+func TestINSERTworkspacesAllowlist(t *testing.T) {
+	// expected[key] = safety mechanism. Keep the comment pinned to
+	// what makes that function safe — if the safety changes, the
+	// allowlist must be re-reviewed.
+	expected := map[string]string{
+		// org_import.createWorkspaceTree: lookupExistingChild
+		// before INSERT (#2868 phase 3). Also pinned by
+		// TestCreateWorkspaceTree_CallsLookupBeforeInsert.
+		"org_import.go:createWorkspaceTree": "lookup-then-insert via lookupExistingChild",
+		// registry.Register: external workspace registers itself with
+		// its known UUID; INSERT is idempotent via ON CONFLICT (id)
+		// DO UPDATE — re-registration upserts, never duplicates.
+		"registry.go:Register": "ON CONFLICT (id) DO UPDATE",
+		// workspace.Create: single-workspace POST /workspaces from a
+		// human or automation. No iteration; payload describes one
+		// workspace; UUID is server-generated. Caller intent IS to
+		// create, so no idempotency check is needed.
+ "workspace.go:Create": "single-workspace POST, server-generated UUID", + } + + actual := map[string]string{} + + wd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + + entries, err := os.ReadDir(wd) + if err != nil { + t.Fatalf("readdir %s: %v", wd, err) + } + for _, ent := range entries { + name := ent.Name() + if ent.IsDir() { + continue + } + if !strings.HasSuffix(name, ".go") { + continue + } + if strings.HasSuffix(name, "_test.go") { + continue + } + path := filepath.Join(wd, name) + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, path, nil, parser.ParseComments) + if err != nil { + t.Fatalf("parse %s: %v", path, err) + } + // For each top-level FuncDecl, walk its body and check for an + // `INSERT INTO workspaces (` SQL literal in any CallExpr arg. + for _, decl := range file.Decls { + fn, ok := decl.(*ast.FuncDecl) + if !ok || fn.Body == nil { + continue + } + var foundInsert bool + ast.Inspect(fn.Body, func(n ast.Node) bool { + lit, ok := n.(*ast.BasicLit) + if !ok || lit.Kind != token.STRING { + return true + } + raw := lit.Value + if unq, err := strconv.Unquote(raw); err == nil { + raw = unq + } + if workspacesInsertRE.MatchString(raw) { + foundInsert = true + return false + } + return true + }) + if foundInsert { + key := name + ":" + fn.Name.Name + actual[key] = "(observed via AST walk)" + } + } + } + + // Compute set diffs so failures point at the specific drift. + missing := []string{} + unexpected := []string{} + for k := range expected { + if _, ok := actual[k]; !ok { + missing = append(missing, k) + } + } + for k := range actual { + if _, ok := expected[k]; !ok { + unexpected = append(unexpected, k) + } + } + sort.Strings(missing) + sort.Strings(unexpected) + + if len(unexpected) > 0 { + t.Errorf(`new function(s) emit `+"`INSERT INTO workspaces (`"+` and aren't in the allowlist: + %s + +If this is a legitimate addition, add an entry to expected[] in this test +with the safety mechanism pinned in the comment alongside (lookup-then- +insert / ON CONFLICT / single-workspace path / etc.). The bulk-create +regression class needs explicit per-handler review, not silent drift. + +Reference: RFC #2867 class 1, sibling test +TestCreateWorkspaceTree_CallsLookupBeforeInsert.`, + strings.Join(unexpected, "\n ")) + } + if len(missing) > 0 { + t.Errorf(`expected function(s) no longer emit `+"`INSERT INTO workspaces (`"+`: + %s + +Either the function was renamed/deleted (update the allowlist) or the +INSERT was moved out (verify the new home is also covered). 
Don't just
+delete the entry — confirm the safety mechanism is still in place
+elsewhere or that the workspace-create path was intentionally
+restructured.`,
+			strings.Join(missing, "\n  "))
+	}
+}
diff --git a/workspace-server/internal/metrics/metrics.go b/workspace-server/internal/metrics/metrics.go
index 7f0852a8..6632d524 100644
--- a/workspace-server/internal/metrics/metrics.go
+++ b/workspace-server/internal/metrics/metrics.go
@@ -5,14 +5,15 @@
 //
 // Exposed metrics:
 //
-// molecule_http_requests_total{method,path,status} - counter
-// molecule_http_request_duration_seconds{method,path} - counter (sum, for avg rate)
-// molecule_websocket_connections_active - gauge
-// go_goroutines - gauge
-// go_memstats_alloc_bytes - gauge
-// go_memstats_sys_bytes - gauge
-// go_memstats_heap_inuse_bytes - gauge
-// go_gc_duration_seconds_total - counter
+// molecule_http_requests_total{method,path,status} - counter
+// molecule_http_request_duration_seconds{method,path} - counter (sum, for avg rate)
+// molecule_websocket_connections_active - gauge
+// molecule_pending_uploads_swept_total{outcome} - counter (acked|expired|error)
+// go_goroutines - gauge
+// go_memstats_alloc_bytes - gauge
+// go_memstats_sys_bytes - gauge
+// go_memstats_heap_inuse_bytes - gauge
+// go_gc_duration_seconds_total - counter
 package metrics
 
 import (
@@ -38,6 +39,12 @@ var (
 	reqCounts  = map[reqKey]int64{}   // molecule_http_requests_total
 	reqDurSums = map[reqKey]float64{} // sum of durations (seconds)
 	activeWSConns int64               // molecule_websocket_connections_active
+
+	// pendinguploads sweeper counters — atomic so the sweeper goroutine
+	// doesn't contend with the /metrics handler.
+	pendingUploadsSweptAcked   int64 // molecule_pending_uploads_swept_total{outcome="acked"}
+	pendingUploadsSweptExpired int64 // molecule_pending_uploads_swept_total{outcome="expired"}
+	pendingUploadsSweepErrors  int64 // molecule_pending_uploads_swept_total{outcome="error"}
 )
 
 // Middleware records per-request counts and latency.
@@ -76,6 +83,50 @@ func TrackWSConnect() { atomic.AddInt64(&activeWSConns, 1) }
 
 // Call from the WebSocket disconnect / cleanup path.
 func TrackWSDisconnect() { atomic.AddInt64(&activeWSConns, -1) }
 
+// phantomBusyResets is the cumulative count of workspace rows the
+// phantom-busy sweep reset (the stale active_tasks counter cleared
+// back to 0). Surfaced as molecule_phantom_busy_resets_total — a high
+// reset rate signals a regression in task-lifecycle accounting (most
+// often: missing env vars cause claude --print to time out, the
+// agent loop never decrements active_tasks, and the sweep cleans up
+// the counter ~10 min later). Issue #2865.
+var phantomBusyResets int64
+
+// TrackPhantomBusyReset increments the phantom-busy reset counter.
+// Called from sweepPhantomBusy in workspace-server/internal/scheduler/
+// after each row whose active_tasks was reset to 0. Idempotent +
+// goroutine-safe; called once per row per sweep tick.
+func TrackPhantomBusyReset() { atomic.AddInt64(&phantomBusyResets, 1) }
+
+// PendingUploadsSwept records a successful sweep cycle. acked/expired
+// are added to the per-outcome counters so dashboards can spot the
+// stuck-fetch pattern (high expired, low acked) vs healthy churn.
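+//
+// As rendered by Handler below, the exposition lines look like:
+//
+//	molecule_pending_uploads_swept_total{outcome="acked"} 12
+//	molecule_pending_uploads_swept_total{outcome="expired"} 3
+//	molecule_pending_uploads_swept_total{outcome="error"} 0
+//
+// A hedged dashboard example (standard PromQL; the window choice is
+// illustrative, not part of this change):
+//
+//	sum by (outcome) (rate(molecule_pending_uploads_swept_total[15m]))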
+func PendingUploadsSwept(acked, expired int) { + if acked > 0 { + atomic.AddInt64(&pendingUploadsSweptAcked, int64(acked)) + } + if expired > 0 { + atomic.AddInt64(&pendingUploadsSweptExpired, int64(expired)) + } +} + +// PendingUploadsSweepError records a sweeper-cycle failure (transient +// DB error etc). Counted separately so the rate of errored sweeps is +// observable independent of how many rows the successful sweeps deleted. +func PendingUploadsSweepError() { + atomic.AddInt64(&pendingUploadsSweepErrors, 1) +} + +// PendingUploadsSweepCounts returns the current (acked, expired, error) +// totals. Exposed for tests that need a deterministic delta probe of +// the sweeper's metric writes — the /metrics endpoint is the production +// observability surface; this is a unit-test escape hatch. +func PendingUploadsSweepCounts() (acked, expired, errored int64) { + return atomic.LoadInt64(&pendingUploadsSweptAcked), + atomic.LoadInt64(&pendingUploadsSweptExpired), + atomic.LoadInt64(&pendingUploadsSweepErrors) +} + // Handler returns a Gin handler that serialises all collected metrics in // Prometheus text exposition format (v0.0.4). Mount this at GET /metrics. func Handler() gin.HandlerFunc { @@ -144,6 +195,21 @@ func Handler() gin.HandlerFunc { writeln(w, "# HELP molecule_websocket_connections_active Number of active WebSocket connections.") writeln(w, "# TYPE molecule_websocket_connections_active gauge") fmt.Fprintf(w, "molecule_websocket_connections_active %d\n", atomic.LoadInt64(&activeWSConns)) + + // ── Molecule AI scheduler ────────────────────────────────────────────── + writeln(w, "# HELP molecule_phantom_busy_resets_total Cumulative count of workspace rows reset by the phantom-busy sweep (active_tasks cleared after >10 min of activity_log silence). High reset rate signals task-lifecycle accounting regressions — see issue #2865.") + writeln(w, "# TYPE molecule_phantom_busy_resets_total counter") + fmt.Fprintf(w, "molecule_phantom_busy_resets_total %d\n", atomic.LoadInt64(&phantomBusyResets)) + + // ── Pending-uploads sweeper ──────────────────────────────────────────── + writeln(w, "# HELP molecule_pending_uploads_swept_total Pending-uploads rows deleted by the GC sweeper, by outcome.") + writeln(w, "# TYPE molecule_pending_uploads_swept_total counter") + fmt.Fprintf(w, "molecule_pending_uploads_swept_total{outcome=\"acked\"} %d\n", + atomic.LoadInt64(&pendingUploadsSweptAcked)) + fmt.Fprintf(w, "molecule_pending_uploads_swept_total{outcome=\"expired\"} %d\n", + atomic.LoadInt64(&pendingUploadsSweptExpired)) + fmt.Fprintf(w, "molecule_pending_uploads_swept_total{outcome=\"error\"} %d\n", + atomic.LoadInt64(&pendingUploadsSweepErrors)) } } diff --git a/workspace-server/internal/metrics/metrics_test.go b/workspace-server/internal/metrics/metrics_test.go new file mode 100644 index 00000000..d722a1bd --- /dev/null +++ b/workspace-server/internal/metrics/metrics_test.go @@ -0,0 +1,104 @@ +package metrics + +// Tests for the phantom-busy reset counter wired up by issue #2865. +// The counter is exposed at /metrics as +// molecule_phantom_busy_resets_total. A high steady-state value +// signals task-lifecycle accounting regressions in the agent loop — +// see scheduler.sweepPhantomBusy for the writer. + +import ( + "net/http/httptest" + "strings" + "sync" + "sync/atomic" + "testing" + + "github.com/gin-gonic/gin" +) + +// resetForTest zeroes the counter so a single test's TrackPhantomBusyReset +// calls don't compound onto a previous test's run. 
metrics.go's package- +// level state means every test that touches the counter must reset. +func resetForTest() { + atomic.StoreInt64(&phantomBusyResets, 0) +} + +func TestTrackPhantomBusyReset_IncrementsCounter(t *testing.T) { + resetForTest() + for i := 0; i < 7; i++ { + TrackPhantomBusyReset() + } + got := atomic.LoadInt64(&phantomBusyResets) + if got != 7 { + t.Errorf("counter after 7 calls = %d, want 7", got) + } +} + +func TestTrackPhantomBusyReset_RaceFreeUnderConcurrentWrites(t *testing.T) { + resetForTest() + var wg sync.WaitGroup + const goroutines = 50 + const callsPerGoroutine = 200 + wg.Add(goroutines) + for i := 0; i < goroutines; i++ { + go func() { + defer wg.Done() + for j := 0; j < callsPerGoroutine; j++ { + TrackPhantomBusyReset() + } + }() + } + wg.Wait() + want := int64(goroutines * callsPerGoroutine) + got := atomic.LoadInt64(&phantomBusyResets) + if got != want { + t.Errorf("counter under concurrent writes = %d, want %d (lost increments → atomic broken)", + got, want) + } +} + +func TestHandler_ExposesPhantomBusyResetsCounter(t *testing.T) { + resetForTest() + for i := 0; i < 3; i++ { + TrackPhantomBusyReset() + } + + gin.SetMode(gin.TestMode) + r := gin.New() + r.GET("/metrics", Handler()) + + w := httptest.NewRecorder() + req := httptest.NewRequest("GET", "/metrics", nil) + r.ServeHTTP(w, req) + + body := w.Body.String() + // HELP + TYPE lines must precede the metric (Prometheus text exposition format). + if !strings.Contains(body, "# HELP molecule_phantom_busy_resets_total") { + t.Errorf("metrics output missing HELP line for molecule_phantom_busy_resets_total:\n%s", body) + } + if !strings.Contains(body, "# TYPE molecule_phantom_busy_resets_total counter") { + t.Errorf("metrics output missing TYPE line for molecule_phantom_busy_resets_total:\n%s", body) + } + if !strings.Contains(body, "molecule_phantom_busy_resets_total 3\n") { + t.Errorf("metrics output missing counter value 3:\n%s", body) + } +} + +func TestHandler_PhantomBusyResetsZeroByDefault(t *testing.T) { + // Fresh process should report 0 — pin the contract so a future + // refactor that lazy-inits the counter to nil doesn't silently + // drop the metric from /metrics. + resetForTest() + + gin.SetMode(gin.TestMode) + r := gin.New() + r.GET("/metrics", Handler()) + + w := httptest.NewRecorder() + req := httptest.NewRequest("GET", "/metrics", nil) + r.ServeHTTP(w, req) + + if !strings.Contains(w.Body.String(), "molecule_phantom_busy_resets_total 0\n") { + t.Errorf("metric must report 0 by default:\n%s", w.Body.String()) + } +} diff --git a/workspace-server/internal/pendinguploads/export_test.go b/workspace-server/internal/pendinguploads/export_test.go new file mode 100644 index 00000000..c758b629 --- /dev/null +++ b/workspace-server/internal/pendinguploads/export_test.go @@ -0,0 +1,17 @@ +package pendinguploads + +import ( + "context" + "time" +) + +// StartSweeperWithIntervalForTest exposes startSweeperWithInterval to +// the external test package. The production code uses StartSweeper +// (which pins the canonical SweepInterval); tests pin a short interval +// to exercise the ticker-driven cycle without burning real wall-clock +// time. The Go convention `export_test.go` keeps this seam OUT of the +// production binary — files ending in _test.go are stripped at build +// time, so this re-export only exists during `go test`. 
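+//
+// Usage from the sweeper tests later in this change (30ms cadence;
+// retention passed through unchanged):
+//
+//	ctx, cancel := context.WithCancel(context.Background())
+//	defer cancel()
+//	go pendinguploads.StartSweeperWithIntervalForTest(ctx, store, time.Hour, 30*time.Millisecond)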
+func StartSweeperWithIntervalForTest(ctx context.Context, storage Storage, ackRetention, interval time.Duration) { + startSweeperWithInterval(ctx, storage, ackRetention, interval) +} diff --git a/workspace-server/internal/pendinguploads/storage.go b/workspace-server/internal/pendinguploads/storage.go index 0289c9b8..c4bcaf92 100644 --- a/workspace-server/internal/pendinguploads/storage.go +++ b/workspace-server/internal/pendinguploads/storage.go @@ -72,6 +72,28 @@ type Record struct { ExpiresAt time.Time } +// SweepResult is the per-cycle accounting from Sweep. Both counts are +// non-negative; Total is just Acked + Expired for log/metrics +// convenience. Phase 3 metrics expose these as separate counters so +// dashboards can spot a stuck-ack pattern (high Expired, low Acked) vs. +// healthy churn (Acked dominates). +type SweepResult struct { + Acked int // rows deleted because acked_at + retention elapsed + Expired int // rows deleted because expires_at < now AND never acked +} + +// Total returns the sum of Acked + Expired — convenient for log lines. +func (r SweepResult) Total() int { return r.Acked + r.Expired } + +// PutItem is one file in a PutBatch call. Same per-field rules as Put — +// empty content, missing filename, or content > MaxFileBytes is rejected +// up-front so a bad item in the batch doesn't poison the transaction. +type PutItem struct { + Content []byte + Filename string + Mimetype string +} + // Storage is the platform-side persistence boundary for poll-mode chat // uploads. The Postgres implementation backs all callers today; an S3- // backed implementation can drop in once RFC #2789 lands by making @@ -86,6 +108,17 @@ type Storage interface { // content > MaxFileBytes return errors before any DB write. Put(ctx context.Context, workspaceID uuid.UUID, content []byte, filename, mimetype string) (uuid.UUID, error) + // PutBatch inserts N uploads atomically — either all rows commit or + // none do. Returns assigned file_ids in input order on success; + // returns an error and does NOT insert any row on failure. + // + // Use this from multi-file upload handlers so a per-row failure on + // row K doesn't leave rows 1..K-1 orphaned in the table (a client + // retry would then double-insert them on success). All-or-nothing + // semantics match the multipart request the canvas sends — either + // the whole batch succeeds or the user re-uploads. + PutBatch(ctx context.Context, workspaceID uuid.UUID, items []PutItem) ([]uuid.UUID, error) + // Get returns the full row including content. Returns ErrNotFound // when the row is absent, acked, or past expires_at. Caller should // not differentiate the three cases in the response — from the @@ -103,6 +136,18 @@ type Storage interface { // absent or already expired; on already-acked, returns nil so // the workspace's at-least-once retry succeeds without an error. Ack(ctx context.Context, fileID uuid.UUID) error + + // Sweep deletes rows past their retention window: + // - acked rows older than ackRetention (give the workspace a + // window to re-fetch in case it processed but failed to write + // the file before crashing — at-least-once behavior). + // - unacked rows past expires_at (the platform's hard TTL — 24h + // by default; a workspace that hasn't fetched by then is + // considered dead from the upload's perspective). + // Returns the per-category deletion counts for observability. + // Errors are surfaced to the caller; a transient DB error must NOT + // crash the sweeper loop (it just retries on the next tick). 
+ Sweep(ctx context.Context, ackRetention time.Duration) (SweepResult, error) } // PostgresStorage is the production Storage implementation backed by @@ -149,6 +194,64 @@ func (p *PostgresStorage) Put(ctx context.Context, workspaceID uuid.UUID, conten return fileID, nil } +// PutBatch inserts every item atomically inside a single Tx. On any +// per-item validation or per-row INSERT error the Tx is rolled back and +// the caller sees the error without any rows committed — no partial +// orphans for a multi-file upload that fails mid-batch. +// +// Validation runs BEFORE BEGIN so a bad input shape (empty content, +// over-cap size) doesn't even open a Tx. Once we're in the Tx, the only +// failures expected are DB-side (broken connection, statement timeout) +// — those abort cleanly via Rollback. +func (p *PostgresStorage) PutBatch(ctx context.Context, workspaceID uuid.UUID, items []PutItem) ([]uuid.UUID, error) { + if len(items) == 0 { + return nil, nil + } + for i, it := range items { + if len(it.Content) == 0 { + return nil, fmt.Errorf("pendinguploads: item %d: empty content", i) + } + if len(it.Content) > MaxFileBytes { + return nil, ErrTooLarge + } + if it.Filename == "" { + return nil, fmt.Errorf("pendinguploads: item %d: empty filename", i) + } + if len(it.Filename) > 100 { + return nil, fmt.Errorf("pendinguploads: item %d: filename exceeds 100 chars", i) + } + } + + tx, err := p.db.BeginTx(ctx, nil) + if err != nil { + return nil, fmt.Errorf("pendinguploads: begin tx: %w", err) + } + // Defer-rollback is safe even after a successful Commit — the second + // Rollback is a no-op (database/sql tracks tx state). + defer func() { + _ = tx.Rollback() + }() + + out := make([]uuid.UUID, 0, len(items)) + for i, it := range items { + var fid uuid.UUID + err := tx.QueryRowContext(ctx, ` + INSERT INTO pending_uploads (workspace_id, content, size_bytes, filename, mimetype) + VALUES ($1, $2, $3, $4, $5) + RETURNING file_id + `, workspaceID, it.Content, int64(len(it.Content)), it.Filename, it.Mimetype).Scan(&fid) + if err != nil { + return nil, fmt.Errorf("pendinguploads: batch insert item %d: %w", i, err) + } + out = append(out, fid) + } + + if err := tx.Commit(); err != nil { + return nil, fmt.Errorf("pendinguploads: commit batch: %w", err) + } + return out, nil +} + func (p *PostgresStorage) Get(ctx context.Context, fileID uuid.UUID) (Record, error) { // The expires_at + acked_at filter in the WHERE clause means a // caller sees ErrNotFound for absent / acked / expired without @@ -251,3 +354,41 @@ func (p *PostgresStorage) Ack(ctx context.Context, fileID uuid.UUID) error { // the workspace's intent ("I'm done with this file") was honored. return nil } + +// Sweep deletes acked rows past their retention window plus any +// unacked rows whose hard TTL has elapsed. Single round-trip: a CTE +// captures the deletion in one DELETE … RETURNING and the outer +// SELECT sums by category. Cheaper and tighter than two round trips, +// and atomic w.r.t. concurrent writes (the WHERE predicate sees a +// consistent snapshot via Postgres MVCC). +// +// ackRetention=0 deletes all acked rows immediately; values <0 are +// clamped to 0 for safety. Caller defaults are documented at +// StartSweeper's DefaultAckRetention. 
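+//
+// Worked example (editorial): with ackRetention = 1h, a row acked at
+// 10:00 becomes sweep-eligible at 11:00; an unacked row whose
+// expires_at passed at 09:00 is deleted on the next cycle regardless
+// of ackRetention. The two predicates are independent.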
+func (p *PostgresStorage) Sweep(ctx context.Context, ackRetention time.Duration) (SweepResult, error) {
+	if ackRetention < 0 {
+		ackRetention = 0
+	}
+	// make_interval's secs parameter is double precision, so Postgres
+	// would accept a fraction; we deliberately truncate to whole
+	// seconds so test fixtures pin a deterministic value across PG
+	// versions.
+	retentionSecs := int64(ackRetention.Seconds())
+
+	var acked, expired int
+	err := p.db.QueryRowContext(ctx, `
+	WITH deleted AS (
+		DELETE FROM pending_uploads
+		WHERE (acked_at IS NOT NULL AND acked_at < now() - make_interval(secs => $1))
+		   OR (acked_at IS NULL AND expires_at < now())
+		RETURNING (acked_at IS NOT NULL) AS was_acked
+	)
+	SELECT
+		COALESCE(SUM(CASE WHEN was_acked THEN 1 ELSE 0 END), 0)::int AS acked,
+		COALESCE(SUM(CASE WHEN NOT was_acked THEN 1 ELSE 0 END), 0)::int AS expired
+	FROM deleted
+	`, retentionSecs).Scan(&acked, &expired)
+	if err != nil {
+		return SweepResult{}, fmt.Errorf("pendinguploads: sweep: %w", err)
+	}
+	return SweepResult{Acked: acked, Expired: expired}, nil
+}
diff --git a/workspace-server/internal/pendinguploads/storage_test.go b/workspace-server/internal/pendinguploads/storage_test.go
index 45f797c7..c6793c10 100644
--- a/workspace-server/internal/pendinguploads/storage_test.go
+++ b/workspace-server/internal/pendinguploads/storage_test.go
@@ -71,6 +71,18 @@ const (
 	SELECT acked_at FROM pending_uploads WHERE file_id = $1 AND expires_at > now()
 	`
+	sweepSQL = `
+	WITH deleted AS (
+		DELETE FROM pending_uploads
+		WHERE (acked_at IS NOT NULL AND acked_at < now() - make_interval(secs => $1))
+		   OR (acked_at IS NULL AND expires_at < now())
+		RETURNING (acked_at IS NOT NULL) AS was_acked
+	)
+	SELECT
+		COALESCE(SUM(CASE WHEN was_acked THEN 1 ELSE 0 END), 0)::int AS acked,
+		COALESCE(SUM(CASE WHEN NOT was_acked THEN 1 ELSE 0 END), 0)::int AS expired
+	FROM deleted
+	`
 )
 
 // ----- Put ------------------------------------------------------------------
 
@@ -398,3 +410,324 @@ func TestAck_DBErrorOnDisambiguate_Wrapped(t *testing.T) {
 		t.Fatalf("expected wrapped disambiguate error, got %v", err)
 	}
 }
+
+// ----- Sweep ----------------------------------------------------------------
+
+func TestSweep_DeletesAckedAndExpired_ReturnsCounts(t *testing.T) {
+	db, mock := newMockDB(t)
+	store := pendinguploads.NewPostgres(db)
+
+	mock.ExpectQuery(sweepSQL).
+		WithArgs(int64(3600)). // 1h retention
+		WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(7, 2))
+
+	res, err := store.Sweep(context.Background(), time.Hour)
+	if err != nil {
+		t.Fatalf("Sweep: %v", err)
+	}
+	if res.Acked != 7 || res.Expired != 2 || res.Total() != 9 {
+		t.Errorf("got %+v want acked=7 expired=2 total=9", res)
+	}
+}
+
+func TestSweep_NothingToDelete_ReturnsZero(t *testing.T) {
+	db, mock := newMockDB(t)
+	store := pendinguploads.NewPostgres(db)
+
+	mock.ExpectQuery(sweepSQL).
+		WithArgs(int64(3600)).
+		WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(0, 0))
+
+	res, err := store.Sweep(context.Background(), time.Hour)
+	if err != nil {
+		t.Fatalf("Sweep: %v", err)
+	}
+	if res.Total() != 0 {
+		t.Errorf("got %+v, want zero result", res)
+	}
+}
+
+func TestSweep_NegativeRetentionClampedToZero(t *testing.T) {
+	db, mock := newMockDB(t)
+	store := pendinguploads.NewPostgres(db)
+
+	// Negative retention must clamp to 0; the SQL gets `secs => 0` so an
+	// acked-just-now row is eligible for deletion immediately. Pinned
+	// here because passing the raw negative through `make_interval`
+	// would move the cutoff (now() minus a negative interval) into the
+	// future. Every acked row would still be deleted, but only as an
+	// accident of interval arithmetic; the clamp makes "negative means
+	// delete immediately" an explicit, deterministic contract.
+	mock.ExpectQuery(sweepSQL).
+		WithArgs(int64(0)).
+		WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(3, 0))
+
+	res, err := store.Sweep(context.Background(), -1*time.Second)
+	if err != nil {
+		t.Fatalf("Sweep: %v", err)
+	}
+	if res.Acked != 3 {
+		t.Errorf("got %+v want acked=3", res)
+	}
+}
+
+func TestSweep_ZeroRetentionImmediatelyDeletesAcked(t *testing.T) {
+	db, mock := newMockDB(t)
+	store := pendinguploads.NewPostgres(db)
+
+	mock.ExpectQuery(sweepSQL).
+		WithArgs(int64(0)).
+		WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(5, 1))
+
+	res, err := store.Sweep(context.Background(), 0)
+	if err != nil {
+		t.Fatalf("Sweep: %v", err)
+	}
+	if res.Acked != 5 || res.Expired != 1 {
+		t.Errorf("got %+v want acked=5 expired=1", res)
+	}
+}
+
+func TestSweep_DBError_Wrapped(t *testing.T) {
+	db, mock := newMockDB(t)
+	store := pendinguploads.NewPostgres(db)
+
+	mock.ExpectQuery(sweepSQL).
+		WithArgs(int64(60)).
+		WillReturnError(errors.New("connection lost"))
+
+	_, err := store.Sweep(context.Background(), time.Minute)
+	if err == nil || !strings.Contains(err.Error(), "sweep") {
+		t.Fatalf("expected wrapped sweep error, got %v", err)
+	}
+}
+
+func TestSweepResult_TotalSumsCounts(t *testing.T) {
+	r := pendinguploads.SweepResult{Acked: 4, Expired: 3}
+	if r.Total() != 7 {
+		t.Errorf("Total = %d, want 7", r.Total())
+	}
+	z := pendinguploads.SweepResult{}
+	if z.Total() != 0 {
+		t.Errorf("zero Total = %d, want 0", z.Total())
+	}
+}
+
+// ----- PutBatch -------------------------------------------------------------
+//
+// PutBatch is the multi-file atomic insert path used by uploadPollMode in
+// chat_files.go. The contract that callers rely on:
+//
+// - Either ALL rows commit, or NONE do — a per-row INSERT failure must
+//   leave the table unchanged (no orphaned rows from a half-applied batch).
+// - Per-item validation runs BEFORE the Tx opens so a bad input shape
+//   never wastes a BEGIN round-trip.
+// - Returned []uuid.UUID is in input order — handler maps response back
+//   to the multipart Files[i].
+//
+// sqlmock's ExpectBegin / ExpectQuery / ExpectCommit / ExpectRollback let us
+// pin the exact tx-lifecycle shape; if a future refactor swaps Begin for
+// BeginTx-with-options, the test fails until we re-pin.
+
+func TestPutBatch_HappyPath_AllCommitInOrder(t *testing.T) {
+	db, mock := newMockDB(t)
+	store := pendinguploads.NewPostgres(db)
+
+	wsID := uuid.New()
+	id1, id2, id3 := uuid.New(), uuid.New(), uuid.New()
+
+	mock.ExpectBegin()
+	mock.ExpectQuery(insertSQL).
+		WithArgs(wsID, []byte("aaa"), int64(3), "a.txt", "text/plain").
+		WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id1))
+	mock.ExpectQuery(insertSQL).
+		WithArgs(wsID, []byte("bbbb"), int64(4), "b.bin", "application/octet-stream").
+		WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id2))
+	mock.ExpectQuery(insertSQL).
+		WithArgs(wsID, []byte("ccccc"), int64(5), "c.pdf", "application/pdf").
+		WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id3))
+	mock.ExpectCommit()
+	// The deferred Rollback after a successful Commit is short-circuited
+	// by database/sql (ErrTxDone) and never reaches the driver, so
+	// sqlmock never sees it and we don't need to expect it.
+ + got, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{ + {Content: []byte("aaa"), Filename: "a.txt", Mimetype: "text/plain"}, + {Content: []byte("bbbb"), Filename: "b.bin", Mimetype: "application/octet-stream"}, + {Content: []byte("ccccc"), Filename: "c.pdf", Mimetype: "application/pdf"}, + }) + if err != nil { + t.Fatalf("PutBatch: %v", err) + } + if len(got) != 3 || got[0] != id1 || got[1] != id2 || got[2] != id3 { + t.Errorf("ids out of order or missing: got %v want [%s %s %s]", got, id1, id2, id3) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("expectations: %v", err) + } +} + +func TestPutBatch_EmptyItems_NoTxNoError(t *testing.T) { + db, _ := newMockDB(t) // zero expectations — must NOT round-trip + store := pendinguploads.NewPostgres(db) + + got, err := store.PutBatch(context.Background(), uuid.New(), nil) + if err != nil { + t.Fatalf("expected nil error on empty batch, got %v", err) + } + if got != nil { + t.Errorf("expected nil ids on empty batch, got %v", got) + } +} + +func TestPutBatch_RejectsEmptyContent_NoTx(t *testing.T) { + db, _ := newMockDB(t) + store := pendinguploads.NewPostgres(db) + + _, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{ + {Content: []byte("ok"), Filename: "a.txt"}, + {Content: nil, Filename: "b.txt"}, + }) + if err == nil || !strings.Contains(err.Error(), "item 1") || !strings.Contains(err.Error(), "empty content") { + t.Fatalf("expected item-1 empty-content error, got %v", err) + } +} + +func TestPutBatch_RejectsOversize_ReturnsErrTooLarge(t *testing.T) { + db, _ := newMockDB(t) + store := pendinguploads.NewPostgres(db) + + too := make([]byte, pendinguploads.MaxFileBytes+1) + _, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{ + {Content: []byte("ok"), Filename: "small.txt"}, + {Content: too, Filename: "huge.bin"}, + }) + if !errors.Is(err, pendinguploads.ErrTooLarge) { + t.Fatalf("expected ErrTooLarge, got %v", err) + } +} + +func TestPutBatch_RejectsEmptyFilename_NoTx(t *testing.T) { + db, _ := newMockDB(t) + store := pendinguploads.NewPostgres(db) + + _, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{ + {Content: []byte("hi"), Filename: ""}, + }) + if err == nil || !strings.Contains(err.Error(), "item 0") || !strings.Contains(err.Error(), "empty filename") { + t.Fatalf("expected item-0 empty-filename error, got %v", err) + } +} + +func TestPutBatch_RejectsLongFilename_NoTx(t *testing.T) { + db, _ := newMockDB(t) + store := pendinguploads.NewPostgres(db) + + long := strings.Repeat("z", 101) + _, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{ + {Content: []byte("hi"), Filename: "ok.txt"}, + {Content: []byte("hi"), Filename: long}, + }) + if err == nil || !strings.Contains(err.Error(), "item 1") || !strings.Contains(err.Error(), "exceeds 100 chars") { + t.Fatalf("expected item-1 too-long-filename error, got %v", err) + } +} + +func TestPutBatch_BeginTxError_Wrapped(t *testing.T) { + db, mock := newMockDB(t) + store := pendinguploads.NewPostgres(db) + + mock.ExpectBegin().WillReturnError(errors.New("conn refused")) + + _, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{ + {Content: []byte("hi"), Filename: "a.txt"}, + }) + if err == nil || !strings.Contains(err.Error(), "begin tx") { + t.Fatalf("expected wrapped begin-tx error, got %v", err) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("expectations: %v", err) + } 
+} + +func TestPutBatch_RollsBackOnPerRowError_NoCommit(t *testing.T) { + // First INSERT succeeds, second errors. PutBatch MUST NOT issue + // Commit; the deferred Rollback unwinds row 1 so neither row commits. + // This is the contract that prevents orphan rows on a failed batch. + db, mock := newMockDB(t) + store := pendinguploads.NewPostgres(db) + + wsID := uuid.New() + id1 := uuid.New() + + mock.ExpectBegin() + mock.ExpectQuery(insertSQL). + WithArgs(wsID, []byte("aaa"), int64(3), "a.txt", ""). + WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id1)) + mock.ExpectQuery(insertSQL). + WithArgs(wsID, []byte("bb"), int64(2), "b.txt", ""). + WillReturnError(errors.New("statement timeout")) + // Critical: Rollback expected, NOT Commit. If a future refactor + // accidentally swallows the per-row error and Commits anyway, this + // test fails because the unmet ExpectCommit-vs-Rollback shape diverges. + mock.ExpectRollback() + + _, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{ + {Content: []byte("aaa"), Filename: "a.txt"}, + {Content: []byte("bb"), Filename: "b.txt"}, + }) + if err == nil || !strings.Contains(err.Error(), "batch insert item 1") { + t.Fatalf("expected wrapped per-row insert error, got %v", err) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("expectations (must rollback, no commit): %v", err) + } +} + +func TestPutBatch_RollsBackOnFirstRowError(t *testing.T) { + // Edge case: very first INSERT fails. No rows ever staged — but the + // Tx still needs to roll back to release the snapshot. + db, mock := newMockDB(t) + store := pendinguploads.NewPostgres(db) + + wsID := uuid.New() + mock.ExpectBegin() + mock.ExpectQuery(insertSQL). + WithArgs(wsID, []byte("oops"), int64(4), "a.txt", ""). + WillReturnError(errors.New("constraint violation")) + mock.ExpectRollback() + + _, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{ + {Content: []byte("oops"), Filename: "a.txt"}, + }) + if err == nil || !strings.Contains(err.Error(), "batch insert item 0") { + t.Fatalf("expected wrapped item-0 insert error, got %v", err) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("expectations: %v", err) + } +} + +func TestPutBatch_CommitError_Wrapped(t *testing.T) { + // Commit fails after every INSERT succeeded. Postgres has already + // rolled back the Tx by this point; we surface the error so the + // handler returns 500 and the client retries. + db, mock := newMockDB(t) + store := pendinguploads.NewPostgres(db) + + wsID := uuid.New() + id1 := uuid.New() + mock.ExpectBegin() + mock.ExpectQuery(insertSQL). + WithArgs(wsID, []byte("hi"), int64(2), "a.txt", ""). + WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id1)) + mock.ExpectCommit().WillReturnError(errors.New("commit broken")) + + _, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{ + {Content: []byte("hi"), Filename: "a.txt"}, + }) + if err == nil || !strings.Contains(err.Error(), "commit batch") { + t.Fatalf("expected wrapped commit error, got %v", err) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("expectations: %v", err) + } +} diff --git a/workspace-server/internal/pendinguploads/sweeper.go b/workspace-server/internal/pendinguploads/sweeper.go new file mode 100644 index 00000000..b29a87ad --- /dev/null +++ b/workspace-server/internal/pendinguploads/sweeper.go @@ -0,0 +1,129 @@ +// sweeper.go — periodic GC for the pending_uploads table. 
+// +// The platform's poll-mode chat-upload handler creates a row in +// pending_uploads for every chat-attached file the canvas sends to a +// poll-mode workspace. The workspace's inbox poller fetches the bytes +// and acks the row, but two failure modes leak rows long-term: +// +// 1. Workspace fetches but never acks (network hiccup between GET +// /content and POST /ack; workspace crashed between the two). +// Phase 1's Get refuses to re-serve an acked row, but a never- +// acked row could in principle be fetched repeatedly until expires_at. +// Phase 2's workspace-side fetcher is idempotent; the worry is +// only disk usage on the platform side. +// +// 2. Workspace never fetches at all (workspace was offline when the +// row was written; the upload's TTL elapsed). +// +// This sweeper handles both. It runs every SweepInterval, deletes rows +// in either category, and emits structured logs + Prometheus counters +// so a stuck-fetch dashboard can spot the leak class. +// +// Failure isolation: a transient DB error must NOT crash the sweeper. +// We log + continue; the next tick retries. ctx cancellation cleanly +// shuts the loop down for graceful shutdown. + +package pendinguploads + +import ( + "context" + "log" + "time" + + "github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics" +) + +// SweepInterval is the cadence of the GC loop. 5 minutes is a balance +// between "rows reaped quickly enough that disk usage doesn't surprise +// anyone" and "we don't pay a DELETE round-trip every 30 seconds when +// there are no candidates." Aligned with other low-priority sweepers +// (registry/orphan_sweeper runs at 60s but operates on Docker — much +// more expensive per cycle than a single indexed DELETE). +const SweepInterval = 5 * time.Minute + +// DefaultAckRetention is how long an acked row sticks around before the +// sweeper deletes it. 1 hour gives the workspace enough time to retry +// the GET if its first fetch crashed mid-write — at-least-once handoff +// without leaking content for a full 24h after the workspace already +// has a copy. +const DefaultAckRetention = 1 * time.Hour + +// sweepDeadline bounds a single sweep cycle. A daemon at the edge of +// timeout shouldn't pile up goroutines; 30s is generous for a single +// indexed DELETE on a table that should rarely have more than a few +// thousand rows in flight. +const sweepDeadline = 30 * time.Second + +// StartSweeper runs the GC loop until ctx is cancelled. nil storage +// makes the loop a no-op (matches the handlers' tolerance for an +// unconfigured pendinguploads — some test harnesses run without the +// storage wired). +// +// Pass ackRetention=0 to use DefaultAckRetention. Negative values are +// clamped at the storage layer. +// +// Production callers use SweepInterval (5m). Tests use a short interval +// to exercise the ticker-driven sweep path without burning real wall- +// clock time. +func StartSweeper(ctx context.Context, storage Storage, ackRetention time.Duration) { + startSweeperWithInterval(ctx, storage, ackRetention, SweepInterval) +} + +// startSweeperWithInterval is the test-friendly variant of StartSweeper +// — same loop, but the cadence is caller-specified. Production code +// should use StartSweeper to keep the SweepInterval constant pinned. 
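+//
+// Hypothetical production wiring (the actual call site is outside this
+// diff; names assumed):
+//
+//	store := pendinguploads.NewPostgres(db)
+//	go pendinguploads.StartSweeper(rootCtx, store, 0) // 0 → DefaultAckRetention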
+func startSweeperWithInterval(ctx context.Context, storage Storage, ackRetention, interval time.Duration) { + if storage == nil { + log.Println("pendinguploads sweeper: storage is nil — sweeper disabled") + return + } + if ackRetention == 0 { + ackRetention = DefaultAckRetention + } + log.Printf( + "pendinguploads sweeper started — sweeping every %s; ack retention %s", + interval, ackRetention, + ) + ticker := time.NewTicker(interval) + defer ticker.Stop() + // Run once immediately so a platform restart cleans up any rows + // that became eligible while we were down — don't make the + // operator wait 5 minutes for the first sweep. + sweepOnce(ctx, storage, ackRetention) + for { + select { + case <-ctx.Done(): + log.Println("pendinguploads sweeper: shutdown") + return + case <-ticker.C: + sweepOnce(ctx, storage, ackRetention) + } + } +} + +func sweepOnce(parent context.Context, storage Storage, ackRetention time.Duration) { + ctx, cancel := context.WithTimeout(parent, sweepDeadline) + defer cancel() + + res, err := storage.Sweep(ctx, ackRetention) + if err != nil { + // Transient errors: log + continue. The next tick retries; if + // the DB is genuinely down, the rest of the platform is also + // broken and disk usage is the least of the operator's + // problems. + log.Printf("pendinguploads sweeper: Sweep failed: %v", err) + metrics.PendingUploadsSweepError() + return + } + metrics.PendingUploadsSwept(res.Acked, res.Expired) + if res.Total() > 0 { + // Per-cycle structured-ish log (one line per cycle that did + // something). Quiet by design — most cycles delete zero rows + // on a healthy system, and a stream of empty-result lines + // would drown the production log without surfacing a signal. + log.Printf( + "pendinguploads sweeper: deleted acked=%d expired=%d total=%d", + res.Acked, res.Expired, res.Total(), + ) + } +} diff --git a/workspace-server/internal/pendinguploads/sweeper_test.go b/workspace-server/internal/pendinguploads/sweeper_test.go new file mode 100644 index 00000000..fb0c5aa0 --- /dev/null +++ b/workspace-server/internal/pendinguploads/sweeper_test.go @@ -0,0 +1,294 @@ +package pendinguploads_test + +import ( + "context" + "errors" + "sync/atomic" + "testing" + "time" + + "github.com/google/uuid" + + "github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads" +) + +// fakeSweepStorage is a minimal Storage that records every Sweep call +// and lets each test inject the per-cycle return values. The other +// methods are no-ops — the sweeper goroutine never calls them. 
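+//
+// Editorial aside: a compile-time pin would keep the fake in lockstep
+// with the Storage interface as it grows:
+//
+//	var _ pendinguploads.Storage = (*fakeSweepStorage)(nil)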
+type fakeSweepStorage struct { + calls atomic.Int64 + results []pendinguploads.SweepResult + errs []error + cycleDone chan struct{} // closed after each Sweep call (test sync) + gotRetention atomic.Int64 // last ackRetention seen, in seconds +} + +func newFakeSweepStorage(results []pendinguploads.SweepResult, errs []error) *fakeSweepStorage { + return &fakeSweepStorage{ + results: results, + errs: errs, + cycleDone: make(chan struct{}, 16), + } +} + +func (f *fakeSweepStorage) Put(_ context.Context, _ uuid.UUID, _ []byte, _, _ string) (uuid.UUID, error) { + return uuid.Nil, errors.New("not used") +} +func (f *fakeSweepStorage) Get(_ context.Context, _ uuid.UUID) (pendinguploads.Record, error) { + return pendinguploads.Record{}, errors.New("not used") +} +func (f *fakeSweepStorage) MarkFetched(_ context.Context, _ uuid.UUID) error { + return errors.New("not used") +} +func (f *fakeSweepStorage) Ack(_ context.Context, _ uuid.UUID) error { + return errors.New("not used") +} +func (f *fakeSweepStorage) PutBatch(_ context.Context, _ uuid.UUID, _ []pendinguploads.PutItem) ([]uuid.UUID, error) { + return nil, errors.New("not used") +} +func (f *fakeSweepStorage) Sweep(_ context.Context, ackRetention time.Duration) (pendinguploads.SweepResult, error) { + idx := int(f.calls.Load()) + f.calls.Add(1) + f.gotRetention.Store(int64(ackRetention.Seconds())) + defer func() { + select { + case f.cycleDone <- struct{}{}: + default: + } + }() + if idx < len(f.errs) && f.errs[idx] != nil { + return pendinguploads.SweepResult{}, f.errs[idx] + } + if idx < len(f.results) { + return f.results[idx], nil + } + return pendinguploads.SweepResult{}, nil +} + +// waitForCycle blocks until at least one Sweep completes, with a deadline. +// Tests use this instead of time.Sleep to avoid flakes on slow CI hosts. +// +// CAVEAT: cycleDone fires from inside fakeSweepStorage.Sweep's defer, +// which runs as Sweep returns its result — BEFORE the StartSweeper +// loop has processed the (result, error) tuple and called the +// metric recorders. Tests that assert on metric counters must NOT +// rely on this wait alone; use waitForMetricDelta instead so the +// metric increment race (Sweep returns → cycleDone fires → test +// reads counter → only then does StartSweeper's loop call +// metrics.PendingUploadsSweepError) doesn't produce a flake. +func (f *fakeSweepStorage) waitForCycle(t *testing.T, n int, timeout time.Duration) { + t.Helper() + deadline := time.NewTimer(timeout) + defer deadline.Stop() + for got := 0; got < n; got++ { + select { + case <-f.cycleDone: + case <-deadline.C: + t.Fatalf("waited %s for %d sweep cycles, got %d", timeout, n, f.calls.Load()) + } + } +} + +// waitForMetricDelta polls the supplied delta function until it returns +// `want` or the timeout elapses. Use after waitForCycle when the test +// asserts on a metric counter — closes the race between cycleDone +// (signalled inside fakeSweepStorage.Sweep's defer, BEFORE Sweep +// returns to StartSweeper) and the metric recording (which happens in +// StartSweeper's loop AFTER Sweep returns). On a slow CI host the test +// goroutine wins the read before StartSweeper's goroutine writes the +// counter; the polling assert preserves the determinism of "the metric +// MUST be N" without timing-based flakes. 
+//
+// A note from an earlier debugging round, kept because the failure mode
+// is deceptive: "delta=0, want=1" looked like a real bug at first
+// glance — "metric never incremented" — but instrumented analysis showed
+// the metric DID increment, just AFTER the test's read. The fix is the
+// test's wait shape, not the production code.
+func waitForMetricDelta(t *testing.T, delta func() int64, want int64, timeout time.Duration) {
+	t.Helper()
+	deadline := time.Now().Add(timeout)
+	for time.Now().Before(deadline) {
+		if delta() == want {
+			return
+		}
+		time.Sleep(5 * time.Millisecond)
+	}
+	t.Fatalf("waited %s for metric delta=%d, last seen %d", timeout, want, delta())
+}
+
+func TestStartSweeper_NilStorageDoesNotPanic(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+	// Should return immediately without panicking; no goroutine to wait on.
+	pendinguploads.StartSweeper(ctx, nil, time.Second)
+}
+
+func TestStartSweeper_RunsImmediately(t *testing.T) {
+	// Only the immediate, pre-ticker sweep is asserted here: the public
+	// StartSweeper pins the 5-minute SweepInterval, so no tick can fire
+	// within the test budget. Tick-driven cycles are covered by
+	// TestStartSweeperWithInterval_TickerFiresAdditionalCycles below.
+	store := newFakeSweepStorage(
+		[]pendinguploads.SweepResult{{Acked: 5}, {Acked: 1, Expired: 2}},
+		nil,
+	)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	go pendinguploads.StartSweeper(ctx, store, time.Hour)
+	store.waitForCycle(t, 1, 2*time.Second)
+	if got := store.calls.Load(); got < 1 {
+		t.Errorf("expected at least one immediate sweep, got %d", got)
+	}
+	// Retention propagated.
+	if store.gotRetention.Load() != 3600 {
+		t.Errorf("retention seconds = %d, want 3600", store.gotRetention.Load())
+	}
+}
+
+func TestStartSweeper_ZeroAckRetentionUsesDefault(t *testing.T) {
+	store := newFakeSweepStorage([]pendinguploads.SweepResult{{}}, nil)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	go pendinguploads.StartSweeper(ctx, store, 0)
+	store.waitForCycle(t, 1, 2*time.Second)
+	want := int64(pendinguploads.DefaultAckRetention.Seconds())
+	if store.gotRetention.Load() != want {
+		t.Errorf("retention = %d, want default %d", store.gotRetention.Load(), want)
+	}
+}
+
+func TestStartSweeper_ContextCancelStopsLoop(t *testing.T) {
+	store := newFakeSweepStorage([]pendinguploads.SweepResult{{}}, nil)
+	ctx, cancel := context.WithCancel(context.Background())
+
+	done := make(chan struct{})
+	go func() {
+		pendinguploads.StartSweeper(ctx, store, time.Second)
+		close(done)
+	}()
+	store.waitForCycle(t, 1, 2*time.Second)
+	cancel()
+
+	select {
+	case <-done:
+	case <-time.After(2 * time.Second):
+		t.Fatal("StartSweeper did not return after ctx cancel")
+	}
+}
+
+func TestStartSweeperWithInterval_TickerFiresAdditionalCycles(t *testing.T) {
+	store := newFakeSweepStorage(
+		[]pendinguploads.SweepResult{{Acked: 1}, {Expired: 1}, {}, {}, {}},
+		nil,
+	)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// StartSweeperWithIntervalForTest is the export_test.go re-export of
+	// startSweeperWithInterval (see the note on that function).
+	go pendinguploads.StartSweeperWithIntervalForTest(ctx, store, time.Hour, 30*time.Millisecond)
+
+	// Immediate cycle + at least one tick-driven cycle.
+	store.waitForCycle(t, 2, 2*time.Second)
+
+	if got := store.calls.Load(); got < 2 {
+		t.Errorf("expected ≥2 cycles (immediate + 1 tick), got %d", got)
+	}
+}
+
+func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) {
+	// First call errors; second call succeeds. The loop must keep running
+	// across the error so a one-off DB hiccup doesn't disable the GC.
+	store := newFakeSweepStorage(
+		[]pendinguploads.SweepResult{{}, {Acked: 1}},
+		[]error{errors.New("transient db error"), nil},
+	)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// The public StartSweeper pins the 5-minute SweepInterval, so this
+	// test never sees a tick-driven second cycle within its budget.
+	// That's fine; the immediate sweep alone proves the contract (Sweep
+	// returns an error, and the goroutine logs it and keeps running
+	// instead of panicking or exiting).
+	go pendinguploads.StartSweeper(ctx, store, time.Hour)
+
+	// Wait for the first (errored) cycle.
+	store.waitForCycle(t, 1, 2*time.Second)
+	// Cancel; the goroutine exits cleanly. A panic on the error path
+	// would have aborted the test process before this point, so the
+	// completed first cycle plus a clean shutdown is the whole
+	// "error doesn't crash the loop" contract.
+	cancel()
+}
+
+// metricDelta returns a function that, when called, returns how much
+// the (acked, expired, errored) counters have advanced since metricDelta
+// was originally called. metrics is a process-singleton across the test
+// suite; deltas isolate this test from order-of-execution dependencies.
+func metricDelta(t *testing.T) (deltaAcked, deltaExpired, deltaError func() int64) {
+	t.Helper()
+	a0, e0, err0 := metrics.PendingUploadsSweepCounts()
+	deltaAcked = func() int64 {
+		a, _, _ := metrics.PendingUploadsSweepCounts()
+		return a - a0
+	}
+	deltaExpired = func() int64 {
+		_, e, _ := metrics.PendingUploadsSweepCounts()
+		return e - e0
+	}
+	deltaError = func() int64 {
+		_, _, x := metrics.PendingUploadsSweepCounts()
+		return x - err0
+	}
+	return
+}
+
+func TestStartSweeper_RecordsMetricsOnSuccess(t *testing.T) {
+	deltaAcked, deltaExpired, deltaError := metricDelta(t)
+
+	store := newFakeSweepStorage(
+		[]pendinguploads.SweepResult{{Acked: 3, Expired: 5}},
+		nil,
+	)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	go pendinguploads.StartSweeper(ctx, store, time.Hour)
+	store.waitForCycle(t, 1, 2*time.Second)
+
+	// Poll for the success counters to settle — closes the cycleDone-
+	// vs-metric-record race (see waitForMetricDelta comment).
+	waitForMetricDelta(t, deltaAcked, 3, 2*time.Second)
+	waitForMetricDelta(t, deltaExpired, 5, 2*time.Second)
+	// Error counter MUST stay at zero on the success path. Read after
+	// the success counters have settled — once those are correct,
+	// StartSweeper has fully processed this cycle's result.
+	if got := deltaError(); got != 0 {
+		t.Errorf("error counter delta = %d, want 0", got)
+	}
+}
+
+func TestStartSweeper_RecordsMetricsOnError(t *testing.T) {
+	_, _, deltaError := metricDelta(t)
+
+	store := newFakeSweepStorage(
+		[]pendinguploads.SweepResult{{}},
+		[]error{errors.New("db down")},
+	)
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	go pendinguploads.StartSweeper(ctx, store, time.Hour)
+	store.waitForCycle(t, 1, 2*time.Second)
+
+	// Poll for the error counter to settle — cycleDone fires inside
+	// the fake's Sweep defer, BEFORE StartSweeper's loop receives the
+	// returned error and calls metrics.PendingUploadsSweepError. On
+	// slow CI hosts a direct deltaError() read here returns 0 even
+	// though the metric WILL be 1 a few ms later. See
+	// waitForMetricDelta comment.
+	waitForMetricDelta(t, deltaError, 1, 2*time.Second)
+}
diff --git a/workspace-server/internal/provisioner/cp_provisioner.go b/workspace-server/internal/provisioner/cp_provisioner.go
index edc67d9f..bdc5bff7 100644
--- a/workspace-server/internal/provisioner/cp_provisioner.go
+++ b/workspace-server/internal/provisioner/cp_provisioner.go
@@ -14,6 +14,7 @@ import (
 	"time"
 
 	"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
+	"github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
 )
 
 // CPProvisionerAPI is the contract WorkspaceHandler uses to talk to the
@@ -214,6 +215,13 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string,
 	}
 	log.Printf("CP provisioner: workspace %s → EC2 instance %s (%s)", cfg.WorkspaceID, result.InstanceID, result.State)
+	provlog.Event("provision.ec2_started", map[string]any{
+		"workspace_id": cfg.WorkspaceID,
+		"instance_id": result.InstanceID,
+		"state": result.State,
+		"tier": cfg.Tier,
+		"runtime": cfg.Runtime,
+	})
 	return result.InstanceID, nil
 }
 
@@ -273,6 +281,10 @@ func (p *CPProvisioner) Stop(ctx context.Context, workspaceID string) error {
 		return fmt.Errorf("cp provisioner: stop %s: unexpected %d: %s", workspaceID, resp.StatusCode, strings.TrimSpace(string(body)))
 	}
+	provlog.Event("provision.ec2_stopped", map[string]any{
+		"workspace_id": workspaceID,
+		"instance_id": instanceID,
+	})
 	return nil
 }
diff --git a/workspace-server/internal/provlog/provlog.go b/workspace-server/internal/provlog/provlog.go
new file mode 100644
index 00000000..4434c238
--- /dev/null
+++ b/workspace-server/internal/provlog/provlog.go
@@ -0,0 +1,48 @@
+// Package provlog emits structured, single-line JSON log records for
+// provisioning-lifecycle boundaries (workspace create, EC2 start/stop,
+// restart, idempotency skips). Records share a stable `evt:` prefix and
+// JSON payload so a future grep|jq pipeline (or a Loki/Datadog ingest)
+// can reconstruct the per-workspace timeline without parsing the
+// human-prose log lines that already exist.
+//
+// Existing log.Printf lines are intentionally NOT replaced — they
+// remain the operator-facing message. Event() emits a paired structured
+// record alongside, additive only.
+//
+// Event taxonomy (extend by appending; never rename):
+//
+//	provision.start         — workspace row inserted, EC2 about to launch
+//	provision.skip_existing — idempotency hit, no new EC2
+//	provision.ec2_started   — RunInstances returned an instance id
+//	provision.ec2_stopped   — TerminateInstances acknowledged
+//	restart.pre_stop        — Restart handler about to call Stop
+//
+// Required fields per event are documented at each call site.
package provlog
+
+import (
+	"encoding/json"
+	"log"
+)
+
+// Event writes a single line of the form:
+//
+//	evt: <name> {"k":"v",...}
+//
+// to the standard logger. JSON encoding errors never propagate (a
+// logging helper must never panic the request path); instead a fallback
+// payload records the marshal error. fields may be nil; the empty
+// payload `{}` is still useful to mark an event boundary.
+func Event(name string, fields map[string]any) {
+	if fields == nil {
+		fields = map[string]any{}
+	}
+	payload, err := json.Marshal(fields)
+	if err != nil {
+		// Fall back to a static payload so the event boundary still
+		// appears in the log. The marshal error itself is recorded
+		// on a best-effort basis.
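+		// Illustrative shape of the fallback record (the error text is
+		// whatever encoding/json reports; a channel value, as in the
+		// test below, yields):
+		//
+		//	evt: provision.ec2_started {"_marshal_err":"json: unsupported type: chan int"}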
+ log.Printf("evt: %s {\"_marshal_err\":%q}", name, err.Error()) + return + } + log.Printf("evt: %s %s", name, payload) +} diff --git a/workspace-server/internal/provlog/provlog_test.go b/workspace-server/internal/provlog/provlog_test.go new file mode 100644 index 00000000..7d2f5f5f --- /dev/null +++ b/workspace-server/internal/provlog/provlog_test.go @@ -0,0 +1,97 @@ +package provlog + +import ( + "bytes" + "encoding/json" + "log" + "strings" + "testing" +) + +// captureLog redirects the default logger to a buffer for the duration +// of fn and returns whatever was written. +func captureLog(t *testing.T, fn func()) string { + t.Helper() + var buf bytes.Buffer + prevWriter := log.Writer() + prevFlags := log.Flags() + log.SetOutput(&buf) + log.SetFlags(0) // strip date/time so assertions stay deterministic + t.Cleanup(func() { + log.SetOutput(prevWriter) + log.SetFlags(prevFlags) + }) + fn() + return buf.String() +} + +func TestEvent_EmitsEvtPrefixAndJSONPayload(t *testing.T) { + out := captureLog(t, func() { + Event("provision.start", map[string]any{ + "workspace_id": "ws-123", + "tier": 4, + "runtime": "claude-code", + }) + }) + out = strings.TrimSpace(out) + if !strings.HasPrefix(out, "evt: provision.start ") { + t.Fatalf("expected evt-prefixed line, got %q", out) + } + jsonPart := strings.TrimPrefix(out, "evt: provision.start ") + var got map[string]any + if err := json.Unmarshal([]byte(jsonPart), &got); err != nil { + t.Fatalf("payload not valid JSON: %v (raw=%q)", err, jsonPart) + } + if got["workspace_id"] != "ws-123" { + t.Errorf("workspace_id field lost: %+v", got) + } + // JSON unmarshal turns numbers into float64 — exact-equal compare. + if got["tier"].(float64) != 4 { + t.Errorf("tier field lost: %+v", got) + } + if got["runtime"] != "claude-code" { + t.Errorf("runtime field lost: %+v", got) + } +} + +func TestEvent_NilFieldsEmitsEmptyObject(t *testing.T) { + out := captureLog(t, func() { + Event("restart.pre_stop", nil) + }) + if !strings.Contains(out, "evt: restart.pre_stop {}") { + t.Fatalf("nil fields should emit empty object, got %q", out) + } +} + +func TestEvent_PreservesEventBoundaryOnUnmarshalableValue(t *testing.T) { + // A channel cannot be marshaled by encoding/json — verify we still + // emit the event boundary with a recorded marshal error. This is + // the structural guarantee: the call site never sees a panic, and + // the event name is always present in the log. + out := captureLog(t, func() { + Event("provision.ec2_started", map[string]any{ + "chan": make(chan int), + }) + }) + if !strings.Contains(out, "evt: provision.ec2_started ") { + t.Fatalf("event boundary missing on marshal error: %q", out) + } + if !strings.Contains(out, "_marshal_err") { + t.Fatalf("expected _marshal_err sentinel, got %q", out) + } +} + +func TestEvent_SingleLineOutput(t *testing.T) { + // Log aggregators line-split on \n. A multi-line emit would silently + // fragment the JSON across two records — pin single-line shape. 
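+	// (log.Output appends a trailing newline of its own when the message
+	// lacks one, which is why the assertion below trims the right edge
+	// before scanning for embedded newlines.)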
+ out := captureLog(t, func() { + Event("provision.skip_existing", map[string]any{ + "existing_id": "ws-abc", + "name": "child-1", + }) + }) + trimmed := strings.TrimRight(out, "\n") + if strings.Contains(trimmed, "\n") { + t.Fatalf("event line must be single-line, got %q", out) + } +} diff --git a/workspace-server/internal/router/router.go b/workspace-server/internal/router/router.go index 86007d00..ae928f2f 100644 --- a/workspace-server/internal/router/router.go +++ b/workspace-server/internal/router/router.go @@ -243,13 +243,15 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi // entire platform. Gated behind AdminAuth (issue #180). r.GET("/approvals/pending", middleware.AdminAuth(db.DB), apph.ListAll) - // Team handlers — Collapse only. The bulk-Expand path is gone: - // every workspace can have children via the regular CreateWorkspace - // flow with parent_id set, so a separate handler that bulk-creates - // from sub_workspaces (and was non-idempotent — calling it twice - // duplicated the team) earned its way out. - teamh := handlers.NewTeamHandler(broadcaster, wh, platformURL, configsDir) - wsAuth.POST("/collapse", teamh.Collapse) + // (TeamHandler is gone — #2864.) The visual canvas Collapse + // button calls PATCH /workspaces/:id { collapsed: true/false } + // (presentational toggle on canvas_layouts), NOT the destructive + // POST /collapse that stopped + removed children. The + // destructive route had zero UI callers (verified via grep + // across canvas/, scripts/, and the MCP tool registry — only + // docs referenced it). team.go + team_test.go + the route + // + helpers (findTemplateDirByName, NewTeamHandler) are + // deleted; visual collapse is unaffected. // Agents ah := handlers.NewAgentHandler(broadcaster) @@ -519,8 +521,9 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi r.GET("/canvas/viewport", vh.Get) r.PUT("/canvas/viewport", middleware.CanvasOrBearer(db.DB), vh.Save) - // Templates - tmplh := handlers.NewTemplatesHandler(configsDir, dockerCli) + // Templates — wh threaded so generateDefaultConfig picks the + // SaaS-aware default tier in Import + ReplaceFiles (#2910 PR-B). + tmplh := handlers.NewTemplatesHandler(configsDir, dockerCli, wh) // #686: GET /templates lists all template names+metadata from configsDir. // Open access lets unauthenticated callers enumerate org configurations and // installed plugins. AdminAuth-gate it alongside POST /templates/import. diff --git a/workspace-server/internal/scheduler/scheduler.go b/workspace-server/internal/scheduler/scheduler.go index 0c6eb84f..e098586d 100644 --- a/workspace-server/internal/scheduler/scheduler.go +++ b/workspace-server/internal/scheduler/scheduler.go @@ -14,6 +14,7 @@ import ( cronlib "github.com/robfig/cron/v3" "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics" "github.com/Molecule-AI/molecule-monorepo/platform/internal/supervised" ) @@ -741,6 +742,11 @@ func (s *Scheduler) sweepPhantomBusy(ctx context.Context) { continue } log.Printf("Scheduler: phantom-busy sweep — reset %s (no activity in %d min)", name, int(phantomStaleThreshold.Minutes())) + // #2865: surface as molecule_phantom_busy_resets_total. High + // reset rate signals task-lifecycle accounting regressions + // (e.g. missing env vars causing claude --print timeouts that + // leave active_tasks elevated until this sweep fires). 
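+		// A dashboard query shaped like, e.g.,
+		//   rate(molecule_phantom_busy_resets_total[1h]) > 0
+		// (threshold illustrative) is the intended consumer.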
+	metrics.TrackPhantomBusyReset()
 		count++
 	}
 	if err := rows.Err(); err != nil {
diff --git a/workspace-server/migrations/20260505200000_pending_uploads_acked_index.down.sql b/workspace-server/migrations/20260505200000_pending_uploads_acked_index.down.sql
new file mode 100644
index 00000000..2d84b00d
--- /dev/null
+++ b/workspace-server/migrations/20260505200000_pending_uploads_acked_index.down.sql
@@ -0,0 +1,2 @@
+-- Reversal of 20260505200000_pending_uploads_acked_index.up.sql.
+DROP INDEX IF EXISTS idx_pending_uploads_acked;
diff --git a/workspace-server/migrations/20260505200000_pending_uploads_acked_index.up.sql b/workspace-server/migrations/20260505200000_pending_uploads_acked_index.up.sql
new file mode 100644
index 00000000..f2beced2
--- /dev/null
+++ b/workspace-server/migrations/20260505200000_pending_uploads_acked_index.up.sql
@@ -0,0 +1,30 @@
+-- 20260505200000_pending_uploads_acked_index.up.sql
+--
+-- Adds the missing partial index for the acked-retention arm of the
+-- pendinguploads.Sweep query. The Phase 1 migration created two
+-- partial indexes both gated on `acked_at IS NULL` (workspace-fetch
+-- hot path + expires_at sweep arm); the third query path —
+-- `WHERE acked_at IS NOT NULL AND acked_at < now() - interval` — was
+-- left to a seq scan.
+--
+-- For a high-traffic deployment that's a real cost: the table
+-- accumulates one row per chat-attached file; the sweeper runs every
+-- 5 minutes and DELETEs rows past the 1-hour ack retention. A seq
+-- scan over 100K-1M acked rows turns every cycle's DELETE into a
+-- seconds-long full-table pass. Partial-indexing the inverse
+-- predicate reduces this to a btree range scan and lets the DELETE
+-- complete in low-millisecond range.
+--
+-- WHERE acked_at IS NOT NULL is intentionally the inverse of the other
+-- two indexes — they cover the unacked working set; this covers the
+-- terminal-state set the sweeper visits. The predicates select
+-- disjoint row sets, so the new index never covers a row the existing
+-- two already serve.
+--
+-- Caught in self-review on the parent RFC's Phase 4 PR; filed as
+-- a follow-up rather than a Phase 1 fix because the cost only
+-- materializes at a row count we don't expect to hit before the
+-- sweeper has had a chance to keep up.
+
+CREATE INDEX IF NOT EXISTS idx_pending_uploads_acked
+    ON pending_uploads (acked_at)
+    WHERE acked_at IS NOT NULL;
diff --git a/workspace/inbox.py b/workspace/inbox.py
index 94417243..cff95c6d 100644
--- a/workspace/inbox.py
+++ b/workspace/inbox.py
@@ -432,7 +432,17 @@ def _is_self_notify_row(row: dict[str, Any]) -> bool:
 
 
 def message_from_activity(row: dict[str, Any]) -> InboxMessage:
-    """Convert one /activity row into an InboxMessage."""
+    """Convert one /activity row into an InboxMessage.
+
+    Rewrites the message body in place (``row['request_body']`` itself
+    when it is already a dict, or the decoded copy when it arrives as a
+    JSON string), swapping ``platform-pending:`` URIs for the
+    locally-staged ``workspace:`` URIs (see
+    ``inbox_uploads.rewrite_request_body``). By the time the upstream
+    chat message arrives via this path, the upload-receive row that
+    staged the bytes has already populated the URI cache (lower
+    activity_logs.id, processed earlier in the same poll batch). A
+    cache miss leaves the URI untouched; the agent surfaces an
+    unresolvable URI rather than the inbox silently dropping the part.
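+
+    Example (ids and filename illustrative): a part arriving as
+    ``{"file": {"uri": "platform-pending:ws-1/f-1"}}`` leaves this
+    function as
+    ``{"file": {"uri": "workspace:/workspace/.molecule/chat-uploads/<32-hex>-report.pdf"}}``
+    once the upload-receive row earlier in the batch has staged the bytes.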
+    """
     request_body = row.get("request_body")
     if isinstance(request_body, str):
         # The Go handler returns request_body as json.RawMessage; httpx
@@ -443,6 +453,14 @@ def message_from_activity(row: dict[str, Any]) -> InboxMessage:
         except (TypeError, ValueError):
             request_body = None
 
+    # Rewrite platform-pending: URIs → workspace: URIs in-place on the
+    # body we just resolved. Imported at call time to keep the import
+    # graph clean for the in-container path that doesn't use this
+    # module; the uploads module is small, and the Python import cache
+    # makes the per-call import free after the first.
+    from inbox_uploads import rewrite_request_body
+    rewrite_request_body(request_body)
+
     return InboxMessage(
         activity_id=str(row.get("id", "")),
         text=_extract_text(request_body, row.get("summary")),
@@ -532,11 +550,57 @@ def _poll_once(
     if cursor is None:
         rows = list(reversed(rows))
 
+    # Imported lazily at use-site so the one-time import cost lands on
+    # the first poll rather than process start; Python's import cache
+    # makes every later poll free. (The import itself runs on every
+    # poll; only the BatchFetcher below is pay-per-use.)
+    from inbox_uploads import is_chat_upload_row, BatchFetcher
+
    new_count = 0
    last_id: str | None = None
+    # ``batch_fetcher`` is lazy: a poll batch with no upload rows pays
+    # zero overhead. Once the first upload row appears we open one
+    # BatchFetcher and submit every subsequent upload row to its thread
+    # pool; before processing the FIRST non-upload row we drain the
+    # pool (wait_all) so the URI cache is hot when message rewriting
+    # runs. Without the barrier, the chat message that references the
+    # upload would arrive at the agent with the un-rewritten
+    # platform-pending: URI.
+    batch_fetcher: BatchFetcher | None = None
+
+    def _drain_uploads(bf: BatchFetcher | None) -> None:
+        if bf is None:
+            return
+        bf.wait_all()
+        bf.close()
+
     for row in rows:
         if not isinstance(row, dict):
             continue
+        if is_chat_upload_row(row):
+            # Side-effect row from the platform's poll-mode chat-upload
+            # handler — fetch the bytes, stage to /workspace/.molecule/
+            # chat-uploads, ack. NOT enqueued as an InboxMessage; the
+            # agent will see the chat message that REFERENCES this
+            # upload via a separate (later) activity row, with the
+            # platform-pending: URI rewritten to a workspace: URI by
+            # message_from_activity. We DO advance the cursor past
+            # this row so a permanent network outage on /content
+            # doesn't stall the cursor and block real chat traffic.
+            if batch_fetcher is None:
+                batch_fetcher = BatchFetcher(
+                    platform_url=platform_url,
+                    workspace_id=workspace_id,
+                    headers=headers,
+                )
+            batch_fetcher.submit(row)
+            last_id = str(row.get("id", "")) or last_id
+            continue
+        # Non-upload row: drain any pending uploads first so the URI
+        # cache is populated before we run rewrite_request_body /
+        # message_from_activity on a row that may reference one.
+        if batch_fetcher is not None:
+            _drain_uploads(batch_fetcher)
+            batch_fetcher = None
         if _is_self_notify_row(row):
             # The workspace-server's `/notify` handler writes the agent's
             # own send_message_to_user POSTs to activity_logs with
 
             last_id = message.activity_id
             new_count += 1
 
+    # Drain any uploads still in flight if the batch ended with upload
+    # rows (no chat-message row to trigger the inline drain). Without
+    # this, a future poll that picks up the chat-message row first
+    # would race with the still-running fetches.
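+    # (Ordering recap, batch shape illustrative: for
+    #  [upload A, upload B, chat msg referencing A+B, upload C]
+    #  the inline drain above runs before the chat row is processed,
+    #  and the tail drain below covers C.)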
+    if batch_fetcher is not None:
+        _drain_uploads(batch_fetcher)
+
     if last_id is not None:
         state.save_cursor(last_id, cursor_key)
     return new_count
@@ -613,6 +684,7 @@ def start_poller_thread(
     platform_url: str,
     workspace_id: str,
     interval: float = POLL_INTERVAL_SECONDS,
+    stop_event: threading.Event | None = None,
 ) -> threading.Thread:
     """Spawn the poller as a daemon thread. Returns the Thread handle.
 
     operator running ``ps -eL`` or eyeballing ``threading.enumerate()``
     can tell which thread is which without reverse-engineering it from
     crash tracebacks.
+
+    Pass ``stop_event`` to enable graceful shutdown — used by tests so
+    the daemon thread doesn't outlive the test that started it and race
+    with later tests' httpx patches. Production code passes None and
+    relies on the daemon flag for process-exit cleanup.
     """
     name = "molecule-mcp-inbox-poller"
     if workspace_id:
         name = f"{name}-{workspace_id[:8]}"
     t = threading.Thread(
         target=_poll_loop,
-        args=(state, platform_url, workspace_id, interval),
+        args=(state, platform_url, workspace_id, interval, stop_event),
         name=name,
         daemon=True,
     )
diff --git a/workspace/inbox_uploads.py b/workspace/inbox_uploads.py
new file mode 100644
index 00000000..69fa53aa
--- /dev/null
+++ b/workspace/inbox_uploads.py
@@ -0,0 +1,724 @@
+"""Poll-mode chat-upload fetcher + URI cache for the standalone path.
+
+Companion to ``inbox.py``. When the workspace's inbox poller sees an
+``activity_logs`` row with ``method='chat_upload_receive'`` (written by
+the platform's ``uploadPollMode`` handler — workspace-server
+``internal/handlers/chat_files.go``), this module:
+
+  1. Pulls the bytes from
+     ``GET /workspaces/:id/pending-uploads/:file_id/content``.
+  2. Writes them to
+     ``/workspace/.molecule/chat-uploads/<prefix>-<sanitized-name>``
+     — same on-disk shape as the push-mode handler in
+     ``internal_chat_uploads.py``, so anything downstream that already
+     resolves ``workspace:/workspace/.molecule/chat-uploads/...`` URIs
+     works unchanged.
+  3. POSTs ``/workspaces/:id/pending-uploads/:file_id/ack`` so Phase 3
+     sweep can clean up the platform-side ``pending_uploads`` row.
+  4. Records a ``platform-pending:<workspace_id>/<file_id> →
+     workspace:/workspace/.molecule/chat-uploads/...`` mapping in a
+     process-local cache so the chat message that arrives later
+     (referencing the platform-pending URI) gets rewritten before the
+     agent sees it.
+
+URI rewrite ordering — the chat message containing the
+``platform-pending:`` URI is logged by the platform AFTER the
+``chat_upload_receive`` row, so the inbox poller sees the upload-receive
+row first (lower activity_logs.id) and stages the bytes before the chat
+message arrives in the same poll batch (or a later one). The URI cache
+is therefore populated before the message_from_activity path needs it.
+A miss (network race, restart with stale cursor) is handled by keeping
+the original ``platform-pending:`` URI in the rewritten body — the agent
+will see something it can't open, which is preferable to silently
+dropping the URI.
+
+Auth — same Bearer token the inbox poller uses (``platform_auth.auth_headers``).
+Both endpoints are on the wsAuth-gated route, so this module can never
+read another tenant's bytes even if a token is misrouted.
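+
+Typical staged artifact (hex prefix illustrative)::
+
+    /workspace/.molecule/chat-uploads/<32-hex>-quarterly_report.pdf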
+""" +from __future__ import annotations + +import concurrent.futures +import logging +import mimetypes +import os +import re +import secrets as pysecrets +import threading +from collections import OrderedDict +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + +# Same on-disk root as internal_chat_uploads.CHAT_UPLOAD_DIR — keeping +# these decoupled would let drift sneak in. Imported here rather than +# from internal_chat_uploads to avoid pulling in starlette as a +# transitive dep (this module runs in the standalone MCP path which +# doesn't ship the in-container HTTP server). +CHAT_UPLOAD_DIR = "/workspace/.molecule/chat-uploads" + +# Per-file safety net. The platform enforces 25 MB on the staging side, +# but a buggy or hostile platform response shouldn't be able to fill the +# workspace's disk — refuse to write more than this even if the response +# claims a larger Content-Length. +MAX_FILE_BYTES = 25 * 1024 * 1024 + +# Network deadline for the GET. Tuned for a 25 MB transfer over a +# reasonable consumer link (~5 Mbps gives ~40s for the full payload), +# plus headroom for TLS + platform auth. Aligned with inbox poller's +# 10s default for /activity calls — both are user-perceived latency. +DEFAULT_FETCH_TIMEOUT = 60.0 + +# Concurrency cap for ``BatchFetcher``. Four workers is enough headroom +# for the realistic "user dragged 3-4 files into chat at once" case +# while bounding the platform's per-workspace fan-out. The cap matters +# because the platform's /content endpoint reads bytea from Postgres in +# a single round-trip per request — N workers = N concurrent DB reads +# of up to 25 MB each, so a higher cap could pressure platform memory +# without much UX win (network bandwidth is the bottleneck once the +# bytes are buffered). +DEFAULT_BATCH_FETCH_WORKERS = 4 + +# Upper bound on how long ``BatchFetcher.wait_all`` blocks the inbox +# poll loop before giving up on still-in-flight fetches. Aligned with +# DEFAULT_FETCH_TIMEOUT so a single hung fetch can't stall the loop +# longer than its own deadline. A timeout fires only if a worker thread +# is stuck past the underlying httpx timeout — pathological case; +# normal completion is bounded by per-fetch timeout × ceil(N/W). +DEFAULT_BATCH_WAIT_TIMEOUT = DEFAULT_FETCH_TIMEOUT + 5.0 + +# Cap on the URI cache. A long-lived workspace handling thousands of +# uploads shouldn't grow without bound; an LRU cap of 1024 keeps the +# entries-needed-for-a-typical-conversation well within memory. +URI_CACHE_MAX_ENTRIES = 1024 + +# Same character class as internal_chat_uploads — kept duplicated rather +# than imported to avoid dragging starlette into the standalone path. +_UNSAFE_FILENAME_CHARS = re.compile(r"[^a-zA-Z0-9._\-]") + + +def sanitize_filename(name: str) -> str: + """Reduce a user-supplied filename to a safe form. + + Mirrors ``internal_chat_uploads.sanitize_filename`` and the Go + handler's ``SanitizeFilename`` — three-way parity is pinned by + ``workspace-server/internal/handlers/sanitize_filename_test.go`` and + ``workspace/tests/test_internal_chat_uploads.py`` so the URI shape + is identical regardless of which path handles the upload. 
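+
+    Examples (illustrative)::
+
+        "quarterly report (final).pdf"  ->  "quarterly_report__final_.pdf"
+        "../../etc/passwd"              ->  "passwd"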
+ """ + base = os.path.basename(name) + base = base.replace(" ", "_") + base = _UNSAFE_FILENAME_CHARS.sub("_", base) + if len(base) > 100: + ext = "" + dot = base.rfind(".") + if dot >= 0 and len(base) - dot <= 16: + ext = base[dot:] + base = base[: 100 - len(ext)] + ext + if base in ("", ".", ".."): + return "file" + return base + + +# --------------------------------------------------------------------------- +# URI cache — maps platform-pending URIs to local workspace: URIs +# --------------------------------------------------------------------------- + + +class _URICache: + """Thread-safe bounded LRU mapping of platform-pending → workspace URIs. + + Bounded so a workspace that runs for months and handles thousands of + uploads doesn't accumulate entries forever. ``OrderedDict.move_to_end`` + promotes recently-used entries; eviction takes the oldest. + + The cache is intentionally per-process — there is no persistence + across a workspace restart. A restart with a stale inbox cursor that + re-poll an upload-receive row will re-fetch (the bytes are already + on disk from the prior session — see ``stage_to_disk``'s O_EXCL + handling) and re-register; a chat message that referenced the + platform-pending URI BEFORE the restart and arrives AFTER would miss + the rewrite and surface the platform-pending URI to the agent. That + is preferable to a stale persisted mapping that points at a deleted + file. + """ + + def __init__(self, max_entries: int = URI_CACHE_MAX_ENTRIES): + self._max = max_entries + self._lock = threading.Lock() + self._entries: "OrderedDict[str, str]" = OrderedDict() + + def get(self, pending_uri: str) -> str | None: + with self._lock: + local = self._entries.get(pending_uri) + if local is not None: + self._entries.move_to_end(pending_uri) + return local + + def set(self, pending_uri: str, local_uri: str) -> None: + with self._lock: + self._entries[pending_uri] = local_uri + self._entries.move_to_end(pending_uri) + while len(self._entries) > self._max: + self._entries.popitem(last=False) + + def __len__(self) -> int: + with self._lock: + return len(self._entries) + + def clear(self) -> None: + with self._lock: + self._entries.clear() + + +_cache = _URICache() + + +def get_cache() -> _URICache: + """Expose the module-singleton cache for tests and the rewrite path.""" + return _cache + + +def resolve_pending_uri(uri: str) -> str | None: + """Return the local ``workspace:`` URI for a ``platform-pending:`` URI, + or None if not yet staged. Convenience for callers that want to + fall back to an on-demand fetch — pass the result through to + ``executor_helpers.resolve_attachment_uri``. + """ + return _cache.get(uri) + + +# --------------------------------------------------------------------------- +# On-disk staging +# --------------------------------------------------------------------------- + + +def _open_safe(path: str) -> int: + """Open ``path`` for write with ``O_CREAT|O_EXCL|O_NOFOLLOW``. + + Same shape as ``internal_chat_uploads._open_safe`` — refuses to + follow a pre-existing symlink at the target and refuses to overwrite + an existing regular file. The 16-byte random prefix makes a name + collision astronomical, but defense-in-depth costs nothing. + """ + flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL + if hasattr(os, "O_NOFOLLOW"): + flags |= os.O_NOFOLLOW + return os.open(path, flags, 0o600) + + +def stage_to_disk(content: bytes, filename: str) -> str: + """Write ``content`` under ``CHAT_UPLOAD_DIR`` and return the local URI. 
+
+    Returns ``workspace:/workspace/.molecule/chat-uploads/<prefix>-<sanitized>``.
+    The 32-hex prefix makes the on-disk name unguessable to anything
+    that didn't see the response, so even if a stale agent has a guess
+    at the original filename it can't construct a URL to a sibling's
+    upload.
+
+    Raises:
+        OSError: write failure (mkdir, open, or write). Caller is
+            expected to log + skip; the platform-side row stays unacked
+            and is left to the expiry sweep (or a stale-cursor restart)
+            rather than retried in-session.
+        ValueError: ``content`` exceeds ``MAX_FILE_BYTES``. Pre-staging
+            guard belt-and-braces above the platform's same-side cap.
+    """
+    if len(content) > MAX_FILE_BYTES:
+        raise ValueError(
+            f"content size {len(content)} exceeds workspace cap {MAX_FILE_BYTES}"
+        )
+
+    Path(CHAT_UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
+
+    sanitized = sanitize_filename(filename)
+    prefix = pysecrets.token_hex(16)
+    stored = f"{prefix}-{sanitized}"
+    target = os.path.join(CHAT_UPLOAD_DIR, stored)
+
+    fd = _open_safe(target)
+    try:
+        with os.fdopen(fd, "wb") as f:
+            f.write(content)
+    except OSError:
+        # Best-effort cleanup — partial writes leave a stub file that
+        # would mask a future retry's success otherwise.
+        try:
+            os.unlink(target)
+        except OSError:
+            pass
+        raise
+
+    return f"workspace:{CHAT_UPLOAD_DIR}/{stored}"
+
+
+# ---------------------------------------------------------------------------
+# Activity row → fetch/stage/ack flow
+# ---------------------------------------------------------------------------
+
+
+def _request_body_dict(row: dict[str, Any]) -> dict[str, Any] | None:
+    """Coerce ``row['request_body']`` into a dict.
+
+    The /activity API returns request_body as JSON (already-deserialized
+    by httpx). Some legacy paths or mocked transports may emit a string;
+    handle defensively rather than raising.
+    """
+    body = row.get("request_body")
+    if isinstance(body, dict):
+        return body
+    if isinstance(body, str):
+        import json
+        try:
+            decoded = json.loads(body)
+        except (TypeError, ValueError):
+            return None
+        return decoded if isinstance(decoded, dict) else None
+    return None
+
+
+def is_chat_upload_row(row: dict[str, Any]) -> bool:
+    """True if ``row`` is the platform's chat-upload-receive activity.
+
+    Used by the inbox poller to fork the row off the regular A2A
+    message handling path — this row is not a peer message; it's an
+    instruction to fetch + stage bytes. Match on ``method`` only;
+    ``activity_type`` is already filtered to ``a2a_receive`` upstream.
+    """
+    return row.get("method") == "chat_upload_receive"
+
+
+def fetch_and_stage(
+    row: dict[str, Any],
+    *,
+    platform_url: str,
+    workspace_id: str,
+    headers: dict[str, str],
+    timeout_secs: float = DEFAULT_FETCH_TIMEOUT,
+    client: Any = None,
+) -> str | None:
+    """Fetch the row's bytes, stage them under chat-uploads, and ack.
+
+    Returns the local ``workspace:`` URI on success, or ``None`` if any
+    step failed (logged with enough detail to triage). Failure leaves
+    the platform-side ``pending_uploads`` row unacked. The inbox cursor
+    still advances past the activity row (the row is "handled" from the
+    inbox's perspective even on fetch failure; otherwise a permanent
+    network outage on /content would stall the cursor and block real
+    chat traffic), so a failed fetch is not retried in-session; the
+    unacked row is left to the platform's expiry sweep, and only a
+    restart with a stale cursor re-fetches.
+
+    On success, the URI cache is updated so a subsequent chat message
+    referencing the same ``platform-pending:`` URI is rewritten before
+    the agent sees it.
+
+    Pass ``client`` to reuse a shared ``httpx.Client`` for both GET and
+    POST ack (saves one TLS handshake per row vs. constructing one
+    per-call). ``BatchFetcher`` does this across an entire poll batch so
+    N concurrent fetches share one connection pool.
+    """
+    body = _request_body_dict(row)
+    if body is None:
+        logger.warning(
+            "inbox_uploads: row %s missing request_body; cannot fetch",
+            row.get("id"),
+        )
+        return None
+
+    file_id = body.get("file_id")
+    if not isinstance(file_id, str) or not file_id:
+        logger.warning(
+            "inbox_uploads: row %s has no file_id in request_body",
+            row.get("id"),
+        )
+        return None
+
+    pending_uri = body.get("uri")
+    if not isinstance(pending_uri, str) or not pending_uri:
+        # Reconstruct what the platform would have written — defensive
+        # against a row whose uri field got truncated. Same shape as the
+        # Go handler's URI builder.
+        pending_uri = f"platform-pending:{workspace_id}/{file_id}"
+
+    filename = body.get("name") or "file"
+    if not isinstance(filename, str):
+        filename = "file"
+
+    # Caller-supplied client: reuse for both GET + POST ack. Otherwise
+    # build a one-shot client and close it on the way out. Lazy httpx
+    # import keeps the standalone MCP path's optional dep optional.
+    own_client = client is None
+    if own_client:
+        try:
+            import httpx  # noqa: WPS433
+        except ImportError:
+            logger.error("inbox_uploads: httpx not installed; cannot fetch %s", file_id)
+            return None
+        client = httpx.Client(timeout=timeout_secs)
+
+    try:
+        return _fetch_and_stage_with_client(
+            client,
+            platform_url=platform_url,
+            workspace_id=workspace_id,
+            headers=headers,
+            file_id=file_id,
+            pending_uri=pending_uri,
+            filename=filename,
+            body=body,
+        )
+    finally:
+        if own_client:
+            try:
+                client.close()
+            except Exception:  # noqa: BLE001 — close should never crash the caller
+                pass
+
+
+def _fetch_and_stage_with_client(
+    client: Any,
+    *,
+    platform_url: str,
+    workspace_id: str,
+    headers: dict[str, str],
+    file_id: str,
+    pending_uri: str,
+    filename: str,
+    body: dict[str, Any],
+) -> str | None:
+    """Inner body of fetch_and_stage. Always uses the supplied client for
+    both GET and POST so the connection pool is shared across the call.
+    """
+    content_url = f"{platform_url}/workspaces/{workspace_id}/pending-uploads/{file_id}/content"
+    ack_url = f"{platform_url}/workspaces/{workspace_id}/pending-uploads/{file_id}/ack"
+
+    try:
+        resp = client.get(content_url, headers=headers)
+    except Exception as exc:  # noqa: BLE001
+        logger.warning("inbox_uploads: GET %s failed: %s", content_url, exc)
+        return None
+
+    if resp.status_code == 404:
+        # Row was swept or already acked by a previous poll race — nothing
+        # to fetch. Don't ack again; the platform's GC handles it. This is
+        # a soft-skip, not an error — log at INFO so triage isn't noisy.
+        logger.info(
+            "inbox_uploads: pending upload %s already gone (404); skipping",
+            file_id,
+        )
+        return None
+    if resp.status_code >= 400:
+        logger.warning(
+            "inbox_uploads: GET %s returned %d: %s",
+            content_url,
+            resp.status_code,
+            (resp.text or "")[:200],
+        )
+        return None
+
+    content = resp.content or b""
+    if len(content) > MAX_FILE_BYTES:
+        logger.warning(
+            "inbox_uploads: refusing to stage %s — size %d exceeds cap %d",
+            file_id,
+            len(content),
+            MAX_FILE_BYTES,
+        )
+        return None
+
+    # Mimetype precedence: platform's Content-Type header → request_body
+    # mimeType field → extension guess. Same precedence as the in-
+    # container ingest handler.
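+    # e.g. a header of "application/pdf; charset=binary" reduces to
+    # "application/pdf" and wins over body["mimeType"] and the
+    # extension guess.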
+ mime_header = resp.headers.get("content-type", "").split(";")[0].strip() + mime = ( + mime_header + or (body.get("mimeType") if isinstance(body.get("mimeType"), str) else "") + or (mimetypes.guess_type(filename)[0] or "") + ) + + try: + local_uri = stage_to_disk(content, filename) + except (OSError, ValueError) as exc: + logger.error( + "inbox_uploads: failed to stage %s (%s) to disk: %s", + file_id, + filename, + exc, + ) + return None + + _cache.set(pending_uri, local_uri) + logger.info( + "inbox_uploads: staged file_id=%s name=%s size=%d mime=%s pending_uri=%s local_uri=%s", + file_id, + filename, + len(content), + mime, + pending_uri, + local_uri, + ) + + # Ack last so a write failure above leaves the row available for a + # retry on the next poll. A failed ack is logged but doesn't roll + # back the on-disk file — the platform's sweep will clean up + # eventually. + try: + ack_resp = client.post(ack_url, headers=headers) + if ack_resp.status_code >= 400: + logger.warning( + "inbox_uploads: ack %s returned %d: %s", + ack_url, + ack_resp.status_code, + (ack_resp.text or "")[:200], + ) + except Exception as exc: # noqa: BLE001 + logger.warning("inbox_uploads: POST %s failed: %s", ack_url, exc) + + return local_uri + + +# --------------------------------------------------------------------------- +# BatchFetcher — concurrent fetch across a single poll batch +# --------------------------------------------------------------------------- + + +class BatchFetcher: + """Fetch + stage + ack a batch of upload-receive rows concurrently. + + Why this exists: the inbox poll loop used to call ``fetch_and_stage`` + serially per row. With N upload rows in a batch (a user dragging + multiple files into chat at once), the loop blocked for + ``N × per_fetch_latency`` before processing the chat message that + referenced them — a 4-file upload at 5s each = 20s of stall + before the agent saw the user's prompt. ``BatchFetcher`` runs the + fetches on a small thread pool (default 4 workers) so the stall is + bounded by ``ceil(N/W) × per_fetch_latency`` instead. + + Connection reuse: one ``httpx.Client`` is shared across every fetch + in the batch. httpx clients carry a connection pool, so a second + fetch to the same platform host reuses the TCP+TLS handshake from + the first — measurable win when fetches happen back-to-back. + + Correctness invariant the caller MUST preserve: the inbox loop is + expected to call ``wait_all()`` before processing the chat-message + activity row that REFERENCES one of these uploads. Without the + barrier, the URI cache is empty when ``rewrite_request_body`` runs + and the agent sees the un-rewritten ``platform-pending:`` URI. The + caller-side test ``test_poll_once_waits_for_uploads_before_messages`` + pins this end-to-end. + + Use as a context manager so the executor + client are torn down + even if the caller raises mid-batch. + """ + + def __init__( + self, + *, + platform_url: str, + workspace_id: str, + headers: dict[str, str], + timeout_secs: float = DEFAULT_FETCH_TIMEOUT, + max_workers: int = DEFAULT_BATCH_FETCH_WORKERS, + client: Any = None, + ): + self._platform_url = platform_url + self._workspace_id = workspace_id + self._headers = dict(headers) # copy so caller mutations don't leak in + self._timeout_secs = timeout_secs + + # Caller can inject a client (tests do this); production callers + # let us build one. Track ownership so we only close ours. 
+ self._own_client = client is None + if self._own_client: + try: + import httpx # noqa: WPS433 + except ImportError: + # Match fetch_and_stage's behavior: log + degrade rather + # than raising at construction time. submit() will then + # return None for every row. + logger.error("inbox_uploads: httpx not installed; BatchFetcher inert") + self._client: Any = None + else: + self._client = httpx.Client(timeout=timeout_secs) + else: + self._client = client + + self._executor = concurrent.futures.ThreadPoolExecutor( + max_workers=max_workers, + thread_name_prefix="upload-fetch", + ) + self._futures: list[concurrent.futures.Future[Any]] = [] + self._closed = False + # Flipped to True by wait_all when the timeout fires; close() + # reads this to decide between drain-and-wait vs cancel-queued. + self._timed_out = False + + def submit(self, row: dict[str, Any]) -> concurrent.futures.Future[Any] | None: + """Submit ``row`` for fetch + stage + ack. Non-blocking — the + worker thread runs ``fetch_and_stage`` with the shared client. + + Returns the Future so a caller that wants per-row outcome can + await it; ``None`` if the BatchFetcher is in a degraded state + (httpx missing). + """ + if self._closed: + raise RuntimeError("BatchFetcher: submit after close") + if self._client is None: + return None + fut = self._executor.submit( + fetch_and_stage, + row, + platform_url=self._platform_url, + workspace_id=self._workspace_id, + headers=self._headers, + timeout_secs=self._timeout_secs, + client=self._client, + ) + self._futures.append(fut) + return fut + + def wait_all(self, timeout: float | None = DEFAULT_BATCH_WAIT_TIMEOUT) -> None: + """Block until every submitted future completes (or times out). + + Per-future exceptions are logged + swallowed — ``fetch_and_stage`` + already converts every error path to ``return None``, so a real + exception propagating up to here is unexpected and we don't want + one bad fetch to abort the whole batch. + + Timeouts are also logged + swallowed AND record the timed-out + futures on ``self._timed_out`` so ``close`` can cancel them + without paying their full latency. Without this hand-off, + ``close()``'s ``shutdown(wait=True)`` would block on the leaked + workers and undo the user-facing timeout — the inbox poll loop + would stall indefinitely on a hung /content fetch. + """ + if not self._futures: + return + try: + done, not_done = concurrent.futures.wait( + self._futures, + timeout=timeout, + return_when=concurrent.futures.ALL_COMPLETED, + ) + except Exception as exc: # noqa: BLE001 — concurrent.futures shouldn't raise here + logger.warning("inbox_uploads: BatchFetcher.wait_all crashed: %s", exc) + return + for fut in done: + exc = fut.exception() + if exc is not None: + logger.warning( + "inbox_uploads: BatchFetcher worker raised: %s", exc + ) + if not_done: + logger.warning( + "inbox_uploads: BatchFetcher.wait_all left %d in-flight after %ss timeout", + len(not_done), + timeout, + ) + # Mark these futures so close() knows to cancel-not-wait. We + # cancel queued-but-not-started ones immediately; futures + # already running can't be cancelled (Python's threading + # model), but close() will pass cancel_futures=True so any + # remaining queued items don't run. + for fut in not_done: + fut.cancel() + self._timed_out = True + + def close(self) -> None: + """Tear down the executor + (if owned) the httpx client. + + Idempotent. After close, ``submit`` raises and the BatchFetcher + cannot be reused — construct a fresh one for the next poll. 
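+        (``__exit__`` routes here, so the with-statement shape the class
+        docstring recommends gets this teardown automatically.)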
+ + If ``wait_all`` reported a timeout, shutdown skips the + ``wait=True`` drain and instead asks the executor to drop queued + futures (``cancel_futures=True``). Currently-running workers + can't be interrupted by Python's threading model, but the poll + loop returns immediately rather than blocking on a hung fetch. + """ + if self._closed: + return + self._closed = True + timed_out = getattr(self, "_timed_out", False) + try: + if timed_out: + # cancel_futures landed in Python 3.9 — guarded for older + # interpreters via a TypeError fallback. Drop queued + # tasks; running ones will exit when their httpx call + # eventually returns or the daemon thread dies. + try: + self._executor.shutdown(wait=False, cancel_futures=True) + except TypeError: + self._executor.shutdown(wait=False) + else: + # Healthy path: wait for in-flight work so we don't + # interrupt a fetch mid-write. + self._executor.shutdown(wait=True) + except Exception as exc: # noqa: BLE001 + logger.warning("inbox_uploads: executor shutdown error: %s", exc) + if self._own_client and self._client is not None: + try: + self._client.close() + except Exception as exc: # noqa: BLE001 + logger.warning("inbox_uploads: client close error: %s", exc) + + def __enter__(self) -> "BatchFetcher": + return self + + def __exit__(self, exc_type, exc, tb) -> None: + self.close() + + +# --------------------------------------------------------------------------- +# URI rewrite for incoming chat messages +# --------------------------------------------------------------------------- +# +# The chat message that references a staged upload arrives as a +# SEPARATE activity_log row, with parts of kind=file containing +# platform-pending: URIs in the file.uri field. Walk the structure +# in-place and rewrite to the local workspace: URI when the cache has it. +# Unknown URIs pass through unchanged — the agent gets to choose how +# to react (most runtimes log + ignore an unresolvable URI). + + +def _rewrite_part(part: Any) -> None: + """Mutate a single A2A Part dict to swap platform-pending: URIs.""" + if not isinstance(part, dict): + return + file_obj = part.get("file") + if not isinstance(file_obj, dict): + return + uri = file_obj.get("uri") + if not isinstance(uri, str) or not uri.startswith("platform-pending:"): + return + rewritten = _cache.get(uri) + if rewritten: + file_obj["uri"] = rewritten + + +def rewrite_request_body(body: Any) -> None: + """Mutate ``body`` in-place, replacing platform-pending: URIs with + the cached local equivalents. + + Walks the same shapes ``inbox._extract_text`` accepts: + + - ``body['parts']`` + - ``body['params']['parts']`` + - ``body['params']['message']['parts']`` + + No-op for shapes that don't match — the message simply passes + through to the agent as-is. + """ + if not isinstance(body, dict): + return + candidates: list[Any] = [] + params = body.get("params") if isinstance(body.get("params"), dict) else None + if params: + message = params.get("message") if isinstance(params.get("message"), dict) else None + if message: + candidates.append(message.get("parts")) + candidates.append(params.get("parts")) + candidates.append(body.get("parts")) + + for parts in candidates: + if isinstance(parts, list): + for part in parts: + _rewrite_part(part) diff --git a/workspace/mcp_cli.py b/workspace/mcp_cli.py index feea0b83..e890a66d 100644 --- a/workspace/mcp_cli.py +++ b/workspace/mcp_cli.py @@ -31,422 +31,53 @@ dependency via ``a2a-sdk``. 
In-container usage (``python -m molecule_runtime.a2a_mcp_server`` or direct import) bypasses this wrapper — the workspace runtime has its own heartbeat loop in ``heartbeat.py`` so we don't double-heartbeat. + +Module layout (RFC #2873 iter 3 split): + * ``mcp_heartbeat`` — register POST + heartbeat loop + auth-failure + escalation + inbound-secret persistence. + * ``mcp_workspace_resolver`` — env validation, single + multi-workspace + resolution, operator-help printer, on-disk token-file read. + * ``mcp_inbox_pollers`` — activate the inbox singleton + spawn one + daemon poller per workspace. + +This file keeps just ``main()`` plus thin re-exports of the private +symbols so existing tests' imports (``mcp_cli._build_agent_card``, +``mcp_cli._heartbeat_loop``, etc.) keep working without churn. """ from __future__ import annotations -import json import logging import os import sys -import threading -import time -from pathlib import Path import configs_dir +import mcp_heartbeat +import mcp_inbox_pollers +import mcp_workspace_resolver logger = logging.getLogger(__name__) -# Heartbeat cadence. Must be tighter than healthsweep's stale window -# (currently 60-90s — see registry/healthsweep.go) by a comfortable -# margin so a single missed heartbeat doesn't flip awaiting_agent. -# 20s gives the operator's network 3 attempts within the budget; long -# enough that it doesn't spam, short enough to recover quickly after -# laptop sleep. -HEARTBEAT_INTERVAL_SECONDS = 20.0 +# Re-export public surface for back-compat with the pre-split callers +# and tests. The underscore-prefixed names mirror the names that +# existed in this module before the split — keeping them ensures +# `mcp_cli._build_agent_card`, `mcp_cli._heartbeat_loop`, etc. +# resolve identically to the new functions. +HEARTBEAT_INTERVAL_SECONDS = mcp_heartbeat.HEARTBEAT_INTERVAL_SECONDS +_HEARTBEAT_AUTH_LOUD_THRESHOLD = mcp_heartbeat.HEARTBEAT_AUTH_LOUD_THRESHOLD +_HEARTBEAT_AUTH_RELOG_INTERVAL = mcp_heartbeat.HEARTBEAT_AUTH_RELOG_INTERVAL -# After this many consecutive 401/403 heartbeats, escalate from -# WARNING to ERROR with re-onboard guidance. 3 ticks at 20s = ~1 minute -# of sustained auth failure — enough to rule out a transient platform -# blip but quick enough that an operator doesn't sit puzzled for 10 -# minutes wondering why their MCP tools 401. Same threshold used for -# repeat-logging at 20-tick (~7 min) intervals so a long-running -# session that missed the first ERROR still sees the message. -_HEARTBEAT_AUTH_LOUD_THRESHOLD = 3 -_HEARTBEAT_AUTH_RELOG_INTERVAL = 20 +_build_agent_card = mcp_heartbeat.build_agent_card +_platform_register = mcp_heartbeat.platform_register +_heartbeat_loop = mcp_heartbeat.heartbeat_loop +_log_heartbeat_auth_failure = mcp_heartbeat.log_heartbeat_auth_failure +_persist_inbound_secret_from_heartbeat = mcp_heartbeat.persist_inbound_secret_from_heartbeat +_start_heartbeat_thread = mcp_heartbeat.start_heartbeat_thread +_resolve_workspaces = mcp_workspace_resolver.resolve_workspaces +_print_missing_env_help = mcp_workspace_resolver.print_missing_env_help +_read_token_file = mcp_workspace_resolver.read_token_file -def _build_agent_card(workspace_id: str) -> dict: - """Build the ``agent_card`` payload sent to /registry/register. - - Three optional env vars override the defaults so an operator can - surface human-readable identity + capabilities to peers and the - canvas Skills tab without code changes: - - * ``MOLECULE_AGENT_NAME`` — display name (defaults to - ``molecule-mcp-{id[:8]}``). 
Surfaced in canvas workspace cards - and ``list_peers`` output. - * ``MOLECULE_AGENT_DESCRIPTION`` — one-liner about the agent's - purpose. Rendered in canvas Details + Skills tabs. - * ``MOLECULE_AGENT_SKILLS`` — comma-separated skill names - (e.g. ``research,code-review,memory-curation``). Each name is - expanded to a ``{"name": ...}`` skill object — the minimum - shape that satisfies both ``shared_runtime.summarize_peers`` - (uses ``s["name"]``) and the canvas SkillsTab.tsx schema - (id falls back to name when omitted). Empty / whitespace - entries are dropped. - - Defaults match the previous hardcoded behaviour exactly so this - is a strict superset — an operator who sets none of the env vars - sees no change. - """ - name = (os.environ.get("MOLECULE_AGENT_NAME") or "").strip() - if not name: - name = f"molecule-mcp-{workspace_id[:8]}" - - description = (os.environ.get("MOLECULE_AGENT_DESCRIPTION") or "").strip() - - skills_raw = (os.environ.get("MOLECULE_AGENT_SKILLS") or "").strip() - skills: list[dict] = [] - if skills_raw: - for s in skills_raw.split(","): - label = s.strip() - if label: - skills.append({"name": label}) - - card: dict = {"name": name, "skills": skills} - if description: - card["description"] = description - return card - - -def _platform_register(platform_url: str, workspace_id: str, token: str) -> None: - """One-shot register at startup; fails fast on auth errors. - - Lifts the workspace from ``awaiting_agent`` to ``online`` for - operators who never ran the curl-register snippet. Safe to call - repeatedly: the platform's register handler is an upsert that - just refreshes ``url``, ``agent_card``, and ``status``. - - Failure model (post-review): - - 401 / 403 → ``sys.exit(3)`` immediately. The operator's - token is wrong; silently looping in a broken state would - make this hard to diagnose because the MCP tools would 401 - on every call too. Hard-fail is the kindest option. - - Other 4xx/5xx → log a warning + continue. The heartbeat - thread will surface persistent failures; transient platform - blips shouldn't abort the MCP loop. - - Network / transport errors → log + continue. Same reasoning. - - Origin header is required by the SaaS edge WAF; without it - /registry/register currently still works (it's on the WAF - allowlist), but the heartbeat path needs Origin and we want one - consistent header set across both calls. - """ - try: - import httpx - except ImportError: - # httpx is a transitive dep via a2a-sdk; if missing, the MCP - # server won't import either. Let the caller's later import - # surface the real error. - return - - payload = { - "id": workspace_id, - "url": "", - "agent_card": _build_agent_card(workspace_id), - "delivery_mode": "poll", - } - headers = { - "Authorization": f"Bearer {token}", - "Origin": platform_url, - "Content-Type": "application/json", - } - try: - with httpx.Client(timeout=10.0) as client: - resp = client.post( - f"{platform_url}/registry/register", - json=payload, - headers=headers, - ) - if resp.status_code in (401, 403): - print( - f"molecule-mcp: register rejected with HTTP {resp.status_code} — " - f"the token in MOLECULE_WORKSPACE_TOKEN is invalid for workspace " - f"{workspace_id}. 
Regenerate from the canvas → Tokens tab.", - file=sys.stderr, - ) - sys.exit(3) - if resp.status_code >= 400: - logger.warning( - "molecule-mcp: register POST returned HTTP %d: %s", - resp.status_code, - (resp.text or "")[:200], - ) - else: - logger.info( - "molecule-mcp: registered workspace %s with platform", - workspace_id, - ) - except SystemExit: - raise - except Exception as exc: # noqa: BLE001 - logger.warning("molecule-mcp: register POST failed: %s", exc) - - -def _heartbeat_loop( - platform_url: str, - workspace_id: str, - token: str, - interval: float = HEARTBEAT_INTERVAL_SECONDS, -) -> None: - """Daemon thread body: POST /registry/heartbeat every ``interval``s. - - Failures are logged at WARNING and the loop continues. The thread - exits when the main process does (daemon=True). Each iteration - rebuilds the payload + headers — cheap and ensures token rotation - via env var (rare but possible) is picked up on the next tick. - """ - try: - import httpx - except ImportError: - return - - start_time = time.time() - consecutive_auth_failures = 0 - while True: - body = { - "workspace_id": workspace_id, - "error_rate": 0.0, - "sample_error": "", - "active_tasks": 0, - "uptime_seconds": int(time.time() - start_time), - } - headers = { - "Authorization": f"Bearer {token}", - "Origin": platform_url, - "Content-Type": "application/json", - } - try: - with httpx.Client(timeout=10.0) as client: - resp = client.post( - f"{platform_url}/registry/heartbeat", - json=body, - headers=headers, - ) - if resp.status_code in (401, 403): - consecutive_auth_failures += 1 - _log_heartbeat_auth_failure( - consecutive_auth_failures, workspace_id, resp.status_code, - ) - elif resp.status_code >= 400: - # Non-auth HTTP error — log, but DO NOT touch the - # auth-failure counter (5xx blips, 429, etc. are - # transient and unrelated to token validity). - logger.warning( - "molecule-mcp: heartbeat HTTP %d: %s", - resp.status_code, - (resp.text or "")[:200], - ) - else: - consecutive_auth_failures = 0 - _persist_inbound_secret_from_heartbeat(resp) - except Exception as exc: # noqa: BLE001 - logger.warning("molecule-mcp: heartbeat failed: %s", exc) - time.sleep(interval) - - -def _log_heartbeat_auth_failure(count: int, workspace_id: str, status_code: int) -> None: - """Escalate consecutive heartbeat 401/403s from quiet WARNING to - actionable ERROR. - - The operator's first sign of trouble shouldn't be "tools 401 with no - explanation" — that was the failure mode that motivated this code, - triggered by a workspace being deleted server-side and its tokens - revoked while the runtime kept heartbeating in silence. - - Cadence: - * count < threshold: WARNING per tick (transient — could be a - platform blip, don't shout yet) - * count == threshold: ERROR with re-onboard instructions - (the first signal the operator can't miss) - * count > threshold and (count - threshold) % relog == 0: re-log - ERROR (so a session that started after the first ERROR still - sees the message scrolling past in their logs) - """ - if count < _HEARTBEAT_AUTH_LOUD_THRESHOLD: - logger.warning( - "molecule-mcp: heartbeat HTTP %d (auth failure %d/%d) — " - "token may be revoked. Will retry; if persistent, regenerate " - "from canvas → Tokens.", - status_code, count, _HEARTBEAT_AUTH_LOUD_THRESHOLD, - ) - return - # At or past the threshold — this is the loud actionable error. 
- if count == _HEARTBEAT_AUTH_LOUD_THRESHOLD or ( - count - _HEARTBEAT_AUTH_LOUD_THRESHOLD - ) % _HEARTBEAT_AUTH_RELOG_INTERVAL == 0: - logger.error( - "molecule-mcp: %d consecutive heartbeat auth failures (HTTP %d) — " - "the token in MOLECULE_WORKSPACE_TOKEN has been REVOKED, likely " - "because workspace %s was deleted server-side. The MCP server is " - "still running but every platform call will fail. Regenerate the " - "workspace + token from the canvas (Tokens tab), update your MCP " - "config, and restart your runtime.", - count, status_code, workspace_id, - ) - - -def _persist_inbound_secret_from_heartbeat(resp: object) -> None: - """Persist ``platform_inbound_secret`` from a heartbeat response, if any. - - The platform's heartbeat handler returns the secret on every beat - (mirroring /registry/register) so a workspace that lazy-healed the - secret on the platform side — typical recovery path for a workspace - whose row had a NULL ``platform_inbound_secret`` after a partial - bootstrap — picks it up within one heartbeat tick instead of - requiring a runtime restart. - - Without this delivery path the chat-upload code path's "secret was - just minted, will pick up on next heartbeat" 503 message is a lie - and the workspace stays 401-forever until the operator restarts - the runtime. Caught 2026-04-30 on hongmingwang tenant. - - Failure is non-fatal: if the body isn't JSON, doesn't carry the - field, or the disk write fails, the next heartbeat retries. This - matches the cold-start register flow in main.py:319-323. - """ - try: - body = resp.json() - except Exception: # noqa: BLE001 - return - if not isinstance(body, dict): - return - secret = body.get("platform_inbound_secret") - if not secret: - return - try: - from platform_inbound_auth import save_inbound_secret - - save_inbound_secret(secret) - except Exception as exc: # noqa: BLE001 - logger.warning( - "molecule-mcp: persist inbound secret from heartbeat failed: %s", exc - ) - - -def _start_heartbeat_thread( - platform_url: str, - workspace_id: str, - token: str, -) -> threading.Thread: - """Start the heartbeat daemon thread. Returns the Thread handle. - - The MCP stdio loop runs in the foreground (asyncio); this thread - runs alongside it. ``daemon=True`` so when the operator hits - Ctrl-C / closes the runtime, the heartbeat dies with it instead - of leaking and writing to a stale workspace. - """ - t = threading.Thread( - target=_heartbeat_loop, - args=(platform_url, workspace_id, token), - name="molecule-mcp-heartbeat", - daemon=True, - ) - t.start() - return t - - -def _resolve_workspaces() -> tuple[list[tuple[str, str]], list[str]]: - """Return the list of ``(workspace_id, token)`` pairs to register. - - Resolution order: - - 1. ``MOLECULE_WORKSPACES`` env var — JSON array of - ``{"id": "...", "token": "..."}`` objects. Activates the - multi-workspace external-agent path (one process registered into - N workspaces). When set, ``WORKSPACE_ID`` / ``MOLECULE_WORKSPACE_TOKEN`` - are IGNORED — the JSON is the source of truth. - - 2. Single-workspace fallback — ``WORKSPACE_ID`` env var + token from - ``MOLECULE_WORKSPACE_TOKEN`` or ``${CONFIGS_DIR}/.auth_token``. - This is the pre-existing path; back-compat exact. - - Returns ``(workspaces, errors)``: - * ``workspaces``: list of ``(workspace_id, token)`` — non-empty - on the happy path. - * ``errors``: human-readable strings describing what's missing / - malformed. 
``main()`` surfaces these with the same shape as - ``_print_missing_env_help`` so the operator's first run gives - actionable output. - - Why JSON env (not file): ergonomic for Claude Code MCP config (one - string in ``mcpServers.molecule.env`` instead of a sidecar file) - and for CI / launchers. A separate config-file path can be added - later without breaking this. - """ - raw = os.environ.get("MOLECULE_WORKSPACES", "").strip() - if raw: - try: - parsed = json.loads(raw) - except json.JSONDecodeError as exc: - return [], [ - f"MOLECULE_WORKSPACES is not valid JSON ({exc.msg} at pos " - f"{exc.pos}). Expected: '[{{\"id\":\"\",\"token\":" - f"\"\"}},{{...}}]'" - ] - if not isinstance(parsed, list) or not parsed: - return [], [ - "MOLECULE_WORKSPACES must be a non-empty JSON array of " - "{\"id\":\"...\",\"token\":\"...\"} objects" - ] - out: list[tuple[str, str]] = [] - seen: set[str] = set() - errors: list[str] = [] - for i, entry in enumerate(parsed): - if not isinstance(entry, dict): - errors.append( - f"MOLECULE_WORKSPACES[{i}] is not an object — got {type(entry).__name__}" - ) - continue - wsid = str(entry.get("id", "")).strip() - tok = str(entry.get("token", "")).strip() - if not wsid or not tok: - errors.append( - f"MOLECULE_WORKSPACES[{i}] missing 'id' or 'token'" - ) - continue - if wsid in seen: - errors.append( - f"MOLECULE_WORKSPACES[{i}] duplicate workspace id {wsid!r}" - ) - continue - seen.add(wsid) - out.append((wsid, tok)) - if errors: - return [], errors - return out, [] - - # Single-workspace back-compat path. - wsid = os.environ.get("WORKSPACE_ID", "").strip() - if not wsid: - return [], ["WORKSPACE_ID (or MOLECULE_WORKSPACES) is required"] - tok = os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip() - if not tok: - tok = _read_token_file() - if not tok: - return [], [ - "MOLECULE_WORKSPACE_TOKEN (or CONFIGS_DIR/.auth_token) is required" - ] - return [(wsid, tok)], [] - - -def _print_missing_env_help(missing: list[str], have_token_file: bool) -> None: - print("molecule-mcp: missing required environment.\n", file=sys.stderr) - print("Set the following before running molecule-mcp:", file=sys.stderr) - print(" WORKSPACE_ID — your workspace UUID (from canvas)", file=sys.stderr) - print( - " PLATFORM_URL — base URL of your Molecule platform " - "(e.g. https://your-tenant.staging.moleculesai.app)", - file=sys.stderr, - ) - if not have_token_file: - print( - " MOLECULE_WORKSPACE_TOKEN — bearer token for this workspace " - "(canvas → Tokens tab)", - file=sys.stderr, - ) - print("", file=sys.stderr) - print(f"Currently missing: {', '.join(missing)}", file=sys.stderr) +_start_inbox_pollers = mcp_inbox_pollers.start_inbox_pollers def main() -> None: @@ -558,69 +189,5 @@ def main() -> None: cli_main() -def _start_inbox_pollers(platform_url: str, workspace_ids: list[str]) -> None: - """Activate the inbox singleton + spawn one poller daemon thread per workspace. - - Done lazily here (not at module import) because importing inbox - pulls in platform_auth, which only resolves cleanly AFTER env - validation succeeds. Activation is idempotent within a process, - so a stray double-call (e.g. test harness re-entering main) is - harmless. - - The poller threads are daemon=True — die with the main process. - - Single-workspace path: one poller, single cursor file at the legacy - location (``.mcp_inbox_cursor``). Cursor-key resolution falls back - to the empty string for back-compat with operators whose existing - on-disk cursor was written by the pre-multi-workspace code. 
- - Multi-workspace path: N pollers, each with its own cursor file - keyed by ``workspace_id[:8]``. Cursors live next to each other in - configs_dir so an operator inspecting state sees all of them - together. - """ - try: - import inbox - except ImportError as exc: - logger.warning("molecule-mcp: inbox module unavailable: %s", exc) - return - - if len(workspace_ids) <= 1: - # Back-compat exact: single-workspace mode reuses the legacy - # cursor filename + cursor_path constructor arg, so an existing - # operator's on-disk state isn't invalidated by upgrade. - wsid = workspace_ids[0] - state = inbox.InboxState(cursor_path=inbox.default_cursor_path()) - inbox.activate(state) - inbox.start_poller_thread(state, platform_url, wsid) - return - - # Multi-workspace: per-workspace cursor file, one shared queue. - cursor_paths = {wsid: inbox.default_cursor_path(wsid) for wsid in workspace_ids} - state = inbox.InboxState(cursor_paths=cursor_paths) - inbox.activate(state) - for wsid in workspace_ids: - inbox.start_poller_thread(state, platform_url, wsid) - - -def _read_token_file() -> str: - """Read the token from the resolved configs dir's ``.auth_token`` if - present. - - Mirrors platform_auth._token_file's location resolution but without - importing the heavy module here (that import triggers a2a_client's - WORKSPACE_ID guard which is fine after env validation, but cheaper - to inline a 4-line file read than pull in the whole stack just for - the path). - """ - path = configs_dir.resolve() / ".auth_token" - if not path.is_file(): - return "" - try: - return path.read_text().strip() - except OSError: - return "" - - if __name__ == "__main__": # pragma: no cover main() diff --git a/workspace/mcp_heartbeat.py b/workspace/mcp_heartbeat.py new file mode 100644 index 00000000..2d27aa29 --- /dev/null +++ b/workspace/mcp_heartbeat.py @@ -0,0 +1,325 @@ +"""Heartbeat + register thread for the standalone ``molecule-mcp`` wrapper. + +Extracted from ``mcp_cli.py`` (RFC #2873 iter 3) so the heartbeat / +register concern lives in its own module. The console-script entry +``mcp_cli:main`` still drives the spawn, but the loop body, auth-failure +escalation, and inbound-secret persistence now live here so they can be +read, tested, and replaced independently of the orchestrator. + +Public surface: + +* ``HEARTBEAT_INTERVAL_SECONDS`` — cadence constant. +* ``build_agent_card(workspace_id)`` — payload helper. +* ``platform_register(platform_url, workspace_id, token)`` — one-shot + POST /registry/register at startup. +* ``start_heartbeat_thread(platform_url, workspace_id, token)`` — spawn + the daemon thread. +""" +from __future__ import annotations + +import logging +import os +import sys +import threading +import time + +logger = logging.getLogger(__name__) + +# Heartbeat cadence. Must be tighter than healthsweep's stale window +# (currently 60-90s — see registry/healthsweep.go) by a comfortable +# margin so a single missed heartbeat doesn't flip awaiting_agent. +# 20s gives the operator's network 3 attempts within the budget; long +# enough that it doesn't spam, short enough to recover quickly after +# laptop sleep. +HEARTBEAT_INTERVAL_SECONDS = 20.0 + +# After this many consecutive 401/403 heartbeats, escalate from +# WARNING to ERROR with re-onboard guidance. 3 ticks at 20s = ~1 minute +# of sustained auth failure — enough to rule out a transient platform +# blip but quick enough that an operator doesn't sit puzzled for 10 +# minutes wondering why their MCP tools 401. 
Same threshold used for +# repeat-logging at 20-tick (~7 min) intervals so a long-running +# session that missed the first ERROR still sees the message. +HEARTBEAT_AUTH_LOUD_THRESHOLD = 3 +HEARTBEAT_AUTH_RELOG_INTERVAL = 20 + + +def build_agent_card(workspace_id: str) -> dict: + """Build the ``agent_card`` payload sent to /registry/register. + + Three optional env vars override the defaults so an operator can + surface human-readable identity + capabilities to peers and the + canvas Skills tab without code changes: + + * ``MOLECULE_AGENT_NAME`` — display name (defaults to + ``molecule-mcp-{id[:8]}``). Surfaced in canvas workspace cards + and ``list_peers`` output. + * ``MOLECULE_AGENT_DESCRIPTION`` — one-liner about the agent's + purpose. Rendered in canvas Details + Skills tabs. + * ``MOLECULE_AGENT_SKILLS`` — comma-separated skill names + (e.g. ``research,code-review,memory-curation``). Each name is + expanded to a ``{"name": ...}`` skill object — the minimum + shape that satisfies both ``shared_runtime.summarize_peers`` + (uses ``s["name"]``) and the canvas SkillsTab.tsx schema + (id falls back to name when omitted). Empty / whitespace + entries are dropped. + + Defaults match the previous hardcoded behaviour exactly so this + is a strict superset — an operator who sets none of the env vars + sees no change. + """ + name = (os.environ.get("MOLECULE_AGENT_NAME") or "").strip() + if not name: + name = f"molecule-mcp-{workspace_id[:8]}" + + description = (os.environ.get("MOLECULE_AGENT_DESCRIPTION") or "").strip() + + skills_raw = (os.environ.get("MOLECULE_AGENT_SKILLS") or "").strip() + skills: list[dict] = [] + if skills_raw: + for s in skills_raw.split(","): + label = s.strip() + if label: + skills.append({"name": label}) + + card: dict = {"name": name, "skills": skills} + if description: + card["description"] = description + return card + + +def platform_register(platform_url: str, workspace_id: str, token: str) -> None: + """One-shot register at startup; fails fast on auth errors. + + Lifts the workspace from ``awaiting_agent`` to ``online`` for + operators who never ran the curl-register snippet. Safe to call + repeatedly: the platform's register handler is an upsert that + just refreshes ``url``, ``agent_card``, and ``status``. + + Failure model (post-review): + - 401 / 403 → ``sys.exit(3)`` immediately. The operator's + token is wrong; silently looping in a broken state would + make this hard to diagnose because the MCP tools would 401 + on every call too. Hard-fail is the kindest option. + - Other 4xx/5xx → log a warning + continue. The heartbeat + thread will surface persistent failures; transient platform + blips shouldn't abort the MCP loop. + - Network / transport errors → log + continue. Same reasoning. + + Origin header is required by the SaaS edge WAF; without it + /registry/register currently still works (it's on the WAF + allowlist), but the heartbeat path needs Origin and we want one + consistent header set across both calls. + """ + try: + import httpx + except ImportError: + # httpx is a transitive dep via a2a-sdk; if missing, the MCP + # server won't import either. Let the caller's later import + # surface the real error. 
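+        # (This probe-import is deliberate: register and heartbeat both
+        # degrade to silent no-ops when httpx is absent, rather than
+        # crashing the whole MCP wrapper at startup.)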
+ return + + payload = { + "id": workspace_id, + "url": "", + "agent_card": build_agent_card(workspace_id), + "delivery_mode": "poll", + } + headers = { + "Authorization": f"Bearer {token}", + "Origin": platform_url, + "Content-Type": "application/json", + } + try: + with httpx.Client(timeout=10.0) as client: + resp = client.post( + f"{platform_url}/registry/register", + json=payload, + headers=headers, + ) + if resp.status_code in (401, 403): + print( + f"molecule-mcp: register rejected with HTTP {resp.status_code} — " + f"the token in MOLECULE_WORKSPACE_TOKEN is invalid for workspace " + f"{workspace_id}. Regenerate from the canvas → Tokens tab.", + file=sys.stderr, + ) + sys.exit(3) + if resp.status_code >= 400: + logger.warning( + "molecule-mcp: register POST returned HTTP %d: %s", + resp.status_code, + (resp.text or "")[:200], + ) + else: + logger.info( + "molecule-mcp: registered workspace %s with platform", + workspace_id, + ) + except SystemExit: + raise + except Exception as exc: # noqa: BLE001 + logger.warning("molecule-mcp: register POST failed: %s", exc) + + +def heartbeat_loop( + platform_url: str, + workspace_id: str, + token: str, + interval: float = HEARTBEAT_INTERVAL_SECONDS, +) -> None: + """Daemon thread body: POST /registry/heartbeat every ``interval``s. + + Failures are logged at WARNING and the loop continues. The thread + exits when the main process does (daemon=True). Each iteration + rebuilds the payload + headers — cheap and ensures token rotation + via env var (rare but possible) is picked up on the next tick. + """ + try: + import httpx + except ImportError: + return + + start_time = time.time() + consecutive_auth_failures = 0 + while True: + body = { + "workspace_id": workspace_id, + "error_rate": 0.0, + "sample_error": "", + "active_tasks": 0, + "uptime_seconds": int(time.time() - start_time), + } + headers = { + "Authorization": f"Bearer {token}", + "Origin": platform_url, + "Content-Type": "application/json", + } + try: + with httpx.Client(timeout=10.0) as client: + resp = client.post( + f"{platform_url}/registry/heartbeat", + json=body, + headers=headers, + ) + if resp.status_code in (401, 403): + consecutive_auth_failures += 1 + log_heartbeat_auth_failure( + consecutive_auth_failures, workspace_id, resp.status_code, + ) + elif resp.status_code >= 400: + # Non-auth HTTP error — log, but DO NOT touch the + # auth-failure counter (5xx blips, 429, etc. are + # transient and unrelated to token validity). + logger.warning( + "molecule-mcp: heartbeat HTTP %d: %s", + resp.status_code, + (resp.text or "")[:200], + ) + else: + consecutive_auth_failures = 0 + persist_inbound_secret_from_heartbeat(resp) + except Exception as exc: # noqa: BLE001 + logger.warning("molecule-mcp: heartbeat failed: %s", exc) + time.sleep(interval) + + +def log_heartbeat_auth_failure(count: int, workspace_id: str, status_code: int) -> None: + """Escalate consecutive heartbeat 401/403s from quiet WARNING to + actionable ERROR. + + The operator's first sign of trouble shouldn't be "tools 401 with no + explanation" — that was the failure mode that motivated this code, + triggered by a workspace being deleted server-side and its tokens + revoked while the runtime kept heartbeating in silence. 
+ + Cadence: + * count < threshold: WARNING per tick (transient — could be a + platform blip, don't shout yet) + * count == threshold: ERROR with re-onboard instructions + (the first signal the operator can't miss) + * count > threshold and (count - threshold) % relog == 0: re-log + ERROR (so a session that started after the first ERROR still + sees the message scrolling past in their logs) + """ + if count < HEARTBEAT_AUTH_LOUD_THRESHOLD: + logger.warning( + "molecule-mcp: heartbeat HTTP %d (auth failure %d/%d) — " + "token may be revoked. Will retry; if persistent, regenerate " + "from canvas → Tokens.", + status_code, count, HEARTBEAT_AUTH_LOUD_THRESHOLD, + ) + return + # At or past the threshold — this is the loud actionable error. + if count == HEARTBEAT_AUTH_LOUD_THRESHOLD or ( + count - HEARTBEAT_AUTH_LOUD_THRESHOLD + ) % HEARTBEAT_AUTH_RELOG_INTERVAL == 0: + logger.error( + "molecule-mcp: %d consecutive heartbeat auth failures (HTTP %d) — " + "the token in MOLECULE_WORKSPACE_TOKEN has been REVOKED, likely " + "because workspace %s was deleted server-side. The MCP server is " + "still running but every platform call will fail. Regenerate the " + "workspace + token from the canvas (Tokens tab), update your MCP " + "config, and restart your runtime.", + count, status_code, workspace_id, + ) + + +def persist_inbound_secret_from_heartbeat(resp: object) -> None: + """Persist ``platform_inbound_secret`` from a heartbeat response, if any. + + The platform's heartbeat handler returns the secret on every beat + (mirroring /registry/register) so a workspace that lazy-healed the + secret on the platform side — typical recovery path for a workspace + whose row had a NULL ``platform_inbound_secret`` after a partial + bootstrap — picks it up within one heartbeat tick instead of + requiring a runtime restart. + + Without this delivery path the chat-upload code path's "secret was + just minted, will pick up on next heartbeat" 503 message is a lie + and the workspace stays 401-forever until the operator restarts + the runtime. Caught 2026-04-30 on hongmingwang tenant. + + Failure is non-fatal: if the body isn't JSON, doesn't carry the + field, or the disk write fails, the next heartbeat retries. This + matches the cold-start register flow in main.py:319-323. + """ + try: + body = resp.json() + except Exception: # noqa: BLE001 + return + if not isinstance(body, dict): + return + secret = body.get("platform_inbound_secret") + if not secret: + return + try: + from platform_inbound_auth import save_inbound_secret + + save_inbound_secret(secret) + except Exception as exc: # noqa: BLE001 + logger.warning( + "molecule-mcp: persist inbound secret from heartbeat failed: %s", exc + ) + + +def start_heartbeat_thread( + platform_url: str, + workspace_id: str, + token: str, +) -> threading.Thread: + """Start the heartbeat daemon thread. Returns the Thread handle. + + The MCP stdio loop runs in the foreground (asyncio); this thread + runs alongside it. ``daemon=True`` so when the operator hits + Ctrl-C / closes the runtime, the heartbeat dies with it instead + of leaking and writing to a stale workspace. 
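+
+    A plausible call site, sketched (``main()``'s body is outside this
+    hunk, so the exact loop shape is an assumption):
+
+        for workspace_id, token in workspaces:
+            start_heartbeat_thread(platform_url, workspace_id, token)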
+ """ + t = threading.Thread( + target=heartbeat_loop, + args=(platform_url, workspace_id, token), + name="molecule-mcp-heartbeat", + daemon=True, + ) + t.start() + return t diff --git a/workspace/mcp_inbox_pollers.py b/workspace/mcp_inbox_pollers.py new file mode 100644 index 00000000..659da5ed --- /dev/null +++ b/workspace/mcp_inbox_pollers.py @@ -0,0 +1,63 @@ +"""Inbox-poller spawn helpers for the standalone ``molecule-mcp`` wrapper. + +Extracted from ``mcp_cli.py`` (RFC #2873 iter 3). The poller is the +INBOUND side of the standalone path — without it, the universal MCP +server is outbound-only (can call ``delegate_task`` / +``send_message_to_user``, never observes canvas-user / peer-agent +messages). + +Public surface: + +* ``start_inbox_pollers(platform_url, workspace_ids)`` — activate the + inbox singleton and spawn one daemon poller per workspace. +""" +from __future__ import annotations + +import logging + +logger = logging.getLogger(__name__) + + +def start_inbox_pollers(platform_url: str, workspace_ids: list[str]) -> None: + """Activate the inbox singleton + spawn one poller daemon thread per workspace. + + Done lazily here (not at module import) because importing inbox + pulls in platform_auth, which only resolves cleanly AFTER env + validation succeeds. Activation is idempotent within a process, + so a stray double-call (e.g. test harness re-entering main) is + harmless. + + The poller threads are daemon=True — die with the main process. + + Single-workspace path: one poller, single cursor file at the legacy + location (``.mcp_inbox_cursor``). Cursor-key resolution falls back + to the empty string for back-compat with operators whose existing + on-disk cursor was written by the pre-multi-workspace code. + + Multi-workspace path: N pollers, each with its own cursor file + keyed by ``workspace_id[:8]``. Cursors live next to each other in + configs_dir so an operator inspecting state sees all of them + together. + """ + try: + import inbox + except ImportError as exc: + logger.warning("molecule-mcp: inbox module unavailable: %s", exc) + return + + if len(workspace_ids) <= 1: + # Back-compat exact: single-workspace mode reuses the legacy + # cursor filename + cursor_path constructor arg, so an existing + # operator's on-disk state isn't invalidated by upgrade. + wsid = workspace_ids[0] + state = inbox.InboxState(cursor_path=inbox.default_cursor_path()) + inbox.activate(state) + inbox.start_poller_thread(state, platform_url, wsid) + return + + # Multi-workspace: per-workspace cursor file, one shared queue. + cursor_paths = {wsid: inbox.default_cursor_path(wsid) for wsid in workspace_ids} + state = inbox.InboxState(cursor_paths=cursor_paths) + inbox.activate(state) + for wsid in workspace_ids: + inbox.start_poller_thread(state, platform_url, wsid) diff --git a/workspace/mcp_workspace_resolver.py b/workspace/mcp_workspace_resolver.py new file mode 100644 index 00000000..a6fe3bff --- /dev/null +++ b/workspace/mcp_workspace_resolver.py @@ -0,0 +1,146 @@ +"""Env validation + workspace resolution for the standalone ``molecule-mcp``. + +Extracted from ``mcp_cli.py`` (RFC #2873 iter 3). Deals with the two +shapes ``molecule-mcp`` accepts: + + * Single-workspace legacy shape: ``WORKSPACE_ID`` + token from + ``MOLECULE_WORKSPACE_TOKEN`` or ``${CONFIGS_DIR}/.auth_token``. + * Multi-workspace JSON shape: ``MOLECULE_WORKSPACES`` env var carries a + JSON array of ``{"id": ..., "token": ...}`` entries. + +Public surface: + +* ``resolve_workspaces()`` → ``(workspaces, errors)``. 
+* ``read_token_file()`` → token text or ``""``. +* ``print_missing_env_help(missing, have_token_file)`` — operator-help + printer. +""" +from __future__ import annotations + +import json +import os +import sys + +import configs_dir + + +def resolve_workspaces() -> tuple[list[tuple[str, str]], list[str]]: + """Return the list of ``(workspace_id, token)`` pairs to register. + + Resolution order: + + 1. ``MOLECULE_WORKSPACES`` env var — JSON array of + ``{"id": "...", "token": "..."}`` objects. Activates the + multi-workspace external-agent path (one process registered into + N workspaces). When set, ``WORKSPACE_ID`` / ``MOLECULE_WORKSPACE_TOKEN`` + are IGNORED — the JSON is the source of truth. + + 2. Single-workspace fallback — ``WORKSPACE_ID`` env var + token from + ``MOLECULE_WORKSPACE_TOKEN`` or ``${CONFIGS_DIR}/.auth_token``. + This is the pre-existing path; back-compat exact. + + Returns ``(workspaces, errors)``: + * ``workspaces``: list of ``(workspace_id, token)`` — non-empty + on the happy path. + * ``errors``: human-readable strings describing what's missing / + malformed. ``main()`` surfaces these with the same shape as + ``print_missing_env_help`` so the operator's first run gives + actionable output. + + Why JSON env (not file): ergonomic for Claude Code MCP config (one + string in ``mcpServers.molecule.env`` instead of a sidecar file) + and for CI / launchers. A separate config-file path can be added + later without breaking this. + """ + raw = os.environ.get("MOLECULE_WORKSPACES", "").strip() + if raw: + try: + parsed = json.loads(raw) + except json.JSONDecodeError as exc: + return [], [ + f"MOLECULE_WORKSPACES is not valid JSON ({exc.msg} at pos " + f"{exc.pos}). Expected: '[{{\"id\":\"\",\"token\":" + f"\"\"}},{{...}}]'" + ] + if not isinstance(parsed, list) or not parsed: + return [], [ + "MOLECULE_WORKSPACES must be a non-empty JSON array of " + "{\"id\":\"...\",\"token\":\"...\"} objects" + ] + out: list[tuple[str, str]] = [] + seen: set[str] = set() + errors: list[str] = [] + for i, entry in enumerate(parsed): + if not isinstance(entry, dict): + errors.append( + f"MOLECULE_WORKSPACES[{i}] is not an object — got {type(entry).__name__}" + ) + continue + wsid = str(entry.get("id", "")).strip() + tok = str(entry.get("token", "")).strip() + if not wsid or not tok: + errors.append( + f"MOLECULE_WORKSPACES[{i}] missing 'id' or 'token'" + ) + continue + if wsid in seen: + errors.append( + f"MOLECULE_WORKSPACES[{i}] duplicate workspace id {wsid!r}" + ) + continue + seen.add(wsid) + out.append((wsid, tok)) + if errors: + return [], errors + return out, [] + + # Single-workspace back-compat path. + wsid = os.environ.get("WORKSPACE_ID", "").strip() + if not wsid: + return [], ["WORKSPACE_ID (or MOLECULE_WORKSPACES) is required"] + tok = os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip() + if not tok: + tok = read_token_file() + if not tok: + return [], [ + "MOLECULE_WORKSPACE_TOKEN (or CONFIGS_DIR/.auth_token) is required" + ] + return [(wsid, tok)], [] + + +def print_missing_env_help(missing: list[str], have_token_file: bool) -> None: + print("molecule-mcp: missing required environment.\n", file=sys.stderr) + print("Set the following before running molecule-mcp:", file=sys.stderr) + print(" WORKSPACE_ID — your workspace UUID (from canvas)", file=sys.stderr) + print( + " PLATFORM_URL — base URL of your Molecule platform " + "(e.g. 
https://your-tenant.staging.moleculesai.app)", + file=sys.stderr, + ) + if not have_token_file: + print( + " MOLECULE_WORKSPACE_TOKEN — bearer token for this workspace " + "(canvas → Tokens tab)", + file=sys.stderr, + ) + print("", file=sys.stderr) + print(f"Currently missing: {', '.join(missing)}", file=sys.stderr) + + +def read_token_file() -> str: + """Read the token from the resolved configs dir's ``.auth_token`` if + present. + + Mirrors platform_auth._token_file's location resolution but without + importing the heavy module here (that import triggers a2a_client's + WORKSPACE_ID guard which is fine after env validation, but cheaper + to inline a 4-line file read than pull in the whole stack just for + the path). + """ + path = configs_dir.resolve() / ".auth_token" + if not path.is_file(): + return "" + try: + return path.read_text().strip() + except OSError: + return "" diff --git a/workspace/tests/test_inbox.py b/workspace/tests/test_inbox.py index 6731701a..cbba9a3b 100644 --- a/workspace/tests/test_inbox.py +++ b/workspace/tests/test_inbox.py @@ -555,16 +555,34 @@ def test_poll_once_self_notify_does_not_fire_notification(state: inbox.InboxStat def test_start_poller_thread_is_daemon(state: inbox.InboxState): """Daemon flag is required so the poller dies with the parent process; a non-daemon poller would leak across `claude` restarts - and write to a stale workspace.""" + and write to a stale workspace. + + Stop_event is plumbed so the thread cleans up at the end of the + test instead of leaking into later tests. Without cleanup, the + daemon's ~10ms tick races with later tests that patch httpx.Client + — the leaked thread sees their patched response and runs an + unwanted iteration of _poll_once that double-counts mocked calls + (caught when test_batch_fetcher_owns_client_when_not_supplied + surfaced this on Python 3.11 CI but not 3.13 local). + """ resp = _make_response(200, []) p, _ = _patch_httpx(resp) + stop_event = threading.Event() with p, patch("platform_auth.auth_headers", return_value={}): # Use a very short interval so the loop body runs at least once # before we exit the test. - t = inbox.start_poller_thread(state, "http://platform", "ws-1", interval=0.01) + t = inbox.start_poller_thread( + state, "http://platform", "ws-1", interval=0.01, stop_event=stop_event + ) time.sleep(0.05) - assert t.daemon is True - assert t.is_alive() + assert t.daemon is True + assert t.is_alive() + # Signal shutdown + wait for the thread to actually exit before + # we leave the test scope. Without this join, the leaked thread + # races with later tests' httpx patches. + stop_event.set() + t.join(timeout=2.0) + assert not t.is_alive(), "poller thread did not exit on stop_event" # --------------------------------------------------------------------------- @@ -577,6 +595,219 @@ def test_default_cursor_path_uses_configs_dir(monkeypatch, tmp_path: Path): assert inbox.default_cursor_path() == tmp_path / ".mcp_inbox_cursor" +# --------------------------------------------------------------------------- +# Phase 5b — BatchFetcher integration with the poll loop +# --------------------------------------------------------------------------- +# +# These tests pin the cross-module contract between inbox._poll_once and +# inbox_uploads.BatchFetcher: chat_upload_receive rows must be submitted +# to a single BatchFetcher AND drained (URI cache populated) before any +# subsequent message row is processed. 
Without the drain, the +# rewrite_request_body path inside message_from_activity surfaces the +# un-rewritten ``platform-pending:`` URI to the agent. + + +def _upload_row(act_id: str, file_id: str) -> dict: + return { + "id": act_id, + "source_id": None, + "method": "chat_upload_receive", + "summary": f"chat_upload_receive: {file_id}.pdf", + "request_body": { + "file_id": file_id, + "name": f"{file_id}.pdf", + "uri": f"platform-pending:ws-1/{file_id}", + "mimeType": "application/pdf", + "size": 3, + }, + "created_at": "2026-05-04T10:00:00Z", + } + + +def _message_row_referencing(act_id: str, file_id: str) -> dict: + return { + "id": act_id, + "source_id": None, + "method": "message/send", + "summary": None, + "request_body": { + "params": { + "message": { + "parts": [ + {"kind": "text", "text": "have a look"}, + { + "kind": "file", + "file": { + "uri": f"platform-pending:ws-1/{file_id}", + "name": f"{file_id}.pdf", + }, + }, + ] + } + } + }, + "created_at": "2026-05-04T10:00:01Z", + } + + +def _patch_httpx_routing(activity_rows: list[dict], upload_bytes: bytes = b"PDF"): + """Replace ``httpx.Client`` so: + + - GET /activity returns ``activity_rows`` + - GET /workspaces/.../content returns ``upload_bytes`` with content-type + - POST /ack returns 200 + + Returns the patch context manager; tests use ``with p:``. Each new + Client(...) gets a fresh MagicMock so the test can verify + constructor-count expectations without pinning singletons. + """ + def _client_factory(*args, **kwargs): + c = MagicMock() + c.__enter__ = MagicMock(return_value=c) + c.__exit__ = MagicMock(return_value=False) + + def _get(url, params=None, headers=None): + if "/activity" in url: + resp = MagicMock() + resp.status_code = 200 + resp.json.return_value = activity_rows + resp.text = "" + return resp + if "/pending-uploads/" in url and "/content" in url: + resp = MagicMock() + resp.status_code = 200 + resp.content = upload_bytes + resp.headers = {"content-type": "application/pdf"} + resp.text = "" + return resp + resp = MagicMock() + resp.status_code = 404 + resp.text = "" + return resp + + def _post(url, headers=None): + resp = MagicMock() + resp.status_code = 200 + resp.text = "" + return resp + + c.get = MagicMock(side_effect=_get) + c.post = MagicMock(side_effect=_post) + c.close = MagicMock() + return c + + return patch("httpx.Client", side_effect=_client_factory) + + +def test_poll_once_drains_uploads_before_processing_message_row(state: inbox.InboxState, tmp_path): + """The chat-message row's file.uri MUST be rewritten to the local + workspace: URI by the time it lands in the InboxState queue. This + requires BatchFetcher.wait_all() to run before message_from_activity + on the second row. + """ + import inbox_uploads + inbox_uploads.get_cache().clear() + # Sandbox the on-disk staging dir so the test can't pollute the + # workspace's real chat-uploads. 
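+    # The ordering contract under test, as a sketch (the names are real
+    # symbols from this patch, but the loop is a simplification, not
+    # inbox._poll_once's actual body):
+    #
+    #     fetcher = None
+    #     for row in rows:
+    #         if inbox_uploads.is_chat_upload_row(row):
+    #             fetcher = fetcher or BatchFetcher(...)  # lazy construct
+    #             fetcher.submit(row)
+    #             continue
+    #         if fetcher is not None:
+    #             fetcher.wait_all()  # drain BEFORE any message row
+    #         ...  # message_from_activity, rewrite, enqueue
+    #     if fetcher is not None:
+    #         fetcher.wait_all()      # end-of-batch drain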
+ real_dir = inbox_uploads.CHAT_UPLOAD_DIR + inbox_uploads.CHAT_UPLOAD_DIR = str(tmp_path / "chat-uploads") + try: + rows = [ + _upload_row("act-1", "file-A"), + _message_row_referencing("act-2", "file-A"), + ] + state.save_cursor("act-old") + with _patch_httpx_routing(rows, upload_bytes=b"PDF-bytes"): + n = inbox._poll_once(state, "http://platform", "ws-1", {}) + finally: + inbox_uploads.CHAT_UPLOAD_DIR = real_dir + inbox_uploads.get_cache().clear() + + assert n == 1, "exactly one message row should be enqueued (the upload row is a side-effect, not a message)" + queued = state.peek(10) + assert len(queued) == 1 + # The contract this test exists to pin: the platform-pending: URI + # was rewritten to workspace: BEFORE the message landed in the + # state queue. message_from_activity mutates row['request_body'] + # in-place, so the rewritten URI is observable on the row dict + # we passed in. + rewritten_part = rows[1]["request_body"]["params"]["message"]["parts"][1] + assert rewritten_part["file"]["uri"].startswith("workspace:"), ( + f"upload barrier broken: file.uri = {rewritten_part['file']['uri']!r}; " + "rewrite_request_body ran before BatchFetcher.wait_all populated the cache" + ) + # Cursor advanced past BOTH rows — upload-receive (act-1) is + # acknowledged via the inbox cursor regardless of fetch outcome. + assert state.load_cursor() == "act-2" + + +def test_poll_once_with_only_upload_rows_drains_at_loop_end(state: inbox.InboxState, tmp_path): + """End-of-batch drain: a poll that contains ONLY upload rows (no + chat-message row to trigger the inline drain) must still drain the + BatchFetcher before _poll_once returns. Otherwise a future poll + that picks up the corresponding chat-message row would race with + in-flight fetches from the previous batch. + """ + import inbox_uploads + inbox_uploads.get_cache().clear() + real_dir = inbox_uploads.CHAT_UPLOAD_DIR + inbox_uploads.CHAT_UPLOAD_DIR = str(tmp_path / "chat-uploads") + try: + rows = [_upload_row("act-1", "file-A"), _upload_row("act-2", "file-B")] + state.save_cursor("act-old") + with _patch_httpx_routing(rows, upload_bytes=b"PDF"): + n = inbox._poll_once(state, "http://platform", "ws-1", {}) + # By the time _poll_once returned, the URI cache must be hot + # for both file_ids — proves the end-of-loop drain ran. + assert inbox_uploads.get_cache().get("platform-pending:ws-1/file-A") is not None + assert inbox_uploads.get_cache().get("platform-pending:ws-1/file-B") is not None + finally: + inbox_uploads.CHAT_UPLOAD_DIR = real_dir + inbox_uploads.get_cache().clear() + # Upload rows are NOT message rows; queue stays empty. + assert n == 0 + # Cursor advances past both upload rows. + assert state.load_cursor() == "act-2" + + +def test_poll_once_no_uploads_does_not_construct_batch_fetcher(state: inbox.InboxState): + """A batch with no upload-receive rows must not pay the BatchFetcher + construction cost — the executor + httpx client allocation is + deferred until the first upload row appears. + """ + import inbox_uploads + + constructed: list[Any] = [] + + def _patched_init(self, **kwargs): + constructed.append(kwargs) + # Don't actually run __init__; we never hit submit/wait_all. 
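+        # (The stubs below mirror the attributes close() touches:
+        # _closed, _futures, _executor, _client, _own_client. A stray
+        # close() on this hollow instance therefore stays safe.)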
+ self._closed = False + self._futures = [] + self._executor = MagicMock() + self._client = MagicMock() + self._own_client = False + + rows = [ + { + "id": "act-1", + "source_id": None, + "method": "message/send", + "summary": None, + "request_body": {"parts": [{"type": "text", "text": "hi"}]}, + "created_at": "2026-04-30T22:00:00Z", + }, + ] + state.save_cursor("act-old") + resp = _make_response(200, rows) + p, _ = _patch_httpx(resp) + with patch.object(inbox_uploads.BatchFetcher, "__init__", _patched_init), p: + n = inbox._poll_once(state, "http://platform", "ws-1", {}) + + assert n == 1 + assert constructed == [], "BatchFetcher must not be constructed when no upload rows are present" + + def test_default_cursor_path_falls_back_to_default(tmp_path, monkeypatch): """When CONFIGS_DIR is unset, the cursor path resolves through configs_dir.resolve() — /configs in-container, ~/.molecule-workspace @@ -701,3 +932,165 @@ def test_set_notification_callback_none_clears(state: inbox.InboxState): state.record(_msg("act-1")) assert received == [] + + +# --------------------------------------------------------------------------- +# Phase 2 — chat_upload_receive rows route to inbox_uploads.fetch_and_stage +# --------------------------------------------------------------------------- + + +def test_poll_once_skips_chat_upload_row_from_queue(state: inbox.InboxState, monkeypatch, tmp_path): + """A row with method='chat_upload_receive' must NOT enqueue as a + chat message — it's a side-effect telling the workspace to fetch + bytes. Pin the contract so a refactor that flattens the row loop + can't silently re-enqueue these as 'empty A2A message' rows.""" + import inbox_uploads + monkeypatch.setattr(inbox_uploads, "CHAT_UPLOAD_DIR", str(tmp_path / "chat-uploads")) + inbox_uploads.get_cache().clear() + + rows = [ + { + "id": "act-1", + "source_id": None, + "method": "chat_upload_receive", + "summary": "chat_upload_receive: foo.pdf", + "request_body": { + "file_id": "abc123", + "name": "foo.pdf", + "mimeType": "application/pdf", + "size": 4, + "uri": "platform-pending:ws-1/abc123", + }, + "created_at": "2026-05-04T10:00:00Z", + }, + ] + resp = _make_response(200, rows) + p, _ = _patch_httpx(resp) + fetch_called = [] + + def fake_fetch(row, **kwargs): + fetch_called.append((row.get("id"), kwargs["workspace_id"])) + return "workspace:/local/foo.pdf" + + with p, patch.object(inbox_uploads, "fetch_and_stage", fake_fetch): + n = inbox._poll_once(state, "http://platform", "ws-1", {}) + + # Not enqueued + cursor advanced. + assert n == 0 + assert state.peek(10) == [] + assert state.load_cursor() == "act-1" + # fetch_and_stage was invoked with the row and workspace_id. + assert fetch_called == [("act-1", "ws-1")] + + +def test_poll_once_chat_upload_row_then_chat_message_rewrites_uri(state: inbox.InboxState, monkeypatch, tmp_path): + """The classic ordering: upload-receive row first (lower id), chat + message referencing platform-pending: URI second. The chat message + that lands in the inbox must have its URI rewritten to the local + workspace: URI before the agent sees it. + """ + import inbox_uploads + monkeypatch.setattr(inbox_uploads, "CHAT_UPLOAD_DIR", str(tmp_path / "chat-uploads")) + cache = inbox_uploads.get_cache() + cache.clear() + + # Pretend the fetch already populated the cache. (The real flow + # populates it inside fetch_and_stage; we patch that to keep the + # test focused on the rewrite contract.) 
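+    # The rewrite contract in miniature (toy values; rewrite_request_body
+    # and the cache are real names from this patch):
+    #
+    #     cache.set("platform-pending:ws-1/abc", "workspace:/tmp/abc.pdf")
+    #     body = {"parts": [{"kind": "file",
+    #                        "file": {"uri": "platform-pending:ws-1/abc"}}]}
+    #     inbox_uploads.rewrite_request_body(body)  # mutates in place
+    #     body["parts"][0]["file"]["uri"]  # -> "workspace:/tmp/abc.pdf"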
+ cache.set("platform-pending:ws-1/abc123", "workspace:/workspace/.molecule/chat-uploads/xx-foo.pdf") + + rows = [ + { + "id": "act-1", + "source_id": None, + "method": "chat_upload_receive", + "summary": "chat_upload_receive: foo.pdf", + "request_body": { + "file_id": "abc123", + "name": "foo.pdf", + "mimeType": "application/pdf", + "size": 4, + "uri": "platform-pending:ws-1/abc123", + }, + "created_at": "2026-05-04T10:00:00Z", + }, + { + "id": "act-2", + "source_id": None, + "method": "message/send", + "summary": None, + "request_body": { + "params": { + "message": { + "parts": [ + {"kind": "text", "text": "look at this"}, + { + "kind": "file", + "file": { + "uri": "platform-pending:ws-1/abc123", + "name": "foo.pdf", + }, + }, + ] + } + } + }, + "created_at": "2026-05-04T10:00:01Z", + }, + ] + resp = _make_response(200, rows) + p, _ = _patch_httpx(resp) + + def fake_fetch(row, **kwargs): + return "workspace:/workspace/.molecule/chat-uploads/xx-foo.pdf" + + with p, patch.object(inbox_uploads, "fetch_and_stage", fake_fetch): + n = inbox._poll_once(state, "http://platform", "ws-1", {}) + + # Only the chat message is enqueued. + assert n == 1 + queue = state.peek(10) + assert len(queue) == 1 + msg = queue[0] + assert msg.activity_id == "act-2" + # The URI in the row's request_body was mutated by message_from_activity + # → rewrite_request_body. Re-extracting reveals the rewritten value. + rewritten = rows[1]["request_body"]["params"]["message"]["parts"][1]["file"]["uri"] + assert rewritten == "workspace:/workspace/.molecule/chat-uploads/xx-foo.pdf" + + +def test_poll_once_chat_upload_row_advances_cursor_even_on_fetch_failure( + state: inbox.InboxState, monkeypatch, tmp_path +): + """A permanent network failure on /content must NOT stall the cursor + — otherwise one bad upload blocks all real chat traffic for the + workspace. fetch_and_stage returns None on failure, but the row is + still considered handled from the cursor's perspective.""" + import inbox_uploads + monkeypatch.setattr(inbox_uploads, "CHAT_UPLOAD_DIR", str(tmp_path / "chat-uploads")) + + rows = [ + { + "id": "act-broken", + "source_id": None, + "method": "chat_upload_receive", + "summary": "chat_upload_receive: doomed.pdf", + "request_body": { + "file_id": "doom", + "name": "doomed.pdf", + "uri": "platform-pending:ws-1/doom", + }, + "created_at": "2026-05-04T10:00:00Z", + }, + ] + resp = _make_response(200, rows) + p, _ = _patch_httpx(resp) + + def fake_fetch(row, **kwargs): + return None # network failure + + with p, patch.object(inbox_uploads, "fetch_and_stage", fake_fetch): + inbox._poll_once(state, "http://platform", "ws-1", {}) + + assert state.peek(10) == [] + assert state.load_cursor() == "act-broken" diff --git a/workspace/tests/test_inbox_uploads.py b/workspace/tests/test_inbox_uploads.py new file mode 100644 index 00000000..37446760 --- /dev/null +++ b/workspace/tests/test_inbox_uploads.py @@ -0,0 +1,1120 @@ +"""Tests for workspace/inbox_uploads.py — poll-mode chat-upload fetcher. + +Covers the full activity-row → fetch → stage-on-disk → ack flow plus +the URI cache and the rewrite that swaps platform-pending: URIs to +local workspace: URIs in subsequent chat messages. 
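+
+Runs standalone via ``pytest workspace/tests/test_inbox_uploads.py``; the
+network-facing tests stub ``httpx.Client``, so no live platform is needed.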
+""" +from __future__ import annotations + +import os +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest + +import inbox_uploads + + +@pytest.fixture(autouse=True) +def _reset_cache_and_dir(tmp_path, monkeypatch): + """Each test starts with an empty URI cache and a temp upload dir + so on-disk artifacts from one test don't leak into the next.""" + inbox_uploads.get_cache().clear() + monkeypatch.setattr(inbox_uploads, "CHAT_UPLOAD_DIR", str(tmp_path / "chat-uploads")) + yield + inbox_uploads.get_cache().clear() + + +# --------------------------------------------------------------------------- +# sanitize_filename — parity with internal_chat_uploads + Go SanitizeFilename +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "raw,want", + [ + ("../../etc/passwd", "passwd"), + ("/etc/passwd", "passwd"), + ("hello world.pdf", "hello_world.pdf"), + ("weird;chars!?.txt", "weird_chars__.txt"), + ("中文.docx", "__.docx"), + ("file (1).pdf", "file__1_.pdf"), + ("report-2026.05.04_v2.pdf", "report-2026.05.04_v2.pdf"), + ("", "file"), + (".", "file"), + ("..", "file"), + ], +) +def test_sanitize_filename_parity_with_python_internal(raw, want): + assert inbox_uploads.sanitize_filename(raw) == want + + +def test_sanitize_filename_caps_at_100_preserves_short_extension(): + long = "a" * 200 + ".pdf" + got = inbox_uploads.sanitize_filename(long) + assert len(got) == 100 + assert got.endswith(".pdf") + + +def test_sanitize_filename_drops_long_extension(): + long = "c" * 90 + ".thisisaverylongextensionnotpreserved" + got = inbox_uploads.sanitize_filename(long) + assert len(got) == 100 + assert ".thisisaverylongextensionnotpreserved" not in got + + +# --------------------------------------------------------------------------- +# _URICache — LRU semantics +# --------------------------------------------------------------------------- + + +def test_uricache_set_get_roundtrip(): + c = inbox_uploads._URICache(max_entries=10) + c.set("platform-pending:ws/1", "workspace:/local/1") + assert c.get("platform-pending:ws/1") == "workspace:/local/1" + + +def test_uricache_get_missing_returns_none(): + c = inbox_uploads._URICache(max_entries=10) + assert c.get("platform-pending:ws/missing") is None + + +def test_uricache_evicts_oldest_at_capacity(): + c = inbox_uploads._URICache(max_entries=2) + c.set("a", "A") + c.set("b", "B") + c.set("c", "C") # evicts "a" + assert c.get("a") is None + assert c.get("b") == "B" + assert c.get("c") == "C" + assert len(c) == 2 + + +def test_uricache_get_promotes_recently_used(): + c = inbox_uploads._URICache(max_entries=2) + c.set("a", "A") + c.set("b", "B") + # Promote "a" by reading; next set should evict "b" instead of "a". 
+ assert c.get("a") == "A" + c.set("c", "C") + assert c.get("a") == "A" + assert c.get("b") is None + assert c.get("c") == "C" + + +def test_uricache_overwrite_updates_value(): + c = inbox_uploads._URICache(max_entries=10) + c.set("k", "v1") + c.set("k", "v2") + assert c.get("k") == "v2" + assert len(c) == 1 + + +def test_uricache_clear(): + c = inbox_uploads._URICache(max_entries=10) + c.set("a", "A") + c.set("b", "B") + c.clear() + assert c.get("a") is None + assert len(c) == 0 + + +def test_resolve_pending_uri_uses_module_cache(): + inbox_uploads.get_cache().set("platform-pending:ws/x", "workspace:/local/x") + assert inbox_uploads.resolve_pending_uri("platform-pending:ws/x") == "workspace:/local/x" + assert inbox_uploads.resolve_pending_uri("platform-pending:ws/missing") is None + + +# --------------------------------------------------------------------------- +# stage_to_disk +# --------------------------------------------------------------------------- + + +def test_stage_to_disk_writes_file_and_returns_workspace_uri(tmp_path): + uri = inbox_uploads.stage_to_disk(b"hello", "report.pdf") + assert uri.startswith("workspace:") + path = uri[len("workspace:"):] + assert os.path.isfile(path) + with open(path, "rb") as f: + assert f.read() == b"hello" + assert path.endswith("-report.pdf") + # Prefix is 32 hex chars + "-" + name. + name = os.path.basename(path) + prefix, _, _ = name.partition("-") + assert len(prefix) == 32 + + +def test_stage_to_disk_sanitizes_filename(): + uri = inbox_uploads.stage_to_disk(b"x", "../../evil.txt") + name = os.path.basename(uri) + assert "/" not in name + assert name.endswith("-evil.txt") + + +def test_stage_to_disk_rejects_oversize(): + with pytest.raises(ValueError): + inbox_uploads.stage_to_disk(b"x" * (inbox_uploads.MAX_FILE_BYTES + 1), "big.bin") + + +def test_stage_to_disk_creates_directory_if_missing(): + # CHAT_UPLOAD_DIR is monkeypatched to a non-existent tmp path; the + # call must mkdir -p it on first write. + assert not os.path.exists(inbox_uploads.CHAT_UPLOAD_DIR) + inbox_uploads.stage_to_disk(b"x", "a.txt") + assert os.path.isdir(inbox_uploads.CHAT_UPLOAD_DIR) + + +def test_stage_to_disk_write_failure_cleans_partial_file(tmp_path, monkeypatch): + # open() succeeds but write() fails — the partial file must be + # removed so a retry can claim a fresh prefix without colliding. + real_fdopen = os.fdopen + written_paths: list[str] = [] + + def boom_fdopen(fd, mode): + # Wrap the real file with one whose write() raises. + f = real_fdopen(fd, mode) + # Track which path's fd we opened by inspecting the chat-upload dir. + for entry in os.listdir(inbox_uploads.CHAT_UPLOAD_DIR): + written_paths.append(os.path.join(inbox_uploads.CHAT_UPLOAD_DIR, entry)) + original_write = f.write + + def bad_write(b): + original_write(b"") # ensure file exists + raise OSError(28, "no space") + f.write = bad_write + return f + + monkeypatch.setattr(os, "fdopen", boom_fdopen) + with pytest.raises(OSError): + inbox_uploads.stage_to_disk(b"data", "x.txt") + # All staged files cleaned up. + for p in written_paths: + assert not os.path.exists(p) + + +def test_stage_to_disk_write_failure_unlink_failure_swallowed(monkeypatch): + # open() succeeds, write() fails, unlink() ALSO fails — the unlink + # error is swallowed and the original write error propagates. 
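+    # Roughly the cleanup shape being pinned here (a sketch, not the
+    # actual stage_to_disk body):
+    #
+    #     try:
+    #         f.write(data)
+    #     except OSError:
+    #         try:
+    #             os.unlink(path)
+    #         except OSError:
+    #             pass      # best-effort cleanup; never masks the cause
+    #         raise         # the ORIGINAL write error propagates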
+ real_fdopen = os.fdopen + + def boom_fdopen(fd, mode): + f = real_fdopen(fd, mode) + + def bad_write(_): + raise OSError(28, "no space") + f.write = bad_write + return f + + def bad_unlink(_): + raise OSError(13, "permission denied") + + monkeypatch.setattr(os, "fdopen", boom_fdopen) + monkeypatch.setattr(os, "unlink", bad_unlink) + with pytest.raises(OSError) as ei: + inbox_uploads.stage_to_disk(b"data", "x.txt") + # Original write error, not the unlink error. + assert ei.value.errno == 28 + + +def test_stage_to_disk_propagates_oserror_and_cleans_partial(tmp_path, monkeypatch): + # Make the dir read-only AFTER mkdir succeeds, so open() fails. Skip + # this on platforms where the dir's permissions don't restrict the + # process owner (root in Docker, etc.). + inbox_uploads.stage_to_disk(b"first", "a.txt") + if os.geteuid() == 0: + pytest.skip("root bypasses permission bits") + os.chmod(inbox_uploads.CHAT_UPLOAD_DIR, 0o500) + try: + with pytest.raises(OSError): + inbox_uploads.stage_to_disk(b"second", "b.txt") + finally: + os.chmod(inbox_uploads.CHAT_UPLOAD_DIR, 0o755) + + +# --------------------------------------------------------------------------- +# is_chat_upload_row + _request_body_dict +# --------------------------------------------------------------------------- + + +def test_is_chat_upload_row_true_on_method_match(): + assert inbox_uploads.is_chat_upload_row({"method": "chat_upload_receive"}) + + +def test_is_chat_upload_row_false_on_other_methods(): + assert not inbox_uploads.is_chat_upload_row({"method": "message/send"}) + assert not inbox_uploads.is_chat_upload_row({"method": None}) + assert not inbox_uploads.is_chat_upload_row({}) + + +def test_request_body_dict_passthrough(): + body = {"file_id": "x"} + assert inbox_uploads._request_body_dict({"request_body": body}) is body + + +def test_request_body_dict_string_decoded(): + assert inbox_uploads._request_body_dict({"request_body": '{"a": 1}'}) == {"a": 1} + + +def test_request_body_dict_invalid_string_returns_none(): + assert inbox_uploads._request_body_dict({"request_body": "not json"}) is None + + +def test_request_body_dict_non_dict_after_decode_returns_none(): + assert inbox_uploads._request_body_dict({"request_body": "[1, 2]"}) is None + + +def test_request_body_dict_other_type_returns_none(): + assert inbox_uploads._request_body_dict({"request_body": 123}) is None + + +# --------------------------------------------------------------------------- +# fetch_and_stage — the full GET / write / ack flow +# --------------------------------------------------------------------------- + + +def _make_resp(status_code: int, content: bytes = b"", content_type: str = "", text: str = "") -> MagicMock: + resp = MagicMock() + resp.status_code = status_code + resp.content = content + headers: dict[str, str] = {} + if content_type: + headers["content-type"] = content_type + resp.headers = headers + resp.text = text + return resp + + +def _patch_httpx_for_fetch(get_resp: MagicMock, ack_resp: MagicMock | None = None): + """Patch httpx.Client so each new context-manager returns a client + whose .get() returns get_resp and .post() returns ack_resp. 
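+
+    Usage, as in the tests below:
+
+        p, client = _patch_httpx_for_fetch(_make_resp(200, content=b"x"))
+        with p:
+            ...  # exercise fetch_and_stage
+        client.post.assert_called_once()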
+ """ + client = MagicMock() + client.__enter__ = MagicMock(return_value=client) + client.__exit__ = MagicMock(return_value=False) + client.get = MagicMock(return_value=get_resp) + client.post = MagicMock(return_value=ack_resp or _make_resp(200)) + return patch("httpx.Client", return_value=client), client + + +def _row(file_id: str = "file-1", uri: str | None = None, name: str = "report.pdf", body_extra: dict | None = None) -> dict: + body: dict[str, Any] = { + "file_id": file_id, + "name": name, + "mimeType": "application/pdf", + "size": 9, + } + if uri is not None: + body["uri"] = uri + if body_extra: + body.update(body_extra) + return { + "id": "act-100", + "source_id": None, + "method": "chat_upload_receive", + "summary": "chat_upload_receive: report.pdf", + "request_body": body, + "created_at": "2026-05-04T10:00:00Z", + } + + +def test_fetch_and_stage_happy_path_writes_file_acks_and_caches(): + pending_uri = "platform-pending:ws-1/file-1" + row = _row(uri=pending_uri) + get_resp = _make_resp(200, content=b"PDF-bytes", content_type="application/pdf") + p, client = _patch_httpx_for_fetch(get_resp) + with p: + local_uri = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={"Authorization": "Bearer t"} + ) + assert local_uri is not None + assert local_uri.startswith("workspace:") + # On-disk file content matches. + path = local_uri[len("workspace:"):] + with open(path, "rb") as f: + assert f.read() == b"PDF-bytes" + # Cache populated. + assert inbox_uploads.get_cache().get(pending_uri) == local_uri + # Ack POSTed to the right URL. + client.post.assert_called_once() + args, kwargs = client.post.call_args + assert "/pending-uploads/file-1/ack" in args[0] + assert kwargs["headers"]["Authorization"] == "Bearer t" + + +def test_fetch_and_stage_reconstructs_uri_when_missing_in_body(): + row = _row(uri=None) # request_body has no 'uri' + get_resp = _make_resp(200, content=b"x", content_type="text/plain") + p, _ = _patch_httpx_for_fetch(get_resp) + with p: + inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + # Cache key reconstructed from workspace_id + file_id. + assert inbox_uploads.get_cache().get("platform-pending:ws-1/file-1") is not None + + +def test_fetch_and_stage_returns_none_on_missing_request_body(): + row = {"id": "act-100", "method": "chat_upload_receive"} + # No httpx call should happen, but we patch defensively. 
+ p, client = _patch_httpx_for_fetch(_make_resp(200)) + with p: + result = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert result is None + client.get.assert_not_called() + + +def test_fetch_and_stage_returns_none_on_missing_file_id(): + row = {"id": "act-100", "method": "chat_upload_receive", "request_body": {"name": "x.pdf"}} + p, client = _patch_httpx_for_fetch(_make_resp(200)) + with p: + result = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert result is None + client.get.assert_not_called() + + +def test_fetch_and_stage_handles_nonstring_file_id(): + row = {"id": "act-100", "method": "chat_upload_receive", "request_body": {"file_id": 123}} + p, client = _patch_httpx_for_fetch(_make_resp(200)) + with p: + result = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert result is None + client.get.assert_not_called() + + +def test_fetch_and_stage_404_returns_none_no_ack(): + row = _row() + get_resp = _make_resp(404, text="gone") + ack_resp = _make_resp(200) + p, client = _patch_httpx_for_fetch(get_resp, ack_resp) + with p: + result = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert result is None + # No ack — the row is already gone. + client.post.assert_not_called() + + +def test_fetch_and_stage_500_returns_none_no_ack(): + row = _row() + p, client = _patch_httpx_for_fetch(_make_resp(500, text="boom")) + with p: + result = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert result is None + client.post.assert_not_called() + + +def test_fetch_and_stage_network_error_returns_none(): + row = _row() + client = MagicMock() + client.__enter__ = MagicMock(return_value=client) + client.__exit__ = MagicMock(return_value=False) + client.get = MagicMock(side_effect=RuntimeError("connection refused")) + with patch("httpx.Client", return_value=client): + result = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert result is None + + +def test_fetch_and_stage_oversize_response_refused(): + row = _row() + big = b"x" * (inbox_uploads.MAX_FILE_BYTES + 1) + p, client = _patch_httpx_for_fetch(_make_resp(200, content=big, content_type="application/octet-stream")) + with p: + result = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert result is None + client.post.assert_not_called() + + +def test_fetch_and_stage_ack_failure_does_not_invalidate_local_uri(): + row = _row(uri="platform-pending:ws-1/file-1") + get_resp = _make_resp(200, content=b"data", content_type="text/plain") + ack_resp = _make_resp(500, text="ack failed") + p, _ = _patch_httpx_for_fetch(get_resp, ack_resp) + with p: + local_uri = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + # On-disk staging succeeded; ack failure is logged but doesn't + # roll back the cache. 
+ assert local_uri is not None + assert inbox_uploads.get_cache().get("platform-pending:ws-1/file-1") == local_uri + + +def test_fetch_and_stage_ack_network_error_swallowed(): + row = _row(uri="platform-pending:ws-1/file-1") + client = MagicMock() + client.__enter__ = MagicMock(return_value=client) + client.__exit__ = MagicMock(return_value=False) + client.get = MagicMock(return_value=_make_resp(200, content=b"data", content_type="text/plain")) + client.post = MagicMock(side_effect=RuntimeError("ack network error")) + with patch("httpx.Client", return_value=client): + result = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert result is not None # GET succeeded → URI returned even if ack blew up + + +def test_fetch_and_stage_uses_response_content_type_when_present(): + row = _row(name="thing.bin", body_extra={"mimeType": "application/x-bogus"}) + # Response says image/png; should win over body's mimeType. + get_resp = _make_resp(200, content=b"PNG", content_type="image/png; charset=binary") + p, _ = _patch_httpx_for_fetch(get_resp) + with p: + # We don't assert on returned mime (not part of the contract); + # the test just verifies the happy path runs without trying to + # parse the trailing parameter. + result = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert result is not None + + +def test_fetch_and_stage_nonstring_filename_falls_back_to_file(): + # body['name'] is a non-string (e.g. truncated to None or a number); + # filename must default to "file" so sanitize_filename has something + # to work with. + row = _row(body_extra={"name": 12345}) + p, _ = _patch_httpx_for_fetch(_make_resp(200, content=b"x", content_type="text/plain")) + with p: + local_uri = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert local_uri is not None + assert local_uri.endswith("-file") + + +def test_fetch_and_stage_default_filename_when_missing(): + row = { + "id": "act", + "method": "chat_upload_receive", + "request_body": {"file_id": "file-1"}, + } + p, _ = _patch_httpx_for_fetch(_make_resp(200, content=b"data", content_type="text/plain")) + with p: + local_uri = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert local_uri is not None + assert local_uri.endswith("-file") # default filename + + +def test_fetch_and_stage_disk_write_failure_returns_none(monkeypatch): + row = _row() + p, client = _patch_httpx_for_fetch(_make_resp(200, content=b"x", content_type="text/plain")) + + def bad_stage(*args, **kwargs): + raise OSError(28, "no space left") + monkeypatch.setattr(inbox_uploads, "stage_to_disk", bad_stage) + + with p: + result = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert result is None + client.post.assert_not_called() + + +def test_fetch_and_stage_disk_value_error_returns_none(monkeypatch): + row = _row() + p, client = _patch_httpx_for_fetch(_make_resp(200, content=b"x", content_type="text/plain")) + + def bad_stage(*args, **kwargs): + raise ValueError("oversize after sanity check") + monkeypatch.setattr(inbox_uploads, "stage_to_disk", bad_stage) + + with p: + result = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert result is None + client.post.assert_not_called() + + +def test_fetch_and_stage_httpx_missing_returns_none(monkeypatch): + row 
= _row()
+    # Simulate httpx not installed: a None entry in sys.modules makes
+    # `import httpx` raise ImportError (Python treats None as "module
+    # explicitly unavailable"), and monkeypatch restores the real module
+    # on teardown, so no manual save/restore bookkeeping is needed.
+    import sys
+    monkeypatch.setitem(sys.modules, "httpx", None)
+    result = inbox_uploads.fetch_and_stage(
+        row, platform_url="http://plat", workspace_id="ws-1", headers={}
+    )
+    assert result is None
+
+
+def test_fetch_and_stage_falls_back_to_extension_mime():
+    row = _row(name="snap.png", body_extra={"mimeType": ""})  # empty mimeType in body
+    # Response also has no content-type so it falls through to mimetypes.guess_type.
+    get_resp = _make_resp(200, content=b"PNG", content_type="")
+    p, _ = _patch_httpx_for_fetch(get_resp)
+    with p:
+        result = inbox_uploads.fetch_and_stage(
+            row, platform_url="http://plat", workspace_id="ws-1", headers={}
+        )
+    assert result is not None
+
+
+# ---------------------------------------------------------------------------
+# rewrite_request_body — URI swap in chat-message bodies
+# ---------------------------------------------------------------------------
+
+
+def test_rewrite_request_body_swaps_pending_uri_in_message_parts():
+    inbox_uploads.get_cache().set("platform-pending:ws/1", "workspace:/local/1")
+    body = {
+        "method": "message/send",
+        "params": {
+            "message": {
+                "parts": [
+                    {"kind": "text", "text": "see this"},
+                    {"kind": "file", "file": {"uri": "platform-pending:ws/1", "name": "a.pdf"}},
+                ]
+            }
+        },
+    }
+    inbox_uploads.rewrite_request_body(body)
+    assert body["params"]["message"]["parts"][1]["file"]["uri"] == "workspace:/local/1"
+
+
+def test_rewrite_request_body_swaps_in_params_parts():
+    inbox_uploads.get_cache().set("platform-pending:ws/2", "workspace:/local/2")
+    body = {
+        "params": {
+            "parts": [
+                {"kind": "file", "file": {"uri": "platform-pending:ws/2"}},
+            ]
+        }
+    }
+    inbox_uploads.rewrite_request_body(body)
+    assert body["params"]["parts"][0]["file"]["uri"] == "workspace:/local/2"
+
+
+def test_rewrite_request_body_swaps_in_top_level_parts():
+    inbox_uploads.get_cache().set("platform-pending:ws/3", "workspace:/local/3")
+    body = {
+        "parts": [{"kind": "file", "file": {"uri": "platform-pending:ws/3"}}]
+    }
+    inbox_uploads.rewrite_request_body(body)
+    assert body["parts"][0]["file"]["uri"] == "workspace:/local/3"
+
+
+def test_rewrite_request_body_leaves_unmatched_uri_unchanged():
+    # No cache entry → URI stays as-is. Agent surfaces the unresolvable
+    # URI rather than the inbox silently dropping the part.
+ body = { + "parts": [{"kind": "file", "file": {"uri": "platform-pending:ws/missing"}}] + } + inbox_uploads.rewrite_request_body(body) + assert body["parts"][0]["file"]["uri"] == "platform-pending:ws/missing" + + +def test_rewrite_request_body_leaves_non_pending_uri_unchanged(): + inbox_uploads.get_cache().set("platform-pending:ws/3", "workspace:/local/3") + body = { + "parts": [ + {"kind": "file", "file": {"uri": "workspace:/already-local.pdf"}}, + {"kind": "file", "file": {"uri": "https://example.com/x.pdf"}}, + ] + } + inbox_uploads.rewrite_request_body(body) + assert body["parts"][0]["file"]["uri"] == "workspace:/already-local.pdf" + assert body["parts"][1]["file"]["uri"] == "https://example.com/x.pdf" + + +def test_rewrite_request_body_skips_non_dict_parts(): + body = {"parts": ["not a dict", 42, None]} + inbox_uploads.rewrite_request_body(body) # must not raise + assert body["parts"] == ["not a dict", 42, None] + + +def test_rewrite_request_body_skips_text_parts(): + body = { + "parts": [{"kind": "text", "text": "platform-pending:ws/should-not-rewrite"}] + } + inbox_uploads.rewrite_request_body(body) + # Text content not touched — only file.uri fields are URIs. + assert body["parts"][0]["text"] == "platform-pending:ws/should-not-rewrite" + + +def test_rewrite_request_body_skips_part_without_file_dict(): + body = {"parts": [{"kind": "file"}]} # no file key + inbox_uploads.rewrite_request_body(body) + assert body["parts"] == [{"kind": "file"}] + + +def test_rewrite_request_body_skips_file_without_uri(): + body = {"parts": [{"kind": "file", "file": {"name": "x.pdf"}}]} + inbox_uploads.rewrite_request_body(body) + assert body["parts"][0]["file"] == {"name": "x.pdf"} + + +def test_rewrite_request_body_skips_nonstring_uri(): + body = {"parts": [{"kind": "file", "file": {"uri": None}}]} + inbox_uploads.rewrite_request_body(body) # must not raise + + +def test_rewrite_request_body_handles_non_dict_body(): + inbox_uploads.rewrite_request_body(None) # no-op + inbox_uploads.rewrite_request_body("string body") # no-op + inbox_uploads.rewrite_request_body([1, 2, 3]) # no-op + + +def test_rewrite_request_body_handles_non_dict_params(): + body = {"params": "not a dict", "parts": []} + inbox_uploads.rewrite_request_body(body) # must not raise + + +def test_rewrite_request_body_handles_non_dict_message(): + body = {"params": {"message": "not a dict"}} + inbox_uploads.rewrite_request_body(body) # must not raise + + +def test_rewrite_request_body_handles_non_list_parts(): + body = {"parts": "not a list"} + inbox_uploads.rewrite_request_body(body) # must not raise + + +def test_rewrite_request_body_handles_non_dict_file(): + body = {"parts": [{"kind": "file", "file": "not a dict"}]} + inbox_uploads.rewrite_request_body(body) # must not raise + + +# --------------------------------------------------------------------------- +# fetch_and_stage with shared client — Phase 5b client-reuse contract +# --------------------------------------------------------------------------- +# +# When a caller passes ``client=`` to fetch_and_stage, that client must be +# used for BOTH the GET /content and the POST /ack — no fresh +# ``httpx.Client(...)`` constructions should happen. The pre-Phase-5b +# implementation made one new client for GET and another for ack; the new +# shape lets BatchFetcher share one connection pool across an entire batch. 
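+#
+# A minimal sketch of the ownership shape these tests pin down. This is
+# hypothetical, compressed pseudocode, not the authoritative
+# implementation (which lives in inbox_uploads.fetch_and_stage); only
+# the endpoint fragments and the close/no-close behavior are taken from
+# the assertions below.
+#
+#     def fetch_and_stage(row, platform_url, workspace_id, headers, client=None):
+#         owns_client = client is None
+#         if owns_client:
+#             import httpx               # ImportError here → return None
+#             client = httpx.Client()
+#         try:
+#             resp = client.get(...)     # GET .../pending-uploads/<file_id>/content
+#             ...                        # stage_to_disk + URI-cache set
+#             client.post(...)           # POST .../pending-uploads/<file_id>/ack
+#         finally:
+#             if owns_client:
+#                 client.close()         # close only the client we built
+#
+# Leaving a supplied client open is what lets BatchFetcher amortize one
+# TCP+TLS handshake across every row in a batch.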
+ + +def test_fetch_and_stage_with_supplied_client_does_not_construct_new_client(monkeypatch): + row = _row(uri="platform-pending:ws-1/file-1") + get_resp = _make_resp(200, content=b"PDF", content_type="application/pdf") + ack_resp = _make_resp(200) + supplied = MagicMock() + supplied.get = MagicMock(return_value=get_resp) + supplied.post = MagicMock(return_value=ack_resp) + # Sentinel: any code path that constructs httpx.Client when one was + # already supplied is a regression — count constructions. + constructed: list[Any] = [] + + class _ShouldNotBeCalled: + def __init__(self, *a, **kw): + constructed.append((a, kw)) + + monkeypatch.setattr("httpx.Client", _ShouldNotBeCalled) + + local_uri = inbox_uploads.fetch_and_stage( + row, + platform_url="http://plat", + workspace_id="ws-1", + headers={"Authorization": "Bearer t"}, + client=supplied, + ) + assert local_uri is not None + assert constructed == [], "supplied client must be reused; no new Client should be constructed" + # GET + POST ack both went through the supplied client. + supplied.get.assert_called_once() + supplied.post.assert_called_once() + # Caller-owned client must NOT be closed by fetch_and_stage; the + # batch fetcher (or test) closes it once the whole batch is done. + supplied.close.assert_not_called() + + +def test_fetch_and_stage_without_supplied_client_constructs_and_closes_one(monkeypatch): + row = _row(uri="platform-pending:ws-1/file-1") + get_resp = _make_resp(200, content=b"PDF", content_type="application/pdf") + ack_resp = _make_resp(200) + built: list[MagicMock] = [] + + def _factory(*args, **kwargs): + c = MagicMock() + c.get = MagicMock(return_value=get_resp) + c.post = MagicMock(return_value=ack_resp) + built.append(c) + return c + + monkeypatch.setattr("httpx.Client", _factory) + + local_uri = inbox_uploads.fetch_and_stage( + row, platform_url="http://plat", workspace_id="ws-1", headers={} + ) + assert local_uri is not None + # Pre-Phase-5b built TWO clients (one for GET, one for ack); now exactly one. + assert len(built) == 1, f"expected 1 httpx.Client construction, got {len(built)}" + # Same client must serve BOTH calls. + built[0].get.assert_called_once() + built[0].post.assert_called_once() + # Owned client must be closed by fetch_and_stage on the way out. + built[0].close.assert_called_once() + + +def test_fetch_and_stage_with_supplied_client_does_not_close_caller_client(): + # Even on failure the supplied client must not be closed — the + # BatchFetcher owns the lifecycle for the whole batch. 
+ row = _row(uri="platform-pending:ws-1/file-1") + supplied = MagicMock() + supplied.get = MagicMock(side_effect=RuntimeError("network down")) + supplied.post = MagicMock() # should not be reached on GET failure + inbox_uploads.fetch_and_stage( + row, + platform_url="http://plat", + workspace_id="ws-1", + headers={}, + client=supplied, + ) + supplied.close.assert_not_called() + supplied.post.assert_not_called() + + +# --------------------------------------------------------------------------- +# BatchFetcher — concurrent fetch + URI cache barrier +# --------------------------------------------------------------------------- + + +def _row_with_id(act_id: str, file_id: str) -> dict: + """Helper: an upload-receive row with a distinct activity id + file id.""" + return { + "id": act_id, + "method": "chat_upload_receive", + "request_body": { + "file_id": file_id, + "name": f"{file_id}.pdf", + "uri": f"platform-pending:ws-1/{file_id}", + "mimeType": "application/pdf", + "size": 1, + }, + } + + +def _stub_client_for_batch(get_responses: dict[str, MagicMock]) -> MagicMock: + """Build one MagicMock client that returns per-file_id responses + based on the file_id segment of the URL. + """ + client = MagicMock() + + def _get(url: str, headers: dict[str, str] | None = None) -> MagicMock: + for fid, resp in get_responses.items(): + if f"/pending-uploads/{fid}/content" in url: + return resp + return _make_resp(404) + + def _post(url: str, headers: dict[str, str] | None = None) -> MagicMock: + return _make_resp(200) + + client.get = MagicMock(side_effect=_get) + client.post = MagicMock(side_effect=_post) + return client + + +def test_batch_fetcher_runs_submitted_rows_concurrently(): + # Three rows whose .get() blocks for ~120ms each. With 4 workers the + # batch should complete in ~120ms (parallel), not ~360ms (serial). + # The 250ms ceiling accommodates CI scheduler jitter while still + # discriminating concurrent (~120ms) from serial (~360ms). + import time + + barrier_start = [0.0] + + def _slow_get(url: str, headers: dict[str, str] | None = None) -> MagicMock: + time.sleep(0.12) + for fid in ("a", "b", "c"): + if f"/pending-uploads/{fid}/content" in url: + return _make_resp(200, content=b"X", content_type="text/plain") + return _make_resp(404) + + client = MagicMock() + client.get = MagicMock(side_effect=_slow_get) + client.post = MagicMock(return_value=_make_resp(200)) + + bf = inbox_uploads.BatchFetcher( + platform_url="http://plat", + workspace_id="ws-1", + headers={}, + client=client, + max_workers=4, + ) + barrier_start[0] = time.time() + for fid in ("a", "b", "c"): + bf.submit(_row_with_id(f"act-{fid}", fid)) + bf.wait_all() + elapsed = time.time() - barrier_start[0] + bf.close() + + assert elapsed < 0.25, ( + f"3 rows × 120ms with 4 workers should finish in <250ms; got {elapsed:.3f}s " + "(suggests serial execution — Phase 5b regression)" + ) + assert client.get.call_count == 3 + assert client.post.call_count == 3 + + +def test_batch_fetcher_wait_all_blocks_until_uri_cache_populated(): + """Pin the correctness invariant: when wait_all returns, the URI + cache is hot for every submitted row. Without this barrier the + inbox loop would process the chat-message row before its uploads + were staged, and rewrite_request_body would surface the un-rewritten + platform-pending: URI to the agent. 
+ """ + import time + + def _slow_get(url: str, headers: dict[str, str] | None = None) -> MagicMock: + time.sleep(0.05) + return _make_resp(200, content=b"data", content_type="text/plain") + + client = MagicMock() + client.get = MagicMock(side_effect=_slow_get) + client.post = MagicMock(return_value=_make_resp(200)) + + inbox_uploads.get_cache().clear() + with inbox_uploads.BatchFetcher( + platform_url="http://plat", workspace_id="ws-1", headers={}, client=client + ) as bf: + bf.submit(_row_with_id("act-a", "a")) + bf.submit(_row_with_id("act-b", "b")) + bf.wait_all() + # Cache must be hot for BOTH rows by the time wait_all returns. + assert inbox_uploads.get_cache().get("platform-pending:ws-1/a") is not None + assert inbox_uploads.get_cache().get("platform-pending:ws-1/b") is not None + + +def test_batch_fetcher_isolates_per_row_failure(): + """One failing fetch must not abort siblings. Sibling rows complete, + URI cache populates for them; the bad row's cache entry stays absent. + """ + def _get(url: str, headers: dict[str, str] | None = None) -> MagicMock: + if "/pending-uploads/bad/content" in url: + return _make_resp(500, text="upstream broken") + return _make_resp(200, content=b"ok", content_type="text/plain") + + client = MagicMock() + client.get = MagicMock(side_effect=_get) + client.post = MagicMock(return_value=_make_resp(200)) + + inbox_uploads.get_cache().clear() + with inbox_uploads.BatchFetcher( + platform_url="http://plat", workspace_id="ws-1", headers={}, client=client + ) as bf: + bf.submit(_row_with_id("act-1", "good1")) + bf.submit(_row_with_id("act-2", "bad")) + bf.submit(_row_with_id("act-3", "good2")) + bf.wait_all() + + cache = inbox_uploads.get_cache() + assert cache.get("platform-pending:ws-1/good1") is not None + assert cache.get("platform-pending:ws-1/good2") is not None + assert cache.get("platform-pending:ws-1/bad") is None + + +def test_batch_fetcher_reuses_one_client_across_all_submits(): + """Every row in the batch must share the same client instance. This + is the connection-pool-reuse leg of the perf win: a second fetch + to the same host reuses the TCP+TLS handshake from the first. + """ + client = MagicMock() + client.get = MagicMock(return_value=_make_resp(200, content=b"x", content_type="text/plain")) + client.post = MagicMock(return_value=_make_resp(200)) + + with inbox_uploads.BatchFetcher( + platform_url="http://plat", workspace_id="ws-1", headers={}, client=client + ) as bf: + for fid in ("a", "b", "c"): + bf.submit(_row_with_id(f"act-{fid}", fid)) + bf.wait_all() + + # 3 GETs + 3 POST acks all on the same client — no per-row Client + # construction. 
+    assert client.get.call_count == 3
+    assert client.post.call_count == 3
+
+
+def test_batch_fetcher_close_idempotent():
+    client = MagicMock()
+    bf = inbox_uploads.BatchFetcher(
+        platform_url="http://plat", workspace_id="ws-1", headers={}, client=client
+    )
+    bf.close()
+    bf.close()  # second call must not raise
+
+
+def test_batch_fetcher_submit_after_close_raises():
+    client = MagicMock()
+    bf = inbox_uploads.BatchFetcher(
+        platform_url="http://plat", workspace_id="ws-1", headers={}, client=client
+    )
+    bf.close()
+    with pytest.raises(RuntimeError, match="submit after close"):
+        bf.submit(_row_with_id("act-x", "x"))
+
+
+def test_batch_fetcher_owns_client_when_not_supplied(monkeypatch):
+    built: list[MagicMock] = []
+
+    def _factory(*args, **kwargs):
+        c = MagicMock()
+        c.get = MagicMock(return_value=_make_resp(200, content=b"x", content_type="text/plain"))
+        c.post = MagicMock(return_value=_make_resp(200))
+        built.append(c)
+        return c
+
+    monkeypatch.setattr("httpx.Client", _factory)
+
+    bf = inbox_uploads.BatchFetcher(
+        platform_url="http://plat", workspace_id="ws-1", headers={}
+    )
+    bf.submit(_row_with_id("act-a", "a"))
+    bf.wait_all()
+    bf.close()
+
+    assert len(built) == 1, "expected one owned client per BatchFetcher"
+    built[0].close.assert_called_once()
+
+
+def test_batch_fetcher_does_not_close_supplied_client():
+    client = MagicMock()
+    client.get = MagicMock(return_value=_make_resp(200, content=b"x", content_type="text/plain"))
+    client.post = MagicMock(return_value=_make_resp(200))
+    with inbox_uploads.BatchFetcher(
+        platform_url="http://plat", workspace_id="ws-1", headers={}, client=client
+    ) as bf:
+        bf.submit(_row_with_id("act-a", "a"))
+        bf.wait_all()
+    # Supplied client survives the BatchFetcher's close — caller's lifecycle.
+    client.close.assert_not_called()
+
+
+def test_batch_fetcher_wait_all_no_op_on_empty_batch():
+    client = MagicMock()
+    with inbox_uploads.BatchFetcher(
+        platform_url="http://plat", workspace_id="ws-1", headers={}, client=client
+    ) as bf:
+        bf.wait_all()  # nothing submitted; must not block, must not raise
+    client.get.assert_not_called()
+    client.post.assert_not_called()
+
+
+def test_batch_fetcher_httpx_missing_makes_submit_a_noop(monkeypatch):
+    # No client supplied + httpx import fails → BatchFetcher degrades
+    # gracefully: submit() returns None and the row is silently skipped.
+    # A None entry in sys.modules makes `import httpx` raise ImportError;
+    # monkeypatch restores the real module on teardown.
+    import sys
+
+    monkeypatch.setitem(sys.modules, "httpx", None)
+    bf = inbox_uploads.BatchFetcher(
+        platform_url="http://plat", workspace_id="ws-1", headers={}
+    )
+    result = bf.submit(_row_with_id("act-a", "a"))
+    bf.wait_all()
+    bf.close()
+    assert result is None
+
+
+def test_batch_fetcher_close_after_timeout_does_not_block_on_running_workers():
+    """The deadline contract: when wait_all times out, close() must NOT
+    block waiting for the leaked worker threads. Otherwise the inbox
+    poll loop stalls indefinitely on a hung /content fetch — undoing
+    the user-facing timeout.
+
+    Strategy: build a client whose .get() blocks on a threading.Event
+    that the test never sets. Submit a row, wait_all with a tiny
+    timeout, then time close(). If close() drained-and-waited it would
+    block until we set the event (i.e., forever in this test).
+ """ + import threading + import time + + blocker = threading.Event() # never set — workers stay running + + def _hang_get(url, headers=None): + # Wait at most ~5s so a buggy implementation eventually unblocks + # the test instead of timing out the whole pytest run, but + # nothing legitimate should reach this fallback. + blocker.wait(timeout=5.0) + return _make_resp(200, content=b"x", content_type="text/plain") + + client = MagicMock() + client.get = MagicMock(side_effect=_hang_get) + client.post = MagicMock(return_value=_make_resp(200)) + + bf = inbox_uploads.BatchFetcher( + platform_url="http://plat", + workspace_id="ws-1", + headers={}, + client=client, + max_workers=1, # serialize so submitting 1 keeps the worker busy + ) + bf.submit(_row_with_id("act-a", "a")) + # Tiny timeout — wait_all must report the future as not_done. + bf.wait_all(timeout=0.05) + t0 = time.time() + bf.close() + elapsed = time.time() - t0 + # Unblock the lingering worker so it doesn't pollute later tests. + blocker.set() + + # Without the cancel-on-timeout fix, close() would block until + # blocker.set() — i.e., the full ~5s. With the fix it returns + # immediately because shutdown(wait=False) doesn't drain. + assert elapsed < 1.0, ( + f"close() blocked for {elapsed:.2f}s after wait_all timeout — " + "cancel-on-timeout regression: close() is draining instead of bailing" + ) + + +def test_batch_fetcher_close_without_timeout_still_drains(): + """Negative leg of the timeout contract: when wait_all completes + cleanly (no timeout), close() must KEEP its drain-and-wait + behavior so a still-queued ack POST isn't dropped mid-write. + """ + import time + + def _slow_get(url, headers=None): + time.sleep(0.05) + return _make_resp(200, content=b"x", content_type="text/plain") + + client = MagicMock() + client.get = MagicMock(side_effect=_slow_get) + client.post = MagicMock(return_value=_make_resp(200)) + + bf = inbox_uploads.BatchFetcher( + platform_url="http://plat", + workspace_id="ws-1", + headers={}, + client=client, + max_workers=2, + ) + bf.submit(_row_with_id("act-a", "a")) + bf.submit(_row_with_id("act-b", "b")) + bf.wait_all() # generous default timeout — should not fire + bf.close() + + # All 2 GETs + 2 ACK POSTs ran to completion via drain-and-wait. + assert client.get.call_count == 2 + assert client.post.call_count == 2 diff --git a/workspace/tests/test_mcp_cli.py b/workspace/tests/test_mcp_cli.py index 608d1e7c..a1061394 100644 --- a/workspace/tests/test_mcp_cli.py +++ b/workspace/tests/test_mcp_cli.py @@ -13,6 +13,7 @@ from pathlib import Path import pytest import mcp_cli +import mcp_heartbeat @pytest.fixture(autouse=True) @@ -739,8 +740,13 @@ def test_heartbeat_loop_calls_persist_on_success(monkeypatch): def fake_persist(resp): saw.append(resp) + # Patch on mcp_heartbeat — that's where heartbeat_loop's internal + # name resolution looks up persist_inbound_secret_from_heartbeat + # after the RFC #2873 iter 3 split. The mcp_cli._persist_…_from_heartbeat + # back-compat re-export still exists, but patching it here would not + # affect the loop body. 
monkeypatch.setattr( - mcp_cli, "_persist_inbound_secret_from_heartbeat", fake_persist + mcp_heartbeat, "persist_inbound_secret_from_heartbeat", fake_persist ) class FakeResp: @@ -786,8 +792,8 @@ def test_heartbeat_loop_skips_persist_on_4xx(monkeypatch): """Heartbeat 4xx error path must NOT invoke persist (no body to trust).""" saw: list[object] = [] monkeypatch.setattr( - mcp_cli, - "_persist_inbound_secret_from_heartbeat", + mcp_heartbeat, + "persist_inbound_secret_from_heartbeat", lambda r: saw.append(r), ) @@ -899,7 +905,7 @@ def test_heartbeat_single_401_logs_warning_not_error(monkeypatch, caplog): transient platform blip. Log at WARNING; don't shout.""" import logging - caplog.set_level(logging.WARNING, logger="mcp_cli") + caplog.set_level(logging.WARNING, logger="mcp_heartbeat") _multi_iter_runner(monkeypatch, [401]) @@ -923,7 +929,7 @@ def test_heartbeat_three_consecutive_401s_escalates_to_error(monkeypatch, caplog LOUD ERROR with re-onboard guidance — not buried at WARNING.""" import logging - caplog.set_level(logging.WARNING, logger="mcp_cli") + caplog.set_level(logging.WARNING, logger="mcp_heartbeat") _multi_iter_runner(monkeypatch, [401, 401, 401]) @@ -949,7 +955,7 @@ def test_heartbeat_403_treated_same_as_401(monkeypatch, caplog): not authorized for this workspace). Same escalation path.""" import logging - caplog.set_level(logging.WARNING, logger="mcp_cli") + caplog.set_level(logging.WARNING, logger="mcp_heartbeat") _multi_iter_runner(monkeypatch, [403, 403, 403]) @@ -963,7 +969,7 @@ def test_heartbeat_recovery_resets_consecutive_counter(monkeypatch, caplog): later should NOT immediately escalate.""" import logging - caplog.set_level(logging.WARNING, logger="mcp_cli") + caplog.set_level(logging.WARNING, logger="mcp_heartbeat") # Two 401s, then 200, then one 401. If counter resets correctly, # the final 401 is "1 consecutive" and should NOT escalate. @@ -982,7 +988,7 @@ def test_heartbeat_500_does_not_increment_auth_counter(monkeypatch, caplog): misleading the operator.""" import logging - caplog.set_level(logging.WARNING, logger="mcp_cli") + caplog.set_level(logging.WARNING, logger="mcp_heartbeat") _multi_iter_runner(monkeypatch, [500, 500, 500]) diff --git a/workspace/tests/test_mcp_cli_split.py b/workspace/tests/test_mcp_cli_split.py new file mode 100644 index 00000000..e8a39817 --- /dev/null +++ b/workspace/tests/test_mcp_cli_split.py @@ -0,0 +1,231 @@ +"""RFC #2873 iter 3 — drift gate + behavior tests for the post-split surface. + +The bulk of the heartbeat / resolver behavior is exercised by +``test_mcp_cli.py`` and ``test_mcp_cli_multi_workspace.py`` through the +``mcp_cli._symbol`` back-compat aliases. This file pins: + + 1. The split is **behavior-neutral via aliasing** — every previously- + exposed ``mcp_cli._foo`` symbol is the SAME callable as the new + module's authoritative function. If a refactor accidentally drops + an alias or points it at a stale copy, this fails. + + 2. ``mcp_inbox_pollers.start_inbox_pollers`` works for both single- + workspace (legacy back-compat) and multi-workspace shapes. + ``mcp_cli`` had no direct test for this branch before the split. +""" +from __future__ import annotations + +import sys +import types + +import pytest + +import mcp_cli +import mcp_heartbeat +import mcp_inbox_pollers +import mcp_workspace_resolver + + +# ============== Drift gate: back-compat aliases point at the real fn ============== + +class TestBackCompatAliases: + """Pin that ``mcp_cli._foo is real_fn``. 
A test that re-implements + the alias would still pass — the ``is`` check guarantees we didn't + create a wrapper that drifts.""" + + def test_heartbeat_aliases(self): + assert mcp_cli._build_agent_card is mcp_heartbeat.build_agent_card + assert mcp_cli._platform_register is mcp_heartbeat.platform_register + assert mcp_cli._heartbeat_loop is mcp_heartbeat.heartbeat_loop + assert mcp_cli._log_heartbeat_auth_failure is mcp_heartbeat.log_heartbeat_auth_failure + assert ( + mcp_cli._persist_inbound_secret_from_heartbeat + is mcp_heartbeat.persist_inbound_secret_from_heartbeat + ) + assert mcp_cli._start_heartbeat_thread is mcp_heartbeat.start_heartbeat_thread + + def test_resolver_aliases(self): + assert mcp_cli._resolve_workspaces is mcp_workspace_resolver.resolve_workspaces + assert mcp_cli._print_missing_env_help is mcp_workspace_resolver.print_missing_env_help + assert mcp_cli._read_token_file is mcp_workspace_resolver.read_token_file + + def test_inbox_pollers_alias(self): + assert mcp_cli._start_inbox_pollers is mcp_inbox_pollers.start_inbox_pollers + + def test_constants_match(self): + assert ( + mcp_cli.HEARTBEAT_INTERVAL_SECONDS + == mcp_heartbeat.HEARTBEAT_INTERVAL_SECONDS + ) + assert ( + mcp_cli._HEARTBEAT_AUTH_LOUD_THRESHOLD + == mcp_heartbeat.HEARTBEAT_AUTH_LOUD_THRESHOLD + ) + assert ( + mcp_cli._HEARTBEAT_AUTH_RELOG_INTERVAL + == mcp_heartbeat.HEARTBEAT_AUTH_RELOG_INTERVAL + ) + + +# ============== mcp_inbox_pollers — both shapes + degraded import ============== + +class _FakeInboxState: + def __init__(self, **kwargs): + self.kwargs = kwargs + + +def _install_fake_inbox(monkeypatch): + """Inject a fake ``inbox`` module so we observe the spawn calls + without pulling in the real platform_auth dependency tree.""" + activations: list[_FakeInboxState] = [] + spawned: list[tuple[_FakeInboxState, str, str]] = [] + cursor_paths: list[str] = [] + + def default_cursor_path(wsid=None): + # Mirror the real signature: optional wsid → distinct path per id, + # absent → legacy single path. + path = f"/tmp/.mcp_inbox_cursor.{wsid[:8]}" if wsid else "/tmp/.mcp_inbox_cursor" + cursor_paths.append(path) + return path + + def activate(state): + activations.append(state) + + def start_poller_thread(state, platform_url, wsid): + spawned.append((state, platform_url, wsid)) + + fake = types.ModuleType("inbox") + fake.InboxState = _FakeInboxState + fake.activate = activate + fake.default_cursor_path = default_cursor_path + fake.start_poller_thread = start_poller_thread + monkeypatch.setitem(sys.modules, "inbox", fake) + return activations, spawned, cursor_paths + + +class TestStartInboxPollers: + def test_single_workspace_uses_legacy_cursor_path(self, monkeypatch): + """Back-compat exact: single-workspace mode reuses the legacy + cursor filename so an existing operator's on-disk state isn't + invalidated by upgrade.""" + activations, spawned, cursor_paths = _install_fake_inbox(monkeypatch) + + mcp_inbox_pollers.start_inbox_pollers( + "https://test.moleculesai.app", ["ws-only-one"] + ) + + assert len(activations) == 1, "exactly one inbox.activate call" + assert len(spawned) == 1, "exactly one poller thread spawned" + # Single-workspace path uses default_cursor_path() with no arg — + # the cursor_path captured here must be the legacy filename + # (no per-ws suffix). 
+        assert cursor_paths == ["/tmp/.mcp_inbox_cursor"]
+        # State carries cursor_path, not cursor_paths
+        state = activations[0]
+        assert state.kwargs == {"cursor_path": "/tmp/.mcp_inbox_cursor"}
+        # Spawned poller is for the right workspace
+        assert spawned[0] == (state, "https://test.moleculesai.app", "ws-only-one")
+
+    def test_multi_workspace_uses_per_workspace_cursor_paths(self, monkeypatch):
+        """Multi-workspace path: per-workspace cursor file, one shared
+        InboxState. N pollers, each pointed at the same state so the
+        agent's inbox_peek/pop sees a merged view."""
+        activations, spawned, _ = _install_fake_inbox(monkeypatch)
+
+        wsids = ["ws-aaaaaaaa", "ws-bbbbbbbb", "ws-cccccccc"]
+        mcp_inbox_pollers.start_inbox_pollers(
+            "https://test.moleculesai.app", wsids
+        )
+
+        # One state, one activate, three pollers
+        assert len(activations) == 1
+        assert len(spawned) == 3
+        state = activations[0]
+        # Multi-workspace state carries cursor_paths (mapping)
+        assert "cursor_paths" in state.kwargs
+        assert set(state.kwargs["cursor_paths"].keys()) == set(wsids)
+        # All pollers share the same state
+        for s, _url, _wsid in spawned:
+            assert s is state
+        # All workspace ids covered
+        assert sorted(t[2] for t in spawned) == sorted(wsids)
+
+    def test_inbox_module_unavailable_logs_and_returns(self, monkeypatch, caplog):
+        """If ``import inbox`` fails (older install or stripped
+        runtime), spawn must NOT raise — log a warning and continue.
+        The MCP server can still serve outbound tools."""
+        import logging
+
+        # Force the ImportError by poisoning sys.modules with a None
+        # entry; Python special-cases None as "explicit ImportError".
+        # A broken-but-truthy sentinel object would NOT work: the import
+        # statement returns whatever is in sys.modules without touching
+        # its attributes.
+        monkeypatch.setitem(sys.modules, "inbox", None)
+
+        caplog.set_level(logging.WARNING, logger="mcp_inbox_pollers")
+        # Should not raise.
+        mcp_inbox_pollers.start_inbox_pollers(
+            "https://test.moleculesai.app", ["ws-1"]
+        )
+        warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
+        assert any("inbox module unavailable" in r.message for r in warnings), (
+            f"expected an 'inbox module unavailable' warning, got: "
+            f"{[r.message for r in warnings]}"
+        )
+
+
+# ============== mcp_heartbeat.build_agent_card — short direct tests ==============
+
+class TestBuildAgentCardDirect:
+    """Spot-check the new module's public surface; the full test matrix
+    lives in ``test_mcp_cli.py`` reaching through ``mcp_cli._build_agent_card``.
+ """ + + def test_default_card_shape(self, monkeypatch): + for v in ("MOLECULE_AGENT_NAME", "MOLECULE_AGENT_DESCRIPTION", "MOLECULE_AGENT_SKILLS"): + monkeypatch.delenv(v, raising=False) + card = mcp_heartbeat.build_agent_card("8dad3e29-c32a-4ec7-9ea7-94fe2d2d98ec") + assert card == {"name": "molecule-mcp-8dad3e29", "skills": []} + + def test_skills_csv_split_and_trim(self, monkeypatch): + monkeypatch.setenv("MOLECULE_AGENT_SKILLS", "research, , code-review,memory-curation, ") + card = mcp_heartbeat.build_agent_card("ws-1") + assert card["skills"] == [ + {"name": "research"}, + {"name": "code-review"}, + {"name": "memory-curation"}, + ] + + +# ============== mcp_workspace_resolver — short direct tests ============== + +class TestResolveWorkspacesDirect: + @pytest.fixture(autouse=True) + def _isolate(self, monkeypatch, tmp_path): + for v in ("WORKSPACE_ID", "MOLECULE_WORKSPACE_TOKEN", "MOLECULE_WORKSPACES"): + monkeypatch.delenv(v, raising=False) + monkeypatch.setenv("CONFIGS_DIR", str(tmp_path)) + yield + + def test_single_workspace_via_env(self, monkeypatch): + monkeypatch.setenv("WORKSPACE_ID", "ws-1") + monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN", "tok") + out, errors = mcp_workspace_resolver.resolve_workspaces() + assert out == [("ws-1", "tok")] + assert errors == [] + + def test_multi_workspace_via_json_env(self, monkeypatch): + monkeypatch.setenv( + "MOLECULE_WORKSPACES", + '[{"id":"ws-a","token":"a"},{"id":"ws-b","token":"b"}]', + ) + out, errors = mcp_workspace_resolver.resolve_workspaces() + assert out == [("ws-a", "a"), ("ws-b", "b")] + assert errors == [] diff --git a/workspace/tests/test_mcp_memory.py b/workspace/tests/test_mcp_memory.py index 117e5417..d2a7ac35 100644 --- a/workspace/tests/test_mcp_memory.py +++ b/workspace/tests/test_mcp_memory.py @@ -63,7 +63,7 @@ async def test_commit_memory_success(monkeypatch): mcp = _load_mcp() client = FakeClient() - monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client) + monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client) result = await mcp.handle_tool_call("commit_memory", { "content": "Architecture decision: use Go for backend", @@ -92,7 +92,7 @@ async def test_commit_memory_default_scope(monkeypatch): mcp = _load_mcp() client = FakeClient() - monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client) + monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client) result = await mcp.handle_tool_call("commit_memory", { "content": "Some note", @@ -108,7 +108,7 @@ async def test_recall_memory_success(monkeypatch): mcp = _load_mcp() client = FakeClient() - monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client) + monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client) result = await mcp.handle_tool_call("recall_memory", {"query": "architecture"}) @@ -127,7 +127,7 @@ async def test_recall_memory_empty(monkeypatch): async def get(self, url, params=None, headers=None, **kwargs): return FakeResponse(200, []) - monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: EmptyClient()) + monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: EmptyClient()) result = await mcp.handle_tool_call("recall_memory", {}) assert "No memories found" in result @@ -139,7 +139,7 @@ async def test_recall_memory_with_scope_filter(monkeypatch): mcp = _load_mcp() client = FakeClient() - monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client) + monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda 
**kw: client) await mcp.handle_tool_call("recall_memory", {"scope": "TEAM"}) diff --git a/workspace/tests/test_secret_redact.py b/workspace/tests/test_secret_redact.py index d0975969..ecc268e8 100644 --- a/workspace/tests/test_secret_redact.py +++ b/workspace/tests/test_secret_redact.py @@ -357,7 +357,7 @@ class TestA2AToolCommitMemoryRedactsSecrets: fake_client.post = _capture - with patch("a2a_tools.httpx.AsyncClient", return_value=fake_client): + with patch("a2a_tools_memory.httpx.AsyncClient", return_value=fake_client): await a2a_tools.tool_commit_memory(content_with_secret) stored = captured.get("content", "") @@ -385,7 +385,7 @@ class TestA2AToolCommitMemoryRedactsSecrets: fake_client.post = _capture - with patch("a2a_tools.httpx.AsyncClient", return_value=fake_client): + with patch("a2a_tools_memory.httpx.AsyncClient", return_value=fake_client): await a2a_tools.tool_commit_memory(f"key={key}") stored = captured.get("content", "")