Merge branch 'staging' into refactor/a2a-tools-messaging-extract-rfc2873-iter4d

This commit is contained in:
Hongming Wang 2026-05-05 13:41:55 -07:00 committed by GitHub
commit 0d0840d9d9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
71 changed files with 7668 additions and 1072 deletions

View File

@ -172,6 +172,9 @@ jobs:
- name: Run poll-mode + since_id cursor E2E (#2339)
if: needs.detect-changes.outputs.api == 'true'
run: bash tests/e2e/test_poll_mode_e2e.sh
- name: Run poll-mode chat upload E2E (RFC #2891)
if: needs.detect-changes.outputs.api == 'true'
run: bash tests/e2e/test_poll_mode_chat_upload_e2e.sh
- name: Dump platform log on failure
if: failure() && needs.detect-changes.outputs.api == 'true'
run: cat workspace-server/platform.log || true

View File

@ -18,7 +18,7 @@
// quick bounce between signup and either Checkout or the tenant UI.
import { useEffect, useState } from "react";
import { fetchSession, redirectToLogin, type Session } from "@/lib/auth";
import { fetchSession, redirectToLogin, signOut, type Session } from "@/lib/auth";
import { PLATFORM_URL } from "@/lib/api";
import { formatCredits, pillTone, bannerKind } from "@/lib/credits";
import { TermsGate } from "@/components/TermsGate";
@ -129,7 +129,7 @@ export default function OrgsPage() {
return <EmptyState banner={justCheckedOut ? <CheckoutBanner /> : null} />;
}
return (
<Shell>
<Shell session={session}>
{justCheckedOut && <CheckoutBanner />}
<ul className="space-y-3">
{orgs.map((o) => (
@ -160,11 +160,21 @@ function CheckoutBanner() {
);
}
function Shell({ children }: { children: React.ReactNode }) {
function Shell({
children,
session,
}: {
children: React.ReactNode;
// Optional: when present, the header renders the signed-in email +
// a Sign-out button. The empty-state Shell call doesn't have a
// session in scope, so accept null and skip the header chrome there.
session?: Session | null;
}) {
return (
<main className="min-h-screen bg-surface text-ink">
<TermsGate>
<div className="mx-auto max-w-2xl px-6 pt-20 pb-12">
{session ? <AccountBar session={session} /> : null}
<h1 className="text-3xl font-bold text-ink">Your organizations</h1>
<p className="mt-2 text-ink-mid">
Each org is an isolated Molecule workspace.
@ -177,6 +187,40 @@ function Shell({ children }: { children: React.ReactNode }) {
);
}
// AccountBar renders the signed-in email + a Sign-out button at the
// top of the page. Without this the user has no way to log out — the
// /cp/auth/signout endpoint exists on the control plane but no UI ever
// called it. Reported externally on 2026-05-05; this is the fix.
//
// Click → calls signOut() which POSTs /cp/auth/signout (clears the
// WorkOS session cookie + revokes at the provider) then bounces to
// /cp/auth/login. The signOut helper is best-effort — even on a 5xx
// or network failure the redirect fires so the user never gets stuck
// on an authed-looking page after they clicked Sign out.
function AccountBar({ session }: { session: Session }) {
const [signingOut, setSigningOut] = useState(false);
return (
<div className="mb-6 flex items-center justify-between text-sm text-ink-mid">
<span title="Signed-in user">{session.email}</span>
<button
type="button"
disabled={signingOut}
onClick={async () => {
setSigningOut(true);
await signOut();
// Redirect happens inside signOut; this line is for tests +
// edge cases (jsdom, blocked navigation) where it doesn't.
setSigningOut(false);
}}
className="rounded border border-line bg-surface-card px-3 py-1 text-xs text-ink hover:bg-surface-card disabled:opacity-50"
aria-label="Sign out"
>
{signingOut ? "Signing out…" : "Sign out"}
</button>
</div>
);
}
// DataResidencyNotice surfaces where workspace data lives so EU-based
// signups can make an informed choice (GDPR Art. 13 disclosure
// requirement). Plain text, no icon — the goal is clarity, not

View File

@ -48,16 +48,21 @@ export function EmptyState() {
});
// "Create blank" bypasses templates entirely — no preflight, no
// modal, just POST /workspaces with a default name and tier.
// Deliberately NOT routed through useTemplateDeploy because it
// has no `template.id` to deploy against.
// modal, just POST /workspaces with a default name. Deliberately
// NOT routed through useTemplateDeploy because it has no
// `template.id` to deploy against.
//
// tier is omitted so the backend picks a SaaS-aware default
// (T4 on SaaS, T3 on self-hosted — see WorkspaceHandler.DefaultTier).
// The previous hardcoded `tier: 2` shipped every fresh-tenant agent
// at Standard regardless of host, which surprised SaaS users whose
// CreateWorkspaceDialog already defaults to T4.
const createBlank = async () => {
setBlankCreating(true);
setBlankError(null);
try {
const ws = await api.post<{ id: string }>("/workspaces", {
name: "My First Agent",
tier: 2,
canvas: firstDeployCoords(),
});
handleDeployed(ws.id);
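
For context, a minimal Go sketch of the SaaS-aware default tier the comment above refers to. `WorkspaceHandler.DefaultTier` is named in the comment; the struct, flag, and helper names below are assumptions for illustration, not the actual platform code (only the T4-on-SaaS / T3-on-self-hosted values come from the comment).

```go
package platform

// workspaceHandlerSketch stands in for the real WorkspaceHandler; the
// isSaaS flag is a hypothetical config field.
type workspaceHandlerSketch struct {
	isSaaS bool // true on the hosted SaaS build, false when self-hosted
}

// DefaultTier is the tier used when a create-workspace request omits one.
func (h workspaceHandlerSketch) DefaultTier() int {
	if h.isSaaS {
		return 4 // T4: matches CreateWorkspaceDialog's SaaS default
	}
	return 3 // T3: self-hosted default
}
```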

View File

@ -286,6 +286,14 @@ function MyChatPanel({ workspaceId, data }: Props) {
const [error, setError] = useState<string | null>(null);
const [confirmRestart, setConfirmRestart] = useState(false);
const bottomRef = useRef<HTMLDivElement>(null);
// First-mount scroll-to-bottom needs `behavior: "instant"` — long
// conversations smooth-animate for ~300ms which any concurrent
// re-render can interrupt, leaving the user stuck mid-conversation
// when the chat tab opens. Subsequent appends (new agent messages)
// keep `smooth` for the visual "landing" feel. Flipped the first
// time messages.length goes positive, so a workspace switch (which
// remounts ChatTab) gets a fresh instant jump too.
const hasInitialScrollRef = useRef(false);
// Lazy-load older history on scroll-up.
// - containerRef = the scrollable messages viewport
// - topRef = sentinel above the messages list; IO observes it
@ -545,6 +553,15 @@ function MyChatPanel({ workspaceId, data }: Props) {
scrollAnchorRef.current = null;
return;
}
// Instant on first arrival of messages — smooth-scroll on a long
// conversation gets interrupted by concurrent renders and leaves
// the user stuck in the middle. After the first jump, subsequent
// appends animate as before.
if (!hasInitialScrollRef.current && messages.length > 0) {
hasInitialScrollRef.current = true;
bottomRef.current?.scrollIntoView({ behavior: "instant" as ScrollBehavior });
return;
}
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
}, [messages]);

View File

@ -1,6 +1,6 @@
"use client";
import { useState, useEffect, useMemo, useRef } from "react";
import { useState, useEffect, useLayoutEffect, useMemo, useRef, useCallback } from "react";
import ReactMarkdown from "react-markdown";
import remarkGfm from "remark-gfm";
import { api } from "@/lib/api";
@ -184,13 +184,23 @@ function unwrapErrorText(raw: string | null): string {
export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
const [messages, setMessages] = useState<CommMessage[]>([]);
const [loading, setLoading] = useState(true);
const [loadError, setLoadError] = useState<string | null>(null);
// Dedup by timestamp+type+peer to handle API load + WebSocket race
const seenKeys = useRef(new Set<string>());
const bottomRef = useRef<HTMLDivElement>(null);
// Mirrors the my-chat scroll behaviour from ChatTab (PR #2903) —
// smooth-scroll on a long history gets interrupted by concurrent
// renders and lands the panel mid-conversation. Switch the first
// arrival to instant; subsequent appends animate.
const hasInitialScrollRef = useRef(false);
// Load history
useEffect(() => {
// Load history. Extracted so the error-state retry button can
// re-invoke without remount. ChatTab uses the same shape
// (loadInitial → loadError state → retry button).
const loadInitial = useCallback(() => {
setLoading(true);
setLoadError(null);
seenKeys.current.clear();
api.get<ActivityEntry[]>(`/workspaces/${workspaceId}/activity?source=agent&limit=50`)
.then((entries) => {
const filtered = (entries ?? [])
@ -234,10 +244,15 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
// the .then body) — the panel just sat on the empty state
// with zero signal.
console.warn("AgentCommsPanel: load activity failed", err);
setLoadError(err instanceof Error ? err.message : String(err));
setLoading(false);
});
}, [workspaceId]);
useEffect(() => {
loadInitial();
}, [loadInitial]);
// Live updates routed through the global ReconnectingSocket. The
// previous pattern of `new WebSocket(WS_URL)` per panel had no
// onclose / no reconnect, so any drop (idle timeout, browser
@ -358,7 +373,18 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
} catch { /* ignore */ }
});
useEffect(() => {
// useLayoutEffect (not useEffect) so the scroll runs BEFORE paint —
// otherwise the user sees the panel jump for one frame on every
// append. Mirrors ChatTab's MyChatPanel scroll block.
useLayoutEffect(() => {
if (!hasInitialScrollRef.current && messages.length > 0) {
// Instant on first arrival — smooth-scroll on a long history
// gets interrupted by concurrent renders and lands the panel
// mid-conversation (the chat-opens-in-middle bug class).
hasInitialScrollRef.current = true;
bottomRef.current?.scrollIntoView({ behavior: "instant" as ScrollBehavior });
return;
}
bottomRef.current?.scrollIntoView({ behavior: "smooth" });
}, [messages]);
@ -366,6 +392,27 @@ export function AgentCommsPanel({ workspaceId }: { workspaceId: string }) {
return <div className="text-xs text-ink-soft text-center py-8">Loading agent communications...</div>;
}
if (loadError !== null && messages.length === 0) {
// Mirrors ChatTab my-chat error UI — surfaces the load failure
// with a retry button instead of silently rendering empty state.
return (
<div
role="alert"
className="mx-2 mt-2 rounded-lg border border-red-800/50 bg-red-950/30 px-3 py-2.5"
>
<p className="text-[11px] text-bad mb-1.5">
Failed to load agent communications: {loadError}
</p>
<button
onClick={loadInitial}
className="text-[10px] px-2 py-0.5 rounded bg-red-800/40 text-bad hover:bg-red-700/50 transition-colors"
>
Retry
</button>
</div>
);
}
if (messages.length === 0) {
return (
<div className="text-xs text-ink-soft text-center py-8">

View File

@ -0,0 +1,115 @@
// @vitest-environment jsdom
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { render, screen, fireEvent, waitFor } from "@testing-library/react";
// API mock — tests can override per case via apiGetMock.mockImplementationOnce.
const apiGetMock = vi.fn<(url: string) => Promise<unknown>>();
vi.mock("@/lib/api", () => ({
api: {
get: (url: string) => apiGetMock(url),
},
}));
// useSocketEvent — no-op for these render tests; live updates aren't
// what we're verifying here.
vi.mock("@/hooks/useSocketEvent", () => ({
useSocketEvent: () => {},
}));
// Canvas store — peer name resolution.
vi.mock("@/store/canvas", () => ({
useCanvasStore: {
getState: () => ({
nodes: [
{ id: "ws-self", data: { name: "Self" } },
{ id: "ws-peer", data: { name: "Peer Agent" } },
],
}),
},
}));
// Toaster shim — AgentCommsPanel imports showToast.
vi.mock("../../Toaster", () => ({
showToast: vi.fn(),
}));
import { AgentCommsPanel } from "../AgentCommsPanel";
// jsdom doesn't implement scrollIntoView. Tests that observe the call
// install a spy here; tests that don't care still need a no-op stub
// so the component doesn't throw.
const scrollSpy = vi.fn<(opts?: ScrollIntoViewOptions | boolean) => void>();
beforeEach(() => {
apiGetMock.mockReset();
scrollSpy.mockReset();
Element.prototype.scrollIntoView = scrollSpy as unknown as Element["scrollIntoView"];
});
afterEach(() => {
vi.clearAllMocks();
});
describe("AgentCommsPanel — initial-state parity with ChatTab my-chat", () => {
it("shows loading text while history fetch is in flight", () => {
apiGetMock.mockReturnValueOnce(new Promise(() => { /* never resolves */ }));
render(<AgentCommsPanel workspaceId="ws-self" />);
expect(screen.getByText("Loading agent communications...")).toBeDefined();
});
it("renders error UI with a Retry button when the history fetch rejects", async () => {
apiGetMock.mockRejectedValueOnce(new Error("network down"));
render(<AgentCommsPanel workspaceId="ws-self" />);
// Wait for the error state to render — loading→error transition is async.
const alert = await waitFor(() => screen.getByRole("alert"));
expect(alert.textContent).toMatch(/Failed to load agent communications/);
expect(alert.textContent).toMatch(/network down/);
// Retry button must be present and trigger a refetch.
const retry = screen.getByRole("button", { name: "Retry" });
apiGetMock.mockResolvedValueOnce([]); // success on retry
fireEvent.click(retry);
// Two calls total: initial load + retry. Pin via mock call count.
await waitFor(() => expect(apiGetMock.mock.calls.length).toBe(2));
});
it("falls back to empty-state copy when load succeeds with zero rows", async () => {
apiGetMock.mockResolvedValueOnce([]);
render(<AgentCommsPanel workspaceId="ws-self" />);
await waitFor(() =>
expect(screen.getByText("No agent-to-agent communications yet.")).toBeDefined(),
);
});
it("scrollIntoView is called with behavior=instant on the first message arrival", async () => {
apiGetMock.mockResolvedValueOnce([
{
id: "act-1",
activity_type: "a2a_send",
source_id: "ws-self",
target_id: "ws-peer",
method: "message/send",
summary: "Delegating",
request_body: { message: { parts: [{ text: "hi" }] } },
response_body: null,
status: "ok",
created_at: "2026-04-25T18:00:00Z",
},
]);
render(<AgentCommsPanel workspaceId="ws-self" />);
// useLayoutEffect is what makes the first call instant — wait for
// the panel to render at least one message.
await waitFor(() => expect(scrollSpy.mock.calls.length).toBeGreaterThan(0));
// The pinned contract: SOME call uses behavior: "instant" — the
// first-arrival case. Subsequent appends use "smooth", but those
// can't fire here (no live update yet).
const sawInstant = scrollSpy.mock.calls.some((args) => {
const opts = args[0];
return typeof opts === "object" && opts !== null && "behavior" in opts && opts.behavior === "instant";
});
expect(sawInstant).toBe(true);
});
});

View File

@ -2,7 +2,7 @@
* @vitest-environment jsdom
*/
import { describe, it, expect, vi, afterEach } from "vitest";
import { fetchSession, redirectToLogin } from "../auth";
import { fetchSession, redirectToLogin, signOut } from "../auth";
afterEach(() => {
vi.unstubAllGlobals();
@ -110,3 +110,157 @@ describe("redirectToLogin", () => {
expect((window.location as unknown as { href: string }).href).toBe(signupHref);
});
});
describe("signOut", () => {
// Helper — most tests need the same window.location stub.
function stubLocation(): void {
Object.defineProperty(window, "location", {
writable: true,
value: {
href: "https://acme.moleculesai.app/orgs",
pathname: "/orgs",
hostname: "acme.moleculesai.app",
protocol: "https:",
},
});
}
it("POSTs to /cp/auth/signout with credentials:include", async () => {
stubLocation();
const fetchMock = vi.fn().mockResolvedValue({
ok: true,
status: 200,
json: async () => ({ ok: true, logout_url: "" }),
});
vi.stubGlobal("fetch", fetchMock);
await signOut();
expect(fetchMock).toHaveBeenCalledTimes(1);
expect(fetchMock).toHaveBeenCalledWith(
expect.stringContaining("/cp/auth/signout"),
expect.objectContaining({ method: "POST", credentials: "include" }),
);
});
it("navigates to provider logout_url when the response includes one", async () => {
// The hosted-logout path is what actually breaks the SSO re-auth
// loop reported on PR #2913. Without this, AuthKit's browser
// cookie keeps the user signed in via SSO and any subsequent
// /cp/auth/login silently re-auths.
stubLocation();
const hostedLogout =
"https://api.workos.com/user_management/sessions/logout?session_id=cookie&return_to=https%3A%2F%2Fapp.moleculesai.app%2Forgs";
vi.stubGlobal(
"fetch",
vi.fn().mockResolvedValue({
ok: true,
status: 200,
json: async () => ({ ok: true, logout_url: hostedLogout }),
}),
);
await signOut();
const after = (window.location as unknown as { href: string }).href;
expect(after).toBe(hostedLogout);
});
it("falls back to /cp/auth/login when logout_url is empty (DisabledProvider / dev)", async () => {
// DisabledProvider returns "" — the local /cp/auth/login redirect
// works in dev/test where there's no SSO session to escape.
stubLocation();
vi.stubGlobal(
"fetch",
vi.fn().mockResolvedValue({
ok: true,
status: 200,
json: async () => ({ ok: true, logout_url: "" }),
}),
);
await signOut();
const after = (window.location as unknown as { href: string }).href;
// Tenant subdomain (acme.moleculesai.app) → auth origin is app.moleculesai.app.
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
});
it("redirects even when the POST fails so the user isn't stuck on an authed page", async () => {
// Critical UX invariant: clicking 'Sign out' MUST navigate away from
// the authenticated app, even if the network is down or the cookie
// is already invalid. Anything else looks like the button is
// broken — the precise complaint that triggered this fix.
stubLocation();
vi.stubGlobal("fetch", vi.fn().mockRejectedValue(new Error("network down")));
await signOut();
const after = (window.location as unknown as { href: string }).href;
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
});
it("redirects on 401 (session already invalid) just like 200", async () => {
// A user with an already-invalid cookie should still see the
// logout flow complete — no error, no stuck-on-app dead end.
// Note: 401 means res.ok=false → we don't read .json() at all,
// so a missing body is fine.
stubLocation();
vi.stubGlobal(
"fetch",
vi.fn().mockResolvedValue({
ok: false,
status: 401,
json: async () => ({}),
}),
);
await signOut();
const after = (window.location as unknown as { href: string }).href;
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
});
it("falls back to /cp/auth/login when the response body is malformed", async () => {
// Defensive parsing: a body that isn't valid JSON, or doesn't
// have logout_url, or has logout_url as the wrong type — none of
// these should strand the user on the authed page. Fallback path
// takes over.
stubLocation();
vi.stubGlobal(
"fetch",
vi.fn().mockResolvedValue({
ok: true,
status: 200,
json: async () => {
throw new Error("not json");
},
}),
);
await signOut();
const after = (window.location as unknown as { href: string }).href;
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
});
it("falls back to /cp/auth/login when logout_url is the wrong type", async () => {
// Even valid JSON should be type-checked: a non-string logout_url
// (e.g. server-side bug, version drift) must not crash or open-
// redirect the user.
stubLocation();
vi.stubGlobal(
"fetch",
vi.fn().mockResolvedValue({
ok: true,
status: 200,
json: async () => ({ ok: true, logout_url: 42 }),
}),
);
await signOut();
const after = (window.location as unknown as { href: string }).href;
expect(after).toBe("https://app.moleculesai.app/cp/auth/login");
});
});

View File

@ -67,3 +67,80 @@ export function redirectToLogin(screenHint: "sign-up" | "sign-in" = "sign-in"):
const dest = `${authOrigin}${AUTH_BASE}/${path}?return_to=${encodeURIComponent(returnTo)}`;
window.location.href = dest;
}
/**
* signOut posts to /cp/auth/signout to clear the WorkOS session cookie
* + revoke at the provider, then navigates the browser to the
* provider-supplied hosted logout URL (so the provider's BROWSER-side
* SSO cookie is cleared too; without this, AuthKit silently re-auths
* via SSO on the next /cp/auth/login and the user is "still signed
* in" after pressing Sign out).
*
* Two-layer flow:
* 1. POST /cp/auth/signout: CP clears OUR session cookie + revokes
* session_id at the provider API. The response includes
* `logout_url`, the AuthKit hosted URL the BROWSER must navigate
* to so the provider's own browser cookie is cleared.
* 2. window.location.href = <logout_url>: AuthKit clears its
* session, then redirects the browser to the configured
* return_to (defaults to APP_URL/orgs).
*
* Best-effort by design: a 5xx, network failure, missing logout_url
* (DisabledProvider, dev), or stale cookie still results in the
* browser navigating away. Leaving the user on a logged-in-looking
* page after they clicked "Sign out" is the worst possible UX. The
* fallback path navigates to /cp/auth/login on the auth origin, which
* works correctly in environments without a hosted logout flow (dev,
* tests, DisabledProvider).
*
* Throws nothing: callers can disable the button optimistically or
* await this and trust it returns. In a redirect-blocked test
* environment (jsdom under vitest) we still exit cleanly so unit tests
* can spy on the fetch call.
*/
export async function signOut(): Promise<void> {
let logoutURL: string | undefined;
// Fire-and-tolerate the POST. credentials:include is mandatory cross-
// origin so the SaaS canvas (acme.moleculesai.app) can hit
// app.moleculesai.app/cp/auth/signout with the session cookie.
try {
const res = await fetch(`${getAuthOrigin()}${AUTH_BASE}/signout`, {
method: "POST",
credentials: "include",
});
if (res.ok) {
// Body shape: {"ok": true, "logout_url": "..."}. logout_url is
// empty for DisabledProvider (dev/local) — we fall back to
// /cp/auth/login below. Defensive parsing: a malformed body
// shouldn't strand the user on the authed page.
const body: unknown = await res.json().catch(() => null);
if (
body &&
typeof body === "object" &&
"logout_url" in body &&
typeof (body as { logout_url: unknown }).logout_url === "string" &&
(body as { logout_url: string }).logout_url
) {
logoutURL = (body as { logout_url: string }).logout_url;
}
}
} catch {
// Ignore — we still redirect below.
}
if (typeof window === "undefined") return;
if (logoutURL) {
// Hosted logout: AuthKit clears its SSO cookie + redirects to
// return_to (configured server-side). This is the path that
// actually breaks the SSO re-auth loop.
window.location.href = logoutURL;
return;
}
// Fallback: no hosted logout (dev, DisabledProvider, network
// failure). Land on the login screen rather than the current URL:
// returning to a tenant URL after signout would just re-redirect
// through /cp/auth/login due to AuthGate. Send the user straight
// there with no return_to so they don't loop back into the org they
// just left.
const authOrigin = getAuthOrigin();
window.location.href = `${authOrigin}${AUTH_BASE}/login`;
}
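
For completeness, a rough Go sketch of the /cp/auth/signout response contract signOut() parses. Only the JSON field names come from the code above; the handler shape is an assumption (the real control-plane handler also clears the session cookie and revokes the WorkOS session, both elided here).

```go
package cpauth

import (
	"encoding/json"
	"net/http"
)

// signOutResponse mirrors the body the canvas-side signOut() reads.
type signOutResponse struct {
	OK        bool   `json:"ok"`
	LogoutURL string `json:"logout_url"` // "" for DisabledProvider (dev/test)
}

// handleSignOut is a sketch, not the actual control-plane handler: cookie
// clearing and provider revocation are omitted.
func handleSignOut(w http.ResponseWriter, r *http.Request) {
	logoutURL := "" // the real handler gets this back from the revocation step
	w.Header().Set("Content-Type", "application/json")
	_ = json.NewEncoder(w).Encode(signOutResponse{OK: true, LogoutURL: logoutURL})
}
```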

View File

@ -1,111 +0,0 @@
# Team Expansion (Recursive Workspaces)
When a workspace is expanded into a team, it gains sub-workspaces while its own agent remains as the **team lead** (coordinator). This is recursive — sub-workspaces can themselves be expanded into teams, infinitely deep.
## How It Works
When Developer PM is expanded into a team:
```
Business Core
|
+-- Developer PM (agent stays, becomes coordinator)
|
+-- Frontend Agent (sub-workspace, private scope)
+-- Backend Agent (sub-workspace, private scope)
+-- QA Agent (sub-workspace, private scope)
```
- Developer PM's agent **still exists** and acts as coordinator
- Developer PM receives incoming A2A messages from Business Core
- Developer PM's agent decides how to delegate to sub-workspaces
- Sub-workspaces talk to Developer PM and to each other (same level)
- Sub-workspaces **cannot** talk to Business Core or any workspace outside the team
## Communication Rules
| Direction | Allowed? | Example |
|-----------|----------|---------|
| Parent level -> team lead | Yes | Business Core -> Developer PM |
| Team lead -> sub-workspaces | Yes | Developer PM -> Frontend Agent |
| Sub-workspace -> team lead | Yes | Frontend Agent -> Developer PM |
| Sub-workspace <-> sibling | Yes | Frontend Agent <-> Backend Agent |
| Outside -> sub-workspace directly | No (403) | Business Core -> Frontend Agent |
| Sub-workspace -> outside directly | No | Frontend Agent -> Business Core |
The team lead (Developer PM) is the **only** bridge between the team's internal world and the outside.
## Scoped Registry
Sub-workspaces register in the platform registry but with a **private scope**. The registry knows about them but enforces access control.
```
Registry:
Business Core :8001 scope: public
Developer PM :8002 scope: public
Frontend Agent :8010 scope: private, parent=Developer PM
Backend Agent :8011 scope: private, parent=Developer PM
QA Agent :8012 scope: private, parent=Developer PM
```
- The platform can always discover any workspace (for provisioning, monitoring)
- The parent workspace can discover its sub-workspaces
- Sub-workspaces can discover their siblings (same parent)
- Outside workspaces get a **403 Forbidden** if they try to discover a private sub-workspace
## How to Expand
Expansion is triggered via `POST /workspaces/:id/expand`. The platform reads the `sub_workspaces` list from the workspace's config and provisions each one. On the canvas, users right-click a workspace node and select "Expand into team."
Collapsing is the inverse: `POST /workspaces/:id/collapse`. Sub-workspaces are stopped and removed.
## What Happens on Expansion
When Developer PM is expanded into a team, the hierarchy changes but the outside view doesn't. Business Core's parent/child relationship to Developer PM is unaffected — Developer PM still responds to the same A2A endpoint.
The events fired:
- `WORKSPACE_EXPANDED` with the new `sub_workspace_ids` in the payload
- `WORKSPACE_PROVISIONING` for each new sub-workspace
- `WORKSPACE_ONLINE` for each sub-workspace as they come up
Communication rules are automatically derived from the new hierarchy — no manual wiring needed.
## Canvas Behavior
- Children render as embedded mini-cards (`TeamMemberChip`) inside the parent node, not as separate canvas nodes
- Each mini-card shows full status: gradient bar, name, tier badge, skills pills, active tasks, descendant count
- **Recursive rendering** up to 3 levels deep (`MAX_NESTING_DEPTH = 3`) — sub-cards can contain their own "Team" sections
- Parent node dynamically resizes: 210-280px (no children), 320-450px (children), 400-560px (grandchildren)
- Eject button (sky-blue arrow icon) on hover extracts a child from the team
- "Extract from Team" also available in the right-click context menu
- Double-click a team node to zoom/fit to the parent area
- The parent workspace node shows a badge with total descendant count
## Collapsing a Team
The inverse of expansion, triggered via `POST /workspaces/:id/collapse`:
1. Each sub-workspace agent wraps up current work and writes a handoff document to memory
2. Sub-workspaces are stopped and removed
3. The team lead's agent goes back to handling everything directly
4. A `WORKSPACE_COLLAPSED` event fires
Sub-workspace memory is cleaned up based on backend (see [Memory — Cleanup](../architecture/memory.md#cleanup-on-workspace-deletion)).
## Deleting a Team Workspace
When a team workspace is deleted:
1. Platform shows a warning listing all sub-workspaces that will be deleted
2. User can **drag sub-workspaces out** of the team before confirming (promotes them to the parent level)
3. On confirmation, cascade delete removes the parent and all remaining sub-workspaces
4. `WORKSPACE_REMOVED` events fire for each deleted workspace
## Related Docs
- [Communication Rules](../api-protocol/communication-rules.md) — Full access control model
- [Core Concepts](../product/core-concepts.md) — Workspace fundamentals
- [System Prompt Structure](./system-prompt-structure.md) — How peer capabilities are injected
- [Provisioner](../architecture/provisioner.md) — How sub-workspaces are deployed
- [Registry & Heartbeat](../api-protocol/registry-and-heartbeat.md) — How registration works
- [Event Log](../architecture/event-log.md) — Events fired during expansion
- [Canvas UI](../frontend/canvas.md) — Visual behavior of teams

View File

@ -41,8 +41,6 @@ Full contract: `docs/runbooks/admin-auth.md`.
| GET | /admin/workspaces/:id/test-token | admin_test_token.go — mint a fresh bearer token for E2E scripts; returns 404 unless `MOLECULE_ENV != production` or `MOLECULE_ENABLE_TEST_TOKENS=1` |
| GET/POST/DELETE | /admin/secrets[/:key] | secrets.go — legacy aliases for /settings/secrets |
| WS | /workspaces/:id/terminal | terminal.go |
| POST | /workspaces/:id/expand | team.go |
| POST | /workspaces/:id/collapse | team.go |
| POST/GET | /workspaces/:id/approvals | approvals.go |
| POST | /workspaces/:id/approvals/:id/decide | approvals.go |
| GET | /approvals/pending | approvals.go |

View File

@ -336,8 +336,6 @@ This same logic governs: A2A delegation, memory scope enforcement, activity visi
| Method | Endpoint | Purpose |
|--------|----------|---------|
| `POST` | `/workspaces/:id/expand` | Expand workspace into team (become coordinator) |
| `POST` | `/workspaces/:id/collapse` | Collapse team back to single workspace |
### Files, Terminal, Templates, Bundles (8 endpoints)

View File

@ -186,4 +186,3 @@ So the UI now exposes more operational failure state directly instead of silentl
- [Quickstart](../quickstart.md)
- [Platform API](../api-protocol/platform-api.md)
- [Workspace Runtime](../agent-runtime/workspace-runtime.md)
- [Team Expansion](../agent-runtime/team-expansion.md)

View File

@ -18,7 +18,7 @@ lands in the watch list with a colliding term, add a row here.
| **plugin** | A directory under `plugins/` packaging one or more skills or an MCP server wrapper, installable per-workspace via `POST /workspaces/:id/plugins`. Governed by `plugin.yaml`. | **Langflow**: a visual UI node / component in a flowchart. **CrewAI**: a Python-importable callable registered as a capability. |
| **agent** | A persistent containerized workspace running continuously — an identity with memory, a role, and a schedule. Not a one-shot invocation. | Most frameworks (AutoGPT, LangChain agents, OpenAI Assistants): a stateless function-call loop. No persistence between invocations unless explicitly checkpointed. |
| **flow** | A task execution within a workspace — a request enters, the agent runs tools, emits a response, logs activity. No explicit graph abstraction. | **Langflow**: a directed graph of nodes you author visually. **LangGraph**: a stateful graph of callable nodes. Our "flow" is an imperative timeline, not a graph. |
| **team** | A named cluster of workspaces under a PM (org template `expand_team`). Used for role grouping in Canvas. | **CrewAI**: a "crew" is a sequence of agents that pass a task through a declared order. Our "team" is an org-chart abstraction, not an execution order. |
| **team** | A named cluster of workspaces under a PM. Used for role grouping in Canvas. | **CrewAI**: a "crew" is a sequence of agents that pass a task through a declared order. Our "team" is an org-chart abstraction, not an execution order. |
| **skill** | A directory with `SKILL.md` that an agent invokes via the `Skill` tool. Skills are documentation + optional scripts that teach an agent a recipe. | **Anthropic Skills API**: nearly identical. **CrewAI tool**: closer to our plugin's MCP tool, not our skill. |
| **channel** | An outbound/inbound social integration (Telegram, Slack, …) per-workspace, wired in `workspace_channels`. | Slack's "channel": the container for messages. We use "channel" for the adapter + credentials, not the conversation itself. |
| **runtime** | The execution engine image tag for a workspace: one of `langgraph`, `claude-code`, `openclaw`, `crewai`, `autogen`, `deepagents`, `hermes`. | **LangGraph runtime**: the Python process running the graph. We use "runtime" for the Docker image + adapter pairing, not the inner process. |

View File

@ -166,8 +166,6 @@ list_workspaces
| MCP Tool | API Route | Method | Description |
|----------|-----------|--------|-------------|
| `expand_team` | `/workspaces/:id/expand` | POST | Expand team node |
| `collapse_team` | `/workspaces/:id/collapse` | POST | Collapse team node |
### Templates & Bundles

View File

@ -73,11 +73,15 @@ TOP_LEVEL_MODULES = {
"executor_helpers",
"heartbeat",
"inbox",
"inbox_uploads",
"initial_prompt",
"internal_chat_uploads",
"internal_file_read",
"main",
"mcp_cli",
"mcp_heartbeat",
"mcp_inbox_pollers",
"mcp_workspace_resolver",
"molecule_ai_status",
"not_configured_handler",
"platform_auth",

View File

@ -0,0 +1,295 @@
#!/usr/bin/env bash
# E2E for poll-mode chat upload (RFC #2891 phases 1-5b).
#
# Round-trip: register a workspace as poll-mode (no callback URL) → POST a
# multi-file chat upload → verify each file becomes (a) one
# `chat_upload_receive` activity row and (b) one /pending-uploads row → fetch
# the bytes back via the poll endpoint → ack → verify the row 404s on
# subsequent fetch. Also pins cross-workspace bleed protection: workspace B
# cannot read workspace A's pending uploads even with its own valid bearer.
#
# Why this exists separately from test_chat_upload_e2e.sh: that script
# covers the PUSH path (the workspace's own /internal/chat/uploads/ingest).
# This script covers the POLL path: the same canvas-side request lands on
# the platform's pendinguploads.Storage instead, and the workspace fetches
# it later. The two paths share zero handler code on the platform side, so
# both need their own E2E.
#
# Requires: platform running on localhost:8080 with migrations applied.
# bash workspace-server/scripts/dev-start.sh
# bash workspace-server/scripts/run-migrations.sh
#
# Idempotent: each run uses fresh per-script workspace UUIDs so reruns
# don't collide. Best-effort cleanup on EXIT — does NOT call
# e2e_cleanup_all_workspaces (see
# `feedback_never_run_cluster_cleanup_tests_on_live_platform.md`).
set -euo pipefail
source "$(dirname "$0")/_lib.sh"
PASS=0
FAIL=0
TIMEOUT="${A2A_TIMEOUT:-30}"
gen_uuid() {
if command -v uuidgen >/dev/null 2>&1; then
uuidgen | tr '[:upper:]' '[:lower:]'
else
python3 -c 'import uuid; print(uuid.uuid4())'
fi
}
WS_A="$(gen_uuid)"
WS_B="$(gen_uuid)"
# Per-run scratch dir collected under one trap so every assertion-failure
# path drops the temp files it made (see test_chat_attachments_e2e.sh).
TMPDIR_E2E=$(mktemp -d -t poll-chat-upload-e2e-XXXXXX)
cleanup() {
local rc=$?
curl -s -X DELETE "$BASE/workspaces/$WS_A?confirm=true" >/dev/null 2>&1 || true
curl -s -X DELETE "$BASE/workspaces/$WS_B?confirm=true" >/dev/null 2>&1 || true
rm -rf "$TMPDIR_E2E"
exit $rc
}
trap cleanup EXIT INT TERM
check() {
local desc="$1" expected="$2" actual="$3"
if echo "$actual" | grep -qF -- "$expected"; then
echo "PASS: $desc"
PASS=$((PASS + 1))
else
echo "FAIL: $desc"
echo " expected to contain: $expected"
echo " got: $(echo "$actual" | head -10)"
FAIL=$((FAIL + 1))
fi
}
check_eq() {
local desc="$1" expected="$2" actual="$3"
if [ "$actual" = "$expected" ]; then
echo "PASS: $desc"
PASS=$((PASS + 1))
else
echo "FAIL: $desc"
echo " expected: $expected"
echo " got: $actual"
FAIL=$((FAIL + 1))
fi
}
echo "=== Poll-Mode Chat Upload E2E ==="
echo " base: $BASE"
echo " workspace A: $WS_A"
echo " workspace B: $WS_B"
echo ""
# ---------- Phase 1: register poll-mode workspace ----------
echo "--- Phase 1: Register poll-mode workspace A ---"
REG_A=$(curl -s -X POST "$BASE/registry/register" \
-H "Content-Type: application/json" \
-d "{
\"id\": \"$WS_A\",
\"delivery_mode\": \"poll\",
\"agent_card\": {\"name\": \"poll-chat-upload-test-a\"}
}")
check "register accepts poll mode without URL" '"status":"registered"' "$REG_A"
TOK_A=$(echo "$REG_A" | e2e_extract_token || true)
[ -n "$TOK_A" ] || { echo "FAIL: no auth_token in register response (ws A)"; FAIL=$((FAIL + 1)); exit 1; }
# ---------- Phase 2: multi-file chat upload ----------
echo ""
echo "--- Phase 2: POST /chat/uploads with two files ---"
FILE1="$TMPDIR_E2E/alpha.txt"
FILE2="$TMPDIR_E2E/beta.txt"
EXPECTED1="alpha-secret-$(openssl rand -hex 4)"
EXPECTED2="beta-secret-$(openssl rand -hex 4)"
printf '%s' "$EXPECTED1" > "$FILE1"
printf '%s' "$EXPECTED2" > "$FILE2"
UPLOAD=$(curl -s -X POST "$BASE/workspaces/$WS_A/chat/uploads" \
-H "Authorization: Bearer $TOK_A" \
-F "files=@$FILE1;filename=alpha.txt;type=text/plain" \
-F "files=@$FILE2;filename=beta.txt;type=text/plain" \
-w "\nHTTP_CODE=%{http_code}\n")
UPLOAD_CODE=$(echo "$UPLOAD" | grep -oE 'HTTP_CODE=[0-9]+' | cut -d= -f2)
UPLOAD_BODY=$(echo "$UPLOAD" | sed '/^HTTP_CODE=/,$d')
check_eq "upload returns 200" "200" "$UPLOAD_CODE"
check "upload response has files array" '"files":' "$UPLOAD_BODY"
# Pull file_ids out of the URI in the response. URI shape is
# `platform-pending:<wsid>/<file_id>` — proves the response came from the
# poll-mode branch, not the push-mode internal-ingest branch.
URI1=$(echo "$UPLOAD_BODY" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][0]["uri"])')
URI2=$(echo "$UPLOAD_BODY" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][1]["uri"])')
check "URI 1 has platform-pending: scheme" "platform-pending:$WS_A/" "$URI1"
check "URI 2 has platform-pending: scheme" "platform-pending:$WS_A/" "$URI2"
FID1="${URI1##*/}"
FID2="${URI2##*/}"
[ -n "$FID1" ] && [ -n "$FID2" ] || { echo "FAIL: could not extract file IDs"; FAIL=$((FAIL + 1)); exit 1; }
echo " file_id 1: $FID1"
echo " file_id 2: $FID2"
# ---------- Phase 3: activity rows visible to the workspace ----------
echo ""
echo "--- Phase 3: /activity shows two chat_upload_receive rows ---"
# activity_logs INSERTs run in a goroutine — give them a moment.
sleep 1
ACT=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
"$BASE/workspaces/$WS_A/activity?type=a2a_receive&limit=20")
check "activity feed has the alpha file" "$FID1" "$ACT"
check "activity feed has the beta file" "$FID2" "$ACT"
check "activity rows tagged chat_upload_receive" '"method":"chat_upload_receive"' "$ACT"
check "activity rows record alpha mimetype" '"mimeType":"text/plain"' "$ACT"
CHAT_UPLOAD_COUNT=$(echo "$ACT" | python3 -c '
import json, sys
rows = json.load(sys.stdin)
n = sum(1 for r in rows if (r.get("method") or "") == "chat_upload_receive")
print(n)
')
check_eq "exactly two chat_upload_receive rows" "2" "$CHAT_UPLOAD_COUNT"
# ---------- Phase 4: GET /pending-uploads/:file_id/content ----------
echo ""
echo "--- Phase 4: Fetch content for each pending upload ---"
GOT1=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
check_eq "alpha bytes round-trip" "$EXPECTED1" "$GOT1"
GOT2=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
"$BASE/workspaces/$WS_A/pending-uploads/$FID2/content")
check_eq "beta bytes round-trip" "$EXPECTED2" "$GOT2"
# Mimetype + Content-Disposition headers should match what was uploaded.
HEAD1=$(curl -s -D - -o /dev/null --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
check "alpha response carries text/plain Content-Type" "Content-Type: text/plain" "$HEAD1"
check "alpha response carries Content-Disposition with filename" 'filename="alpha.txt"' "$HEAD1"
# ---------- Phase 5: idempotent re-fetch (until ack) ----------
echo ""
echo "--- Phase 5: Re-fetch before ack returns the same bytes ---"
RE_GOT1=$(curl -s --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
check_eq "re-fetch returns same alpha bytes" "$EXPECTED1" "$RE_GOT1"
# ---------- Phase 6: ack each row ----------
echo ""
echo "--- Phase 6: Ack each pending upload ---"
ACK1=$(curl -s -X POST --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/ack")
check "alpha ack returns acked:true" '"acked":true' "$ACK1"
ACK2=$(curl -s -X POST --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
"$BASE/workspaces/$WS_A/pending-uploads/$FID2/ack")
check "beta ack returns acked:true" '"acked":true' "$ACK2"
# Re-ack behavior: the idempotent path returns 200 (the row's gone but the
# workspace's at-least-once intent was already honored); a Get-before-Ack
# implementation returns 404 instead. See the check below.
RE_ACK1=$(curl -s -w '\n%{http_code}' -X POST --max-time "$TIMEOUT" \
-H "Authorization: Bearer $TOK_A" \
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/ack")
RE_ACK1_CODE=$(printf '%s' "$RE_ACK1" | tail -n1)
# Acked rows return 404 on Get-before-Ack (the row's still in the table
# but Get filters acked_at IS NULL); workspace would not normally re-ack
# since it already saw the success. Accept both 200 and 404 here so the
# test pins the contract without being brittle on the inner ordering.
case "$RE_ACK1_CODE" in
200|404)
echo "PASS: re-ack returns 200 or 404 ($RE_ACK1_CODE)"
PASS=$((PASS + 1))
;;
*)
echo "FAIL: re-ack returned unexpected $RE_ACK1_CODE"
FAIL=$((FAIL + 1))
;;
esac
# ---------- Phase 7: GET content after ack returns 404 ----------
echo ""
echo "--- Phase 7: Acked file 404s on subsequent fetch ---"
POST_ACK=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" -H "Authorization: Bearer $TOK_A" \
"$BASE/workspaces/$WS_A/pending-uploads/$FID1/content")
POST_ACK_CODE=$(printf '%s' "$POST_ACK" | tail -n1)
check_eq "acked alpha returns HTTP 404" "404" "$POST_ACK_CODE"
# ---------- Phase 8: cross-workspace bleed protection ----------
echo ""
echo "--- Phase 8: Workspace B cannot read workspace A's pending uploads ---"
# Stage a fresh upload on workspace A so we have an UN-acked row to probe.
PROBE_FILE="$TMPDIR_E2E/probe.txt"
printf '%s' "probe-bytes-$(openssl rand -hex 4)" > "$PROBE_FILE"
PROBE_UP=$(curl -s -X POST "$BASE/workspaces/$WS_A/chat/uploads" \
-H "Authorization: Bearer $TOK_A" \
-F "files=@$PROBE_FILE;filename=probe.txt;type=text/plain")
PROBE_FID=$(echo "$PROBE_UP" | python3 -c 'import sys,json; d=json.load(sys.stdin); print(d["files"][0]["uri"].split("/")[-1])')
[ -n "$PROBE_FID" ] || { echo "FAIL: probe upload returned no file_id"; FAIL=$((FAIL + 1)); exit 1; }
# Register a SECOND poll-mode workspace and capture its bearer.
REG_B=$(curl -s -X POST "$BASE/registry/register" \
-H "Content-Type: application/json" \
-d "{
\"id\": \"$WS_B\",
\"delivery_mode\": \"poll\",
\"agent_card\": {\"name\": \"poll-chat-upload-test-b\"}
}")
check "second workspace registers" '"status":"registered"' "$REG_B"
TOK_B=$(echo "$REG_B" | e2e_extract_token || true)
[ -n "$TOK_B" ] || { echo "FAIL: no auth_token (ws B)"; FAIL=$((FAIL + 1)); exit 1; }
# B's bearer hitting B's URL with A's file_id → 404 (handler checks the row's
# workspace_id matches the URL :id, not the bearer's workspace).
CROSS_RESP=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
-H "Authorization: Bearer $TOK_B" \
"$BASE/workspaces/$WS_B/pending-uploads/$PROBE_FID/content")
CROSS_CODE=$(printf '%s' "$CROSS_RESP" | tail -n1)
check_eq "B's URL with A's file_id returns 404" "404" "$CROSS_CODE"
# B's bearer hitting A's URL → 401 (wsAuth pins bearer to :id). This is the
# strictest cross-workspace check: a presented-but-wrong bearer is rejected
# in EVERY platform posture (dev-mode fail-open only triggers when no bearer
# is presented at all — invalid tokens always 401).
WRONG_BEARER=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
-H "Authorization: Bearer $TOK_B" \
"$BASE/workspaces/$WS_A/pending-uploads/$PROBE_FID/content")
WRONG_CODE=$(printf '%s' "$WRONG_BEARER" | tail -n1)
check_eq "B's bearer on A's URL returns 401" "401" "$WRONG_CODE"
# NB: a fully bearerless request to /pending-uploads/:fid/content returns
# 401 ONLY when the platform has MOLECULE_ENV != development (production /
# staging). On local-dev with MOLECULE_ENV=development the wsauth middleware
# fail-opens for bearerless requests so the canvas at :3000 can talk to the
# platform at :8080 without per-call token plumbing — see middleware/
# devmode.go. The strict bearerless-401 contract is covered by the wsauth
# unit + middleware tests; we don't reassert it here because the result
# depends on platform posture, not the poll-mode upload contract.
# ---------- Phase 9: invalid file_id rejected at the URL parser ----------
echo ""
echo "--- Phase 9: Invalid file_id returns 400 ---"
BAD_FID=$(curl -s -w '\n%{http_code}' --max-time "$TIMEOUT" \
-H "Authorization: Bearer $TOK_A" \
"$BASE/workspaces/$WS_A/pending-uploads/not-a-uuid/content")
BAD_FID_CODE=$(printf '%s' "$BAD_FID" | tail -n1)
check_eq "invalid file_id UUID returns 400" "400" "$BAD_FID_CODE"
# ---------- Results ----------
echo ""
echo "=== Results: $PASS passed, $FAIL failed ==="
[ "$FAIL" -eq 0 ]

View File

@ -94,6 +94,13 @@ services:
CP_UPSTREAM_URL: "http://cp-stub:9090"
RATE_LIMIT: "1000"
CANVAS_PROXY_URL: "http://localhost:3000"
# Memory v2 sidecar (PR #2906) bundles the plugin into the
# tenant image and starts it before the main server. The plugin
# runs `CREATE EXTENSION vector` on first boot, which fails on
# the harness's plain postgres:15-alpine (no pgvector). The
# harness doesn't exercise memory features, so disable the
# sidecar via the entrypoint's documented escape hatch.
MEMORY_PLUGIN_DISABLE: "1"
networks: [harness-net]
healthcheck:
test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"]
@ -142,6 +149,13 @@ services:
CP_UPSTREAM_URL: "http://cp-stub:9090"
RATE_LIMIT: "1000"
CANVAS_PROXY_URL: "http://localhost:3000"
# Memory v2 sidecar (PR #2906) bundles the plugin into the
# tenant image and starts it before the main server. The plugin
# runs `CREATE EXTENSION vector` on first boot, which fails on
# the harness's plain postgres:15-alpine (no pgvector). The
# harness doesn't exercise memory features, so disable the
# sidecar via the entrypoint's documented escape hatch.
MEMORY_PLUGIN_DISABLE: "1"
networks: [harness-net]
healthcheck:
test: ["CMD-SHELL", "wget -q -O- http://localhost:8080/health || exit 1"]

View File

@ -21,6 +21,14 @@ ARG GIT_SHA=dev
RUN CGO_ENABLED=0 GOOS=linux go build \
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
-o /platform ./cmd/server
# Bundle the built-in memory-plugin-postgres binary so an operator can
# activate Memory v2 by setting MEMORY_V2_CUTOVER=true + (default)
# MEMORY_PLUGIN_URL=http://localhost:9100. The entrypoint starts this
# binary in the background; main /platform talks to it over loopback.
# Stays inert until the operator flips the cutover env var.
RUN CGO_ENABLED=0 GOOS=linux go build \
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
-o /memory-plugin ./cmd/memory-plugin-postgres
# Clone templates + plugins at build time from manifest.json
FROM alpine:3.20 AS templates
@ -30,8 +38,9 @@ COPY scripts/clone-manifest.sh /scripts/clone-manifest.sh
RUN chmod +x /scripts/clone-manifest.sh && /scripts/clone-manifest.sh /manifest.json /workspace-configs-templates /org-templates /plugins
FROM alpine:3.20
RUN apk add --no-cache ca-certificates git tzdata
RUN apk add --no-cache ca-certificates git tzdata wget
COPY --from=builder /platform /platform
COPY --from=builder /memory-plugin /memory-plugin
COPY workspace-server/migrations /migrations
COPY --from=templates /workspace-configs-templates /workspace-configs-templates
COPY --from=templates /org-templates /org-templates
@ -41,6 +50,7 @@ RUN addgroup -g 1000 platform && adduser -u 1000 -G platform -s /bin/sh -D platf
EXPOSE 8080
COPY <<'ENTRY' /entrypoint.sh
#!/bin/sh
# Set up docker-socket group (unchanged from pre-sidecar entrypoint).
if [ -S /var/run/docker.sock ]; then
SOCK_GID=$(stat -c '%g' /var/run/docker.sock 2>/dev/null || stat -f '%g' /var/run/docker.sock 2>/dev/null)
if [ -n "$SOCK_GID" ] && [ "$SOCK_GID" != "0" ]; then
@ -50,6 +60,61 @@ if [ -S /var/run/docker.sock ]; then
addgroup platform root 2>/dev/null || true
fi
fi
# Memory v2 sidecar (built-in postgres plugin). Co-located with the
# main server so operators flipping MEMORY_V2_CUTOVER=true don't need
# to provision a separate service.
#
# Spawn-gating: only start the sidecar when the operator has indicated
# they want it — either MEMORY_V2_CUTOVER=true OR MEMORY_PLUGIN_URL set.
# Without that signal, the sidecar adds zero value (the platform's
# wiring.go skips building the client too) but pays a real cost: the
# plugin's first migration runs `CREATE EXTENSION vector`, which fails
# on tenant Postgres without pgvector preinstalled and aborts container
# boot via the 30s health gate. Caught on staging redeploy 2026-05-05.
#
# Env defaults (when sidecar IS spawned):
# MEMORY_PLUGIN_DATABASE_URL = $DATABASE_URL (share existing Postgres;
# plugin's `memory_namespaces` / `memory_records` tables coexist
# with `agent_memories` and the rest of the platform schema —
# no conflicts. Operator can override with a separate URL.)
# MEMORY_PLUGIN_LISTEN_ADDR = 127.0.0.1:9100
#
# Set MEMORY_PLUGIN_DISABLE=1 to force-skip the sidecar even with
# cutover env set (e.g. running the plugin externally on a separate host).
memory_plugin_wanted=""
if [ "$MEMORY_V2_CUTOVER" = "true" ] || [ -n "$MEMORY_PLUGIN_URL" ]; then
memory_plugin_wanted=1
fi
if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$memory_plugin_wanted" ] && [ -n "$DATABASE_URL" ]; then
: "${MEMORY_PLUGIN_DATABASE_URL:=$DATABASE_URL}"
: "${MEMORY_PLUGIN_LISTEN_ADDR:=:9100}"
export MEMORY_PLUGIN_DATABASE_URL MEMORY_PLUGIN_LISTEN_ADDR
echo "memory-plugin: starting sidecar on $MEMORY_PLUGIN_LISTEN_ADDR" >&2
# Drop privs to the platform user — the plugin doesn't need root and
# runs unprivileged elsewhere (tenant image already starts as canvas).
su-exec platform /memory-plugin &
MEMORY_PLUGIN_PID=$!
# Wait up to 30s for the plugin's /v1/health to return 200. Boot
# failure here is fatal — better to crash-loop than to silently
# serve cutover traffic against a dead plugin.
health_port=${MEMORY_PLUGIN_LISTEN_ADDR##*:}
ready=0
for _ in $(seq 1 30); do
if wget -qO- --timeout=2 "http://127.0.0.1:${health_port}/v1/health" >/dev/null 2>&1; then
ready=1
break
fi
sleep 1
done
if [ "$ready" != "1" ]; then
echo "memory-plugin: ❌ /v1/health never returned 200 after 30s — aborting boot. Check that DATABASE_URL is reachable, has the pgvector extension, and the plugin's migrations applied." >&2
kill "$MEMORY_PLUGIN_PID" 2>/dev/null || true
exit 1
fi
echo "memory-plugin: ✅ sidecar healthy on :$health_port" >&2
fi
exec su-exec platform /platform "$@"
ENTRY
RUN chmod +x /entrypoint.sh && apk add --no-cache su-exec

View File

@ -34,6 +34,13 @@ ARG GIT_SHA=dev
RUN CGO_ENABLED=0 GOOS=linux go build \
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
-o /platform ./cmd/server
# Memory v2 sidecar binary (Memory v2 #2728). Bundled so an operator
# can activate cutover by flipping MEMORY_V2_CUTOVER=true without
# provisioning a separate service. See entrypoint-tenant.sh for the
# launch logic.
RUN CGO_ENABLED=0 GOOS=linux go build \
-ldflags "-X github.com/Molecule-AI/molecule-monorepo/platform/internal/buildinfo.GitSHA=${GIT_SHA}" \
-o /memory-plugin ./cmd/memory-plugin-postgres
# ── Stage 2: Canvas Next.js standalone ────────────────────────────────
FROM node:20-alpine AS canvas-builder
@ -74,8 +81,9 @@ RUN deluser --remove-home node 2>/dev/null || true; \
delgroup node 2>/dev/null || true; \
addgroup -g 1000 canvas && adduser -u 1000 -G canvas -s /bin/sh -D canvas
# Go platform binary
# Go platform binary + Memory v2 sidecar
COPY --from=go-builder /platform /platform
COPY --from=go-builder /memory-plugin /memory-plugin
COPY workspace-server/migrations /migrations
# Templates + plugins (cloned from GitHub in stage 3)
@ -91,7 +99,7 @@ COPY --from=canvas-builder /canvas/public ./public
COPY workspace-server/entrypoint-tenant.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh && \
chown -R canvas:canvas /canvas /platform /migrations
chown -R canvas:canvas /canvas /platform /memory-plugin /migrations
EXPOSE 8080
# entrypoint.sh starts as root to fix volume perms, then drops to

View File

@ -0,0 +1,50 @@
package main
import (
"strings"
"testing"
)
// TestLoadConfig_DefaultListenAddrIsLoopback pins the default-bind contract.
//
// Why this matters: with the prior `:9100` default, the plugin listened on
// every interface. Inside the container it didn't matter (no host port
// mapping today), but a future change that publishes 9100 OR a cross-host
// sidecar deploy would have exposed an unauth'd memory store. Loopback by
// default is the least-privilege baseline; operators with a multi-host
// topology override via MEMORY_PLUGIN_LISTEN_ADDR.
func TestLoadConfig_DefaultListenAddrIsLoopback(t *testing.T) {
t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "postgres://stub")
t.Setenv("MEMORY_PLUGIN_LISTEN_ADDR", "")
cfg, err := loadConfig()
if err != nil {
t.Fatalf("loadConfig: %v", err)
}
if !strings.HasPrefix(cfg.ListenAddr, "127.0.0.1:") {
t.Errorf("default ListenAddr must bind loopback-only, got %q "+
"(security regression — would expose plugin on every interface)",
cfg.ListenAddr)
}
}
func TestLoadConfig_ListenAddrEnvOverride(t *testing.T) {
t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "postgres://stub")
t.Setenv("MEMORY_PLUGIN_LISTEN_ADDR", ":9100")
cfg, err := loadConfig()
if err != nil {
t.Fatalf("loadConfig: %v", err)
}
if cfg.ListenAddr != ":9100" {
t.Errorf("env override ignored: want :9100, got %q", cfg.ListenAddr)
}
}
func TestLoadConfig_MissingDatabaseURL(t *testing.T) {
t.Setenv("MEMORY_PLUGIN_DATABASE_URL", "")
if _, err := loadConfig(); err == nil {
t.Fatal("loadConfig must error when MEMORY_PLUGIN_DATABASE_URL is empty")
}
}
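
For orientation, a minimal sketch of the config loading these tests assert against. This is a reconstruction from the test expectations, not the actual loadConfig; everything except the env var names and the 127.0.0.1:9100 default is an assumption.

```go
package main

import (
	"errors"
	"os"
	"strings"
)

// pluginConfig / loadConfigSketch are hypothetical stand-ins for the real
// Config / loadConfig in cmd/memory-plugin-postgres.
type pluginConfig struct {
	DatabaseURL string
	ListenAddr  string
}

func loadConfigSketch() (pluginConfig, error) {
	dbURL := strings.TrimSpace(os.Getenv("MEMORY_PLUGIN_DATABASE_URL"))
	if dbURL == "" {
		return pluginConfig{}, errors.New("MEMORY_PLUGIN_DATABASE_URL is required")
	}
	addr := strings.TrimSpace(os.Getenv("MEMORY_PLUGIN_LISTEN_ADDR"))
	if addr == "" {
		addr = "127.0.0.1:9100" // loopback-only default (see defaultListenAddr)
	}
	return pluginConfig{DatabaseURL: dbURL, ListenAddr: addr}, nil
}
```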

View File

@ -10,6 +10,7 @@ package main
import (
"context"
"database/sql"
"embed"
"errors"
"fmt"
"log"
@ -17,6 +18,7 @@ import (
"net/http"
"os"
"os/signal"
"sort"
"strings"
"syscall"
"time"
@ -26,12 +28,28 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/pgplugin"
)
// migrationsFS bundles the .up.sql files into the binary at build time
// so the prebuilt image doesn't need the source tree at runtime. The
// prior `os.ReadDir("cmd/memory-plugin-postgres/migrations")` path
// only resolved during `go test` from the repo root — in the published
// image the path didn't exist and boot failed after the 30s health gate
// (caught on staging redeploy 2026-05-05 after PR #2906).
//
//go:embed migrations/*.up.sql
var migrationsFS embed.FS
const (
envDatabaseURL = "MEMORY_PLUGIN_DATABASE_URL"
envListenAddr = "MEMORY_PLUGIN_LISTEN_ADDR"
envSkipMigrate = "MEMORY_PLUGIN_SKIP_MIGRATE"
defaultListenAddr = ":9100"
// Loopback-only by default (defense in depth). The platform talks to
// the plugin over `http://localhost:9100` from the same container, so
// binding to all interfaces would only widen the reachable surface
// without enabling any in-design caller. Operators running the plugin
// on a separate host override via MEMORY_PLUGIN_LISTEN_ADDR=:9100 (or
// some other interface).
defaultListenAddr = "127.0.0.1:9100"
)
func main() {
@ -143,32 +161,71 @@ func openDB(databaseURL string) (*sql.DB, error) {
return db, nil
}
// runMigrations applies the schema migrations bundled at
// cmd/memory-plugin-postgres/migrations/. Idempotent on repeat boot.
// runMigrations applies the schema migrations bundled into the binary
// via go:embed (see migrationsFS at the top of this file). Idempotent
// on repeat boot — every migration file uses CREATE … IF NOT EXISTS.
//
// Implementation note: rather than embedding the full migrate engine,
// we read the migration files at boot from a known relative path. The
// down migrations are deliberately NOT applied here — that's a manual
// operator action. This keeps the binary tiny and avoids dragging in
// golang-migrate's drivers.
// The down migrations are deliberately NOT applied here — that's a
// manual operator action. This keeps the binary tiny and avoids
// dragging in golang-migrate's drivers.
//
// MEMORY_PLUGIN_MIGRATIONS_DIR (filesystem path) is honored as an
// override for operators who need to ship custom migrations alongside
// the binary without rebuilding. When unset (the common case) we read
// from the embedded FS.
func runMigrations(db *sql.DB) error {
// Find the migrations directory. In `go run` mode it's relative
// to the cmd dir; in the prebuilt binary case it's expected next
// to the binary OR via env var override.
dir := os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR")
if dir == "" {
// Best-effort: try the cwd-relative path that works for `go test`.
dir = "cmd/memory-plugin-postgres/migrations"
if dir := strings.TrimSpace(os.Getenv("MEMORY_PLUGIN_MIGRATIONS_DIR")); dir != "" {
return runMigrationsFromDisk(db, dir)
}
entries, err := os.ReadDir(dir)
return runMigrationsFromEmbed(db)
}
// runMigrationsFromEmbed applies the *.up.sql files bundled into the
// binary at build time. Order is alphabetical, matching what the old
// on-disk path saw (os.ReadDir also returns entries sorted by filename).
func runMigrationsFromEmbed(db *sql.DB) error {
entries, err := migrationsFS.ReadDir("migrations")
if err != nil {
return fmt.Errorf("read migrations dir %q: %w", dir, err)
return fmt.Errorf("read embedded migrations: %w", err)
}
names := make([]string, 0, len(entries))
for _, e := range entries {
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
continue
}
path := dir + "/" + e.Name()
names = append(names, e.Name())
}
sort.Strings(names)
for _, name := range names {
data, err := migrationsFS.ReadFile("migrations/" + name)
if err != nil {
return fmt.Errorf("read embedded %q: %w", name, err)
}
if _, err := db.Exec(string(data)); err != nil {
return fmt.Errorf("apply %q: %w", name, err)
}
log.Printf("applied embedded migration %s", name)
}
return nil
}
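// A migration shaped the way the comments above assume (hypothetical
// content and table name; the real files live under migrations/):
//
//	-- 0001_init.up.sql
//	CREATE TABLE IF NOT EXISTS memories (
//	    id         UUID PRIMARY KEY,
//	    content    TEXT NOT NULL,
//	    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
//	);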
// runMigrationsFromDisk preserves the legacy filesystem-path mode for
// operator-supplied custom migrations.
func runMigrationsFromDisk(db *sql.DB, dir string) error {
entries, err := os.ReadDir(dir)
if err != nil {
return fmt.Errorf("read migrations dir %q: %w", dir, err)
}
names := make([]string, 0, len(entries))
for _, e := range entries {
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
continue
}
names = append(names, e.Name())
}
sort.Strings(names)
for _, name := range names {
path := dir + "/" + name
data, err := os.ReadFile(path)
if err != nil {
return fmt.Errorf("read %q: %w", path, err)
@ -176,7 +233,7 @@ func runMigrations(db *sql.DB) error {
if _, err := db.Exec(string(data)); err != nil {
return fmt.Errorf("apply %q: %w", path, err)
}
log.Printf("applied migration %s", e.Name())
log.Printf("applied disk migration %s (from %s)", name, dir)
}
return nil
}

View File

@ -0,0 +1,72 @@
package main
import (
"strings"
"testing"
)
// TestMigrationsEmbedded_ContainsCreateTable pins that the migrations
// are bundled into the binary at build time, NOT loaded from a
// filesystem path that doesn't exist at runtime in the published image.
//
// Pre-fix: PR #2906 shipped the binary without the migrations dir;
// `os.ReadDir("cmd/memory-plugin-postgres/migrations")` errored on every
// tenant boot, the 30s health gate aborted the container, and the
// staging redeploy fleet job marked all tenants as failed. Embedding
// the migrations into the binary removes the runtime path entirely.
func TestMigrationsEmbedded_ContainsCreateTable(t *testing.T) {
entries, err := migrationsFS.ReadDir("migrations")
if err != nil {
t.Fatalf("embedded migrations dir unreadable: %v", err)
}
if len(entries) == 0 {
t.Fatal("embedded migrations dir is empty — go:embed pattern matched no files")
}
var seenUp bool
for _, e := range entries {
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
continue
}
seenUp = true
data, err := migrationsFS.ReadFile("migrations/" + e.Name())
if err != nil {
t.Errorf("read embedded %q: %v", e.Name(), err)
continue
}
if !strings.Contains(string(data), "CREATE TABLE") {
t.Errorf("embedded %q has no CREATE TABLE — wrong file embedded?", e.Name())
}
}
if !seenUp {
t.Fatal("no *.up.sql in embedded migrations — runtime would have no schema to apply")
}
}
// TestRunMigrationsFromEmbed_OrderingIsAlphabetic pins that we apply
// migrations in deterministic alphabetical order, not in whatever
// arbitrary order migrationsFS.ReadDir happens to return. With one
// migration today this is moot, but a future second migration ('002_…')
// MUST run after '001_…' or the schema is broken.
//
// We can't easily exercise db.Exec here (no test DB); instead pin the
// sort step on the directory listing itself.
func TestRunMigrationsFromEmbed_OrderingIsAlphabetic(t *testing.T) {
entries, err := migrationsFS.ReadDir("migrations")
if err != nil {
t.Fatalf("embedded migrations dir unreadable: %v", err)
}
var names []string
for _, e := range entries {
if e.IsDir() || !strings.HasSuffix(e.Name(), ".up.sql") {
continue
}
names = append(names, e.Name())
}
for i := 1; i < len(names); i++ {
if names[i-1] > names[i] {
t.Errorf("ReadDir returned non-sorted names; runMigrationsFromEmbed must sort. "+
"Got %q before %q", names[i-1], names[i])
}
}
}

View File

@ -19,6 +19,7 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/imagewatch"
memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/registry"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/router"
@ -265,6 +266,14 @@ func main() {
})
}
// Pending-uploads GC sweep — deletes acked rows past their retention
// window plus unacked rows past expires_at. Without this the
// pending_uploads table grows unbounded: the 24h hard TTL only makes a
// row un-fetchable, nothing actually deletes it.
go supervised.RunWithRecover(ctx, "pending-uploads-sweeper", func(c context.Context) {
pendinguploads.StartSweeper(c, pendinguploads.NewPostgres(db.DB), 0)
})
// Provision-timeout sweep — flips workspaces that have been stuck in
// status='provisioning' past the timeout window to 'failed' and emits
// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic

View File

@ -20,6 +20,51 @@ cd /canvas
PORT=3000 HOSTNAME=0.0.0.0 node server.js &
CANVAS_PID=$!
# Memory v2 sidecar (built-in postgres plugin). See Dockerfile entrypoint
# comment for rationale.
#
# Spawn-gating: only start the sidecar when the operator has indicated
# they want it (MEMORY_V2_CUTOVER=true OR MEMORY_PLUGIN_URL set).
# Without that signal, the sidecar adds zero value and risks aborting
# tenant boot via the 30s health gate when the tenant Postgres lacks
# pgvector. Caught on staging redeploy 2026-05-05:
# pq: extension "vector" is not available
#
# Defaults (when sidecar IS spawned): MEMORY_PLUGIN_DATABASE_URL
# falls back to the tenant's DATABASE_URL.
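# Gating summary for the block below (plain restatement of the checks):
#   MEMORY_V2_CUTOVER=true + DATABASE_URL set   -> sidecar starts
#   MEMORY_PLUGIN_URL set  + DATABASE_URL set   -> sidecar starts
#   neither signal set, or DATABASE_URL empty   -> sidecar skipped
#   MEMORY_PLUGIN_DISABLE set (non-empty)       -> sidecar skipped regardless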
MEMORY_PLUGIN_PID=""
memory_plugin_wanted=""
if [ "$MEMORY_V2_CUTOVER" = "true" ] || [ -n "$MEMORY_PLUGIN_URL" ]; then
memory_plugin_wanted=1
fi
if [ -z "$MEMORY_PLUGIN_DISABLE" ] && [ -n "$memory_plugin_wanted" ] && [ -n "$DATABASE_URL" ]; then
: "${MEMORY_PLUGIN_DATABASE_URL:=$DATABASE_URL}"
: "${MEMORY_PLUGIN_LISTEN_ADDR:=:9100}"
export MEMORY_PLUGIN_DATABASE_URL MEMORY_PLUGIN_LISTEN_ADDR
echo "memory-plugin: starting sidecar on $MEMORY_PLUGIN_LISTEN_ADDR" >&2
/memory-plugin &
MEMORY_PLUGIN_PID=$!
# Wait up to 30s for /v1/health. Boot failure is fatal so a misconfigured
# tenant crash-loops instead of silently serving cutover traffic against
# a dead plugin.
health_port=${MEMORY_PLUGIN_LISTEN_ADDR#:}
ready=0
for _ in $(seq 1 30); do
if wget -qO- --timeout=2 "http://localhost:${health_port}/v1/health" >/dev/null 2>&1; then
ready=1
break
fi
sleep 1
done
if [ "$ready" != "1" ]; then
echo "memory-plugin: ❌ /v1/health never returned 200 after 30s — aborting boot. Check DATABASE_URL reachability + pgvector extension + migrations." >&2
kill "$MEMORY_PLUGIN_PID" 2>/dev/null || true
kill "$CANVAS_PID" 2>/dev/null || true
exit 1
fi
echo "memory-plugin: ✅ sidecar healthy on :$health_port" >&2
fi
# Start Go platform in foreground-ish (we trap signals)
# CANVAS_PROXY_URL tells the platform to proxy unmatched routes to Canvas.
# CONTAINER_BACKEND: empty = Docker (default for self-hosted/local).
@ -29,15 +74,20 @@ cd /
/platform &
PLATFORM_PID=$!
# If either process exits, kill the other
# If any process exits, kill the others
cleanup() {
kill $CANVAS_PID 2>/dev/null || true
kill $PLATFORM_PID 2>/dev/null || true
[ -n "$MEMORY_PLUGIN_PID" ] && kill $MEMORY_PLUGIN_PID 2>/dev/null || true
}
trap cleanup EXIT SIGTERM SIGINT
# Wait for either to exit — whichever exits first triggers cleanup
wait -n $CANVAS_PID $PLATFORM_PID
# Wait for any to exit — whichever exits first triggers cleanup
if [ -n "$MEMORY_PLUGIN_PID" ]; then
wait -n $CANVAS_PID $PLATFORM_PID $MEMORY_PLUGIN_PID
else
wait -n $CANVAS_PID $PLATFORM_PID
fi
EXIT_CODE=$?
cleanup
exit $EXIT_CODE

View File

@ -600,14 +600,21 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
return
}
out := make([]uploadedFile, 0, len(headers))
// Phase 1: pre-validate + read every part BEFORE any DB write.
// A multi-file upload must commit all-or-nothing; a per-file
// failure halfway through used to leave rows 1..K-1 in the table
// while the client got a 500 and retried the whole batch — duplicate
// rows, orphan activity rows. Validating up-front + atomic PutBatch
// closes that gap.
type prepped struct {
Sanitized string
Mimetype string
Content []byte
Original string // original (unsanitized) filename for error messages
}
prepReady := make([]prepped, 0, len(headers))
items := make([]pendinguploads.PutItem, 0, len(headers))
for _, fh := range headers {
// Per-file cap is enforced in two passes. The +1 byte ReadAll trick
// that the Python side uses isn't easy through multipart.FileHeader,
// so first trust the size the multipart layer reports and
// short-circuit with a clean 413 before the part is even opened;
// a post-read re-check below catches clients that under-report the
// part size.
if fh.Size > pendinguploads.MaxFileBytes {
log.Printf("chat_files uploadPollMode: per-file cap exceeded for %s: %s (%d bytes)",
workspaceID, fh.Filename, fh.Size)
@ -621,45 +628,67 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
}
content, err := readMultipartFile(fh)
if err != nil {
log.Printf("chat_files uploadPollMode: read part failed for %s/%s: %v", workspaceID, fh.Filename, err)
log.Printf("chat_files uploadPollMode: read part failed for %s/%s: %v",
workspaceID, fh.Filename, err)
c.JSON(http.StatusBadRequest, gin.H{"error": "could not read file part"})
return
}
sanitized := SanitizeFilename(fh.Filename)
mimetype := fh.Header.Get("Content-Type")
fileID, err := h.pendingUploads.Put(ctx, wsUUID, content, sanitized, mimetype)
if err != nil {
if errors.Is(err, pendinguploads.ErrTooLarge) {
// Belt + suspenders: the size check above already
// caught this, but Storage.Put re-validates so a
// malformed FileHeader can't slip through. 413 with
// the same shape so the client sees one error class.
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
"error": "file exceeds per-file cap",
"filename": fh.Filename,
"size": len(content),
"max": pendinguploads.MaxFileBytes,
})
return
}
log.Printf("chat_files uploadPollMode: storage.Put failed for %s/%s: %v",
workspaceID, sanitized, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "could not stage file"})
// Belt-and-braces post-read cap (multipart.FileHeader.Size can lie
// on some clients that don't set Content-Length per part).
if len(content) > pendinguploads.MaxFileBytes {
log.Printf("chat_files uploadPollMode: per-file cap exceeded post-read for %s: %s (%d bytes)",
workspaceID, fh.Filename, len(content))
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
"error": "file exceeds per-file cap",
"filename": fh.Filename,
"size": len(content),
"max": pendinguploads.MaxFileBytes,
})
return
}
sanitized := SanitizeFilename(fh.Filename)
mimetype := safeMimetype(fh.Header.Get("Content-Type"))
prepReady = append(prepReady, prepped{
Sanitized: sanitized, Mimetype: mimetype, Content: content, Original: fh.Filename,
})
items = append(items, pendinguploads.PutItem{
Content: content, Filename: sanitized, Mimetype: mimetype,
})
}
// Activity row so the workspace's inbox poller picks this up
// on its next cycle. activity_type=a2a_receive (NOT a new
// type) so the existing poll filter
// `?type=a2a_receive` catches it without poll-side changes;
// method=chat_upload_receive is the discriminator the
// workspace's adapter (Phase 2) uses to route to the upload
// fetcher instead of the agent's message handler. Same
// shape as A2A's tasks/send vs message/send method split.
// Phase 2: atomic batch insert. On failure no rows commit.
fileIDs, err := h.pendingUploads.PutBatch(ctx, wsUUID, items)
if err != nil {
if errors.Is(err, pendinguploads.ErrTooLarge) {
// Belt + suspenders: pre-validation above already caught
// this; surface a clean 413 if a malformed FileHeader
// somehow slipped through.
c.JSON(http.StatusRequestEntityTooLarge, gin.H{
"error": "one or more files exceed per-file cap",
"max": pendinguploads.MaxFileBytes,
})
return
}
log.Printf("chat_files uploadPollMode: storage.PutBatch failed for %s: %v",
workspaceID, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "could not stage files"})
return
}
// Phase 3: write per-file activity rows and build the response. Activity
// rows are written individually (not part of the same Tx as PutBatch)
// because LogActivity is shared across many handlers and threading the
// Tx through would be a bigger refactor. The trade-off: if an activity
// write fails after the PutBatch commits, the pending_uploads rows
// orphan until the 24h TTL — significantly better than the previous
// "every multi-file upload could orphan" behavior, and the workspace's
// fetcher handles soft-404 cleanly when activity rows reference a row
// the platform later expired.
out := make([]uploadedFile, 0, len(prepReady))
for i, p := range prepReady {
fileID := fileIDs[i]
uri := fmt.Sprintf("platform-pending:%s/%s", workspaceID, fileID)
summary := "chat_upload_receive: " + sanitized
summary := "chat_upload_receive: " + p.Sanitized
method := "chat_upload_receive"
LogActivity(ctx, h.broadcaster, ActivityParams{
WorkspaceID: workspaceID,
@ -669,28 +698,65 @@ func (h *ChatFilesHandler) uploadPollMode(c *gin.Context, ctx context.Context, w
Summary: &summary,
RequestBody: map[string]interface{}{
"file_id": fileID.String(),
"name": sanitized,
"mimeType": mimetype,
"size": len(content),
"name": p.Sanitized,
"mimeType": p.Mimetype,
"size": len(p.Content),
"uri": uri,
},
Status: "ok",
})
log.Printf("chat_files uploadPollMode: staged %s/%s (file_id=%s size=%d mimetype=%q)",
workspaceID, sanitized, fileID, len(content), mimetype)
workspaceID, p.Sanitized, fileID, len(p.Content), p.Mimetype)
out = append(out, uploadedFile{
URI: uri,
Name: sanitized,
Mimetype: mimetype,
Size: int64(len(content)),
Name: p.Sanitized,
Mimetype: p.Mimetype,
Size: int64(len(p.Content)),
})
}
c.JSON(http.StatusOK, gin.H{"files": out})
}
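// Illustrative success payload for the poll branch (a sketch only: the
// exact JSON keys come from uploadedFile's struct tags, which are not
// part of this diff):
//
//	{"files": [{"uri": "platform-pending:<workspaceID>/<fileID>",
//	            "name": "report.pdf", "mimetype": "application/pdf", "size": 9}]}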
// safeMimetype validates a multipart-supplied Content-Type header and
// returns a sanitized value safe to store + serve back unmodified.
//
// The platform's GET /content handler reflects the stored mimetype as
// the response Content-Type. An attacker-controlled header that
// embedded CR/LF could split the response (header injection); a value
// containing semicolons could carry an unexpected charset parameter
// that confuses a downstream renderer. Keep only the type/subtype
// prefix (parameters trimmed); any value containing control characters
// or whitespace, or not shaped like a basic `type/subtype` pair, falls
// back to the safe default (application/octet-stream; the
// workspace-side handler does the same fallback).
func safeMimetype(raw string) string {
const fallback = "application/octet-stream"
// Trim parameters (`text/html; charset=utf-8` → `text/html`).
if i := strings.IndexByte(raw, ';'); i >= 0 {
raw = raw[:i]
}
raw = strings.TrimSpace(raw)
if raw == "" {
return ""
}
// Reject if any control char or whitespace is present (header
// injection defense). RFC 7231 mimetype grammar forbids whitespace.
for _, r := range raw {
if r < 0x21 || r > 0x7e {
return fallback
}
}
// Require exactly one slash separating type and subtype.
parts := strings.Split(raw, "/")
if len(parts) != 2 || parts[0] == "" || parts[1] == "" {
return fallback
}
return raw
}
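// Behavior at a glance (mirrors the cases pinned in
// TestPollUpload_MimetypeWithCRLFInjectionStripped):
//
//	safeMimetype("text/plain; charset=utf-8")    == "text/plain"
//	safeMimetype("text/html\r\nX-Injected: pwn") == "application/octet-stream"
//	safeMimetype("notamime")                     == "application/octet-stream"
//	safeMimetype("")                             == "" // caller's own default applies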
// readMultipartFile reads a multipart part fully into memory. Wraps
// the open + io.ReadAll + close idiom so the call site stays clean,
// and so a future change (chunked reads / hashing) has one place to

View File

@ -67,12 +67,59 @@ func (s *inMemStorage) Put(_ context.Context, ws uuid.UUID, content []byte, file
return id, nil
}
// PutBatch mirrors the production atomic-batch contract: any per-item
// failure leaves the in-memory state unchanged, simulating Tx rollback.
// Pre-validation matches PostgresStorage.PutBatch; oversized items
// return ErrTooLarge before any row is added.
func (s *inMemStorage) PutBatch(_ context.Context, ws uuid.UUID, items []pendinguploads.PutItem) ([]uuid.UUID, error) {
s.mu.Lock()
defer s.mu.Unlock()
if s.putErr != nil {
return nil, s.putErr
}
// Pre-validate so an oversized item rejects the whole batch before
// any state mutation — matches the Tx-rollback semantics.
for _, it := range items {
if len(it.Content) > pendinguploads.MaxFileBytes {
return nil, pendinguploads.ErrTooLarge
}
}
ids := make([]uuid.UUID, 0, len(items))
stagedRows := make(map[uuid.UUID]pendinguploads.Record, len(items))
stagedPuts := make([]putCall, 0, len(items))
for _, it := range items {
id := uuid.New()
stagedRows[id] = pendinguploads.Record{
FileID: id, WorkspaceID: ws, Content: it.Content,
Filename: it.Filename, Mimetype: it.Mimetype,
SizeBytes: int64(len(it.Content)), CreatedAt: time.Now(),
ExpiresAt: time.Now().Add(24 * time.Hour),
}
stagedPuts = append(stagedPuts, putCall{
WorkspaceID: ws, Filename: it.Filename, Mimetype: it.Mimetype, Size: len(it.Content),
})
ids = append(ids, id)
}
for id, r := range stagedRows {
s.rows[id] = r
}
s.puts = append(s.puts, stagedPuts...)
return ids, nil
}
func (s *inMemStorage) Get(context.Context, uuid.UUID) (pendinguploads.Record, error) {
return pendinguploads.Record{}, pendinguploads.ErrNotFound
}
func (s *inMemStorage) MarkFetched(context.Context, uuid.UUID) error { return nil }
func (s *inMemStorage) Ack(context.Context, uuid.UUID) error { return nil }
// Sweep is required by the Storage interface (Phase 3 GC). Not
// exercised by upload-branch tests — the dedicated sweeper_test.go +
// storage_sweep_test.go cover it.
func (s *inMemStorage) Sweep(context.Context, time.Duration) (pendinguploads.SweepResult, error) {
return pendinguploads.SweepResult{}, nil
}
// expectPollDeliveryMode stubs the SELECT delivery_mode lookup that
// uploadPollMode does (separate from the one resolveWorkspaceForwardCreds
// does — this is the new helper introduced for the poll branch).
@ -154,7 +201,7 @@ func TestPollUpload_HappyPath_OneFile_StagesAndLogs(t *testing.T) {
expectActivityInsert(mock)
store := newInMemStorage()
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"report.pdf": []byte("PDF-bytes")})
@ -212,7 +259,7 @@ func TestPollUpload_MultipleFiles_AllStagedAndLogged(t *testing.T) {
expectActivityInsert(mock)
store := newInMemStorage()
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{
@ -250,7 +297,7 @@ func TestPollUpload_PushModeFallsThroughToForward(t *testing.T) {
// URL empty + mode=push → 503 (no inbound secret check needed).
store := newInMemStorage()
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("data")})
@ -274,7 +321,7 @@ func TestPollUpload_NotConfigured_FallsThrough(t *testing.T) {
wsID := "33333333-2222-3333-4444-555555555555"
expectURLAndMode(mock, wsID, "", "poll") // resolveWorkspaceForwardCreds emits 422
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
// No WithPendingUploads — pendingUploads is nil.
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("data")})
@ -295,7 +342,7 @@ func TestPollUpload_WorkspaceMissing_404(t *testing.T) {
wsID := "44444444-2222-3333-4444-555555555555"
expectPollDeliveryModeMissing(mock, wsID)
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(newInMemStorage(), nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("d")})
@ -315,7 +362,7 @@ func TestPollUpload_DeliveryModeLookupDBError_500(t *testing.T) {
mock.ExpectQuery(`SELECT delivery_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).WillReturnError(errors.New("connection lost"))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(newInMemStorage(), nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x": []byte("d")})
@ -335,7 +382,7 @@ func TestPollUpload_NoFilesField_400(t *testing.T) {
expectPollDeliveryMode(mock, wsID, "poll")
store := newInMemStorage()
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
// Multipart with a non-files field — no actual files.
@ -360,7 +407,7 @@ func TestPollUpload_MalformedMultipart_400(t *testing.T) {
expectPollDeliveryMode(mock, wsID, "poll")
store := newInMemStorage()
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
// Body that doesn't match the boundary in Content-Type.
@ -381,7 +428,7 @@ func TestPollUpload_StorageError_500(t *testing.T) {
store := newInMemStorage()
store.putErr = errors.New("disk full")
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")})
@ -402,7 +449,7 @@ func TestPollUpload_StorageTooLarge_413(t *testing.T) {
store := newInMemStorage()
store.putErr = pendinguploads.ErrTooLarge
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")})
@ -422,7 +469,7 @@ func TestPollUpload_TooManyFiles_400(t *testing.T) {
expectPollDeliveryMode(mock, wsID, "poll")
store := newInMemStorage()
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
// 65 files — over the per-batch cap.
@ -457,7 +504,7 @@ func TestPollUpload_NullDeliveryMode_TreatedAsPush(t *testing.T) {
expectURLAndMode(mock, wsID, "", "")
store := newInMemStorage()
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x.bin": []byte("data")})
@ -490,7 +537,7 @@ func TestPollUpload_PerFileCapPreStorage_413(t *testing.T) {
expectPollDeliveryMode(mock, wsID, "poll")
store := newInMemStorage()
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
// 25 MB + 1 byte. Single file, large enough to trip the early
@ -525,7 +572,7 @@ func TestPollUpload_SanitizesFilenameInResponse(t *testing.T) {
expectActivityInsert(mock)
store := newInMemStorage()
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"hello world!.pdf": []byte("data")})
@ -550,6 +597,120 @@ func TestPollUpload_SanitizesFilenameInResponse(t *testing.T) {
}
}
// TestPollUpload_AtomicRollbackOnSecondFileTooLarge pins the
// transactional contract introduced in phase 5: when one file in a
// multi-file batch fails pre-validation (oversize), NONE of the files
// in the batch land in storage. Previously a per-file Put loop would
// stage rows 1..K-1 before failing on row K, leaving orphan
// pending_uploads + activity rows the client would re-create on retry.
//
// Pinned via inMemStorage's PutBatch (which mirrors PostgresStorage's
// Tx-rollback behavior on a per-item validation failure) — but the
// real atomicity guarantee is the integration test in
// pending_uploads_integration_test.go.
func TestPollUpload_AtomicRollbackOnSecondFileTooLarge(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
wsID := "aaaaaaaa-3333-3333-4444-555555555555"
expectPollDeliveryMode(mock, wsID, "poll")
store := newInMemStorage()
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
// Two files: first OK, second over the per-file cap. Pre-validation
// in uploadPollMode catches it BEFORE any Put — store.puts must
// stay empty. (If the test ever sees len=1, the regression is
// "first file slipped through into storage on a partial-failure
// batch.")
tooBig := bytes.Repeat([]byte{0x42}, pendinguploads.MaxFileBytes+1)
body, ct := pollUploadFixture(t, map[string][]byte{
"ok.txt": []byte("small"),
"huge.bin": tooBig,
})
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
if w.Code != http.StatusRequestEntityTooLarge {
t.Errorf("status=%d body=%s, want 413", w.Code, w.Body.String())
}
if len(store.puts) != 0 {
t.Errorf("expected zero Puts on rollback, got %d: %+v", len(store.puts), store.puts)
}
}
// TestPollUpload_AtomicRollbackOnPutBatchError validates that an in-
// flight PutBatch failure (e.g. simulated DB error) leaves zero rows
// — same guarantee as the pre-validation path, but exercises the
// "Tx-Rollback after BEGIN" branch via the fake.
func TestPollUpload_AtomicRollbackOnPutBatchError(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
wsID := "bbbbbbbb-3333-3333-4444-555555555555"
expectPollDeliveryMode(mock, wsID, "poll")
store := newInMemStorage()
store.putErr = errors.New("db down mid-batch")
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{
"a.txt": []byte("aaa"),
"b.txt": []byte("bbb"),
"c.txt": []byte("ccc"),
})
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
if w.Code != http.StatusInternalServerError {
t.Errorf("status=%d, want 500", w.Code)
}
if len(store.puts) != 0 {
t.Errorf("expected zero Puts after PutBatch error, got %d", len(store.puts))
}
}
// TestPollUpload_MimetypeWithCRLFInjectionStripped pins the safeMimetype
// hardening: a multipart-supplied Content-Type header with CR/LF is
// rewritten to application/octet-stream so the eventual /content
// response can't be header-split on the wire.
func TestPollUpload_MimetypeWithCRLFInjectionStripped(t *testing.T) {
got := safeMimetype("text/html\r\nX-Injected: pwn")
if got != "application/octet-stream" {
t.Errorf("CRLF mimetype not stripped, got %q", got)
}
got = safeMimetype("image/png\x00")
if got != "application/octet-stream" {
t.Errorf("NUL byte mimetype not stripped, got %q", got)
}
got = safeMimetype("text/plain; charset=utf-8")
if got != "text/plain" {
t.Errorf("parameter not stripped, got %q", got)
}
got = safeMimetype("application/pdf")
if got != "application/pdf" {
t.Errorf("clean mime modified, got %q", got)
}
got = safeMimetype("")
if got != "" {
t.Errorf("empty input should pass through, got %q", got)
}
got = safeMimetype("notamime")
if got != "application/octet-stream" {
t.Errorf("non-type/subtype not coerced, got %q", got)
}
got = safeMimetype("/empty-type")
if got != "application/octet-stream" {
t.Errorf("missing type half not coerced, got %q", got)
}
got = safeMimetype("type/")
if got != "application/octet-stream" {
t.Errorf("missing subtype half not coerced, got %q", got)
}
}
// TestPollUpload_ActivityRowDiscriminator pins the
// activity_type / method shape that the workspace inbox poller depends
// on. The poller filters `GET /workspaces/:id/activity?type=a2a_receive`
@ -573,7 +734,7 @@ func TestPollUpload_ActivityRowDiscriminator(t *testing.T) {
expectActivityInsertWithTypeAndMethod(mock, wsID, "a2a_receive", "chat_upload_receive")
store := newInMemStorage()
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)).
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil)).
WithPendingUploads(store, nil)
body, ct := pollUploadFixture(t, map[string][]byte{"x.pdf": []byte("xx")})

View File

@ -105,7 +105,7 @@ func TestChatUpload_InvalidWorkspaceID(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
c, w := makeUploadRequest(t, "not-a-uuid", &bytes.Buffer{}, "")
h.Upload(c)
@ -122,7 +122,7 @@ func TestChatUpload_WorkspaceNotInDB(t *testing.T) {
wsID := "00000000-0000-0000-0000-000000000099"
expectURLMissing(mock, wsID)
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@ -166,7 +166,7 @@ func TestChatUpload_NoInboundSecret_LazyHeal(t *testing.T) {
WithArgs(sqlmock.AnyArg(), wsID).
WillReturnResult(sqlmock.NewResult(0, 1))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@ -203,7 +203,7 @@ func TestChatUpload_NoInboundSecret_LazyHealFailure(t *testing.T) {
WithArgs(sqlmock.AnyArg(), wsID).
WillReturnError(sql.ErrConnDone) // mint fails
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@ -231,7 +231,7 @@ func TestChatUpload_NoURL(t *testing.T) {
wsID := "00000000-0000-0000-0000-000000000042"
expectURLAndMode(mock, wsID, "", "push")
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@ -256,7 +256,7 @@ func TestChatUpload_PollModeEmptyURL(t *testing.T) {
wsID := "00000000-0000-0000-0000-000000000099"
expectURLAndMode(mock, wsID, "", "poll")
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@ -286,7 +286,7 @@ func TestChatUpload_NullModeEmptyURL(t *testing.T) {
wsID := "30ba7f0b-b303-4a20-aefe-3a4a675b8aa4" // user's "mac laptop"
expectURLNullMode(mock, wsID, "")
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@ -338,7 +338,7 @@ func TestChatUpload_ForwardsToWorkspace_HappyPath(t *testing.T) {
expectURL(mock, wsID, srv.URL)
expectInboundSecret(mock, wsID, "super-secret-123")
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@ -380,7 +380,7 @@ func TestChatUpload_ForwardsErrorStatusUnchanged(t *testing.T) {
expectURL(mock, wsID, srv.URL)
expectInboundSecret(mock, wsID, "tok")
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@ -402,7 +402,7 @@ func TestChatUpload_WorkspaceUnreachable(t *testing.T) {
expectURL(mock, wsID, "http://127.0.0.1:1")
expectInboundSecret(mock, wsID, "tok")
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
@ -418,7 +418,7 @@ func TestChatDownload_InvalidPath(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
cases := []struct {
name, path, wantSubstr string
@ -507,7 +507,7 @@ func TestChatDownload_WorkspaceNotInDB(t *testing.T) {
WithArgs(wsID).
WillReturnError(sql.ErrNoRows)
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
h.Download(c)
@ -533,7 +533,7 @@ func TestChatDownload_NoInboundSecret_LazyHeal(t *testing.T) {
WithArgs(sqlmock.AnyArg(), wsID).
WillReturnResult(sqlmock.NewResult(0, 1))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
h.Download(c)
@ -559,7 +559,7 @@ func TestChatDownload_NoInboundSecret_LazyHealFailure(t *testing.T) {
WithArgs(sqlmock.AnyArg(), wsID).
WillReturnError(sql.ErrConnDone)
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
c, w := makeDownloadRequest(t, wsID, "/workspace/foo.txt")
h.Download(c)
@ -592,7 +592,7 @@ func TestChatDownload_ForwardsToWorkspace_HappyPath(t *testing.T) {
expectURL(mock, wsID, srv.URL)
expectInboundSecret(mock, wsID, "the-secret")
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
c, w := makeDownloadRequest(t, wsID, "/workspace/report.txt")
h.Download(c)
@ -634,7 +634,7 @@ func TestChatDownload_404FromWorkspacePropagated(t *testing.T) {
expectURL(mock, wsID, srv.URL)
expectInboundSecret(mock, wsID, "tok")
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil, nil))
c, w := makeDownloadRequest(t, wsID, "/workspace/missing.txt")
h.Download(c)

View File

@ -0,0 +1,468 @@
package handlers
// class1_ast_gate_test.go — generic Class 1 leak gate per #2867 PR-A.
//
// What this gate prevents:
// The tenant-hongming leak class — a handler iterates a YAML-derived
// slice (ws.Children, sub_workspaces, etc.) and calls
// `INSERT INTO workspaces` inside the loop body without first
// checking whether a workspace with the same (parent_id, name) is
// already there. Each call to such a handler doubles the tree.
//
// Why this is broader than TestCreateWorkspaceTree_CallsLookupBeforeInsert:
// The existing gate is hard-coded to org_import.go's createWorkspaceTree.
// That catches the specific function that triggered the original
// incident — but a future handler written from scratch in a different
// file would not be covered. This gate walks every production handler
// .go file and applies a structural rule that does not depend on
// function or file names.
//
// The rule (verbatim from #2867 PR-A):
//
// "No handler in handlers/ may iterate a slice (any RangeStmt) AND
// call INSERT INTO workspaces inside the loop body without a
// preceding SELECT id FROM workspaces WHERE name=$1 AND parent_id IS
// NOT DISTINCT FROM $2 in the same function (== a lookupExistingChild
// call, OR an ON CONFLICT clause baked into the same INSERT, OR an
// explicit allowlist annotation)."
//
// Allowlist mechanism: a function whose body contains the exact comment
// string `// class1-gate: idempotent-by-design` is treated as safe.
// Use this only after writing a unit test that pins WHY the function
// is safe. The annotation is intentionally awkward to type — it should
// be rare.
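// Illustrative shapes (hypothetical handler code, not taken from any
// real file in this package):
//
//	// flagged: INSERT inside a RangeStmt, no preflight, no ON CONFLICT
//	for _, child := range ws.Children {
//	    db.ExecContext(ctx, `INSERT INTO workspaces (id, name) VALUES ($1, $2)`, id, child.Name)
//	}
//
//	// cleared: preflight via lookupExistingChild anywhere in the same function
//	for _, child := range ws.Children {
//	    if _, exists, _ := h.lookupExistingChild(ctx, child.Name, parentID); exists {
//	        continue
//	    }
//	    db.ExecContext(ctx, `INSERT INTO workspaces (id, name) VALUES ($1, $2)`, id, child.Name)
//	}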
import (
"go/ast"
"go/parser"
"go/token"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"testing"
)
// reINSERTWorkspaces matches the exact statement shape we care about.
// Tightened (vs bytes.Index "INSERT INTO workspaces") so the audit
// table `workspaces_audit` literal — or any other lookalike — does not
// false-positive trigger this gate. The same regex is used in the
// existing createWorkspaceTree gate (workspaces_insert_allowlist_test.go)
// — keep them in sync if either changes.
var reINSERTWorkspaces = regexp.MustCompile(`(?m)^\s*INSERT INTO workspaces\s*\(`)
// reONCONFLICT matches ON CONFLICT clauses anywhere in the same SQL
// literal. An UPSERT (INSERT ... ON CONFLICT ... DO UPDATE) is
// idempotent by definition, so the gate exempts it.
var reONCONFLICT = regexp.MustCompile(`(?i)\bON CONFLICT\b`)
// gateAllowlistComment is the magic comment a function author writes
// to opt out of this gate. Forces an explicit decision.
const gateAllowlistComment = "// class1-gate: idempotent-by-design"
// preflightCallNames are function names whose presence in a function
// body counts as "did a SELECT-by-(parent_id, name) preflight". Add
// new names here as new preflight helpers are introduced. Keep the
// list TIGHT — any sloppy addition weakens the gate.
var preflightCallNames = map[string]bool{
"lookupExistingChild": true,
}
// TestClass1_NoUnpreflightedInsertInsideRange walks every production
// .go file in this package, parses the AST, and fails the test if any
// FuncDecl violates the rule above.
//
// Failure message must include: file path, function name, line of
// the offending INSERT, line of the enclosing range, and a hint at
// the three escape hatches (preflight call, ON CONFLICT, allowlist
// comment).
func TestClass1_NoUnpreflightedInsertInsideRange(t *testing.T) {
wd, err := os.Getwd()
if err != nil {
t.Fatalf("getwd: %v", err)
}
entries, err := os.ReadDir(wd)
if err != nil {
t.Fatalf("readdir %s: %v", wd, err)
}
type violation struct {
file string
fn string
insertLine int
rangeLine int
}
var violations []violation
scanned := 0
for _, e := range entries {
name := e.Name()
if e.IsDir() || !strings.HasSuffix(name, ".go") {
continue
}
if strings.HasSuffix(name, "_test.go") {
continue
}
path := filepath.Join(wd, name)
src, err := os.ReadFile(path)
if err != nil {
t.Fatalf("read %s: %v", path, err)
}
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, name, src, parser.ParseComments)
if err != nil {
t.Fatalf("parse %s: %v", path, err)
}
scanned++
// Walk every function declaration and apply the rule.
for _, decl := range file.Decls {
fd, ok := decl.(*ast.FuncDecl)
if !ok || fd.Body == nil {
continue
}
// Allowlist: skip if the function body contains the magic
// comment. We check via the source range of the function
// — comments inside the body are in file.Comments and
// must overlap the function's Pos/End range.
if functionHasAllowlistComment(file, fd) {
continue
}
// First pass: locate every INSERT INTO workspaces literal
// in this function. We treat each such literal as a
// candidate violation and try to clear it via the rules.
candidates := findInsertWorkspacesLiterals(fd, src, fset)
if len(candidates) == 0 {
continue
}
// Has the function called a preflight helper? Single
// pass — if any preflight name appears, every INSERT in
// the function is considered preflighted. This is more
// permissive than position-aware (preflight could be
// AFTER the INSERT and still satisfy the gate), but the
// existing org_import.go gate already pins the position
// invariant for createWorkspaceTree, and a function that
// preflights AFTER inserting would fail the position
// gate in a separate test.
hasPreflight := functionCallsAny(fd, preflightCallNames)
for _, c := range candidates {
if c.hasONCONFLICT {
continue
}
if hasPreflight {
continue
}
if c.enclosingRangeLine == 0 {
// INSERT not inside any RangeStmt — single-shot,
// not the bug pattern.
continue
}
violations = append(violations, violation{
file: name,
fn: fd.Name.Name,
insertLine: c.insertLine,
rangeLine: c.enclosingRangeLine,
})
}
}
}
if scanned == 0 {
t.Fatal("scanned 0 .go files — wrong working directory? gate would always pass")
}
if len(violations) > 0 {
// Stable sort so the failure message is deterministic across
// reruns.
sort.Slice(violations, func(i, j int) bool {
if violations[i].file != violations[j].file {
return violations[i].file < violations[j].file
}
return violations[i].insertLine < violations[j].insertLine
})
var b strings.Builder
b.WriteString("Class 1 leak gate (#2867 PR-A) — these handler functions iterate a slice and INSERT INTO workspaces inside the loop body without a (parent_id, name) preflight.\n\n")
b.WriteString("This is the bug shape that triggered the tenant-hongming leak (TeamHandler.Expand re-inserting the entire sub_workspaces tree on every call). To fix any reported violation, choose ONE of:\n")
b.WriteString(" 1. Call h.lookupExistingChild(ctx, name, parentID) before the INSERT and skip the INSERT when it returns existing=true. (preferred)\n")
b.WriteString(" 2. Use INSERT ... ON CONFLICT ... DO ... (idempotent UPSERT, like registry.go).\n")
b.WriteString(" 3. Annotate the function with a `// class1-gate: idempotent-by-design` comment AND a unit test that pins why the function is structurally idempotent. (rare; require code review)\n\n")
b.WriteString("Violations:\n")
for _, v := range violations {
b.WriteString(" - ")
b.WriteString(v.file)
b.WriteString(":")
b.WriteString(itoa(v.insertLine))
b.WriteString(" — function ")
b.WriteString(v.fn)
b.WriteString("() INSERTs inside RangeStmt at line ")
b.WriteString(itoa(v.rangeLine))
b.WriteString("\n")
}
t.Fatal(b.String())
}
}
func itoa(n int) string {
// Avoid strconv import for one call site — keeps the test focused.
if n == 0 {
return "0"
}
neg := n < 0
if neg {
n = -n
}
var buf [20]byte
i := len(buf)
for n > 0 {
i--
buf[i] = byte('0' + n%10)
n /= 10
}
if neg {
i--
buf[i] = '-'
}
return string(buf[i:])
}
// candidateInsert holds the per-INSERT facts needed to decide whether
// the gate fires.
type candidateInsert struct {
insertLine int
hasONCONFLICT bool
enclosingRangeLine int // 0 means not inside any range
}
// findInsertWorkspacesLiterals walks fd's body and returns one
// candidateInsert per INSERT INTO workspaces string literal.
//
// Position-based detection: collect every RangeStmt's body span first,
// then for each INSERT literal check if its position is inside any
// span. ast.Inspect's nil-call ordering does NOT give per-node pop
// semantics, so a stack-based approach against ast.Inspect would
// silently miscount. Position spans are deterministic and easy to
// reason about.
func findInsertWorkspacesLiterals(fd *ast.FuncDecl, src []byte, fset *token.FileSet) []candidateInsert {
var out []candidateInsert
type span struct{ start, end token.Pos }
var ranges []span
ast.Inspect(fd.Body, func(n ast.Node) bool {
rs, ok := n.(*ast.RangeStmt)
if !ok || rs.Body == nil {
return true
}
ranges = append(ranges, span{rs.Body.Lbrace, rs.Body.Rbrace})
return true
})
enclosingRangeLineFor := func(p token.Pos) int {
// Pick the innermost enclosing range — i.e., the one with the
// largest start that still covers p. Innermost is the one
// whose body actually contains the INSERT, which is the line
// most useful in a violation message.
bestStart := token.NoPos
bestLine := 0
for _, s := range ranges {
if p > s.start && p < s.end && s.start > bestStart {
bestStart = s.start
bestLine = fset.Position(s.start).Line
}
}
return bestLine
}
ast.Inspect(fd.Body, func(n ast.Node) bool {
bl, ok := n.(*ast.BasicLit)
if !ok || bl.Kind != token.STRING {
return true
}
// Strip surrounding backticks/quotes — value includes them.
lit := bl.Value
if len(lit) >= 2 {
lit = lit[1 : len(lit)-1]
}
if !reINSERTWorkspaces.MatchString(lit) {
return true
}
out = append(out, candidateInsert{
insertLine: fset.Position(bl.Pos()).Line,
hasONCONFLICT: reONCONFLICT.MatchString(lit),
enclosingRangeLine: enclosingRangeLineFor(bl.Pos()),
})
return true
})
return out
}
// functionCallsAny returns true if any CallExpr in fd's body has a
// function name (either a SelectorExpr Sel.Name or an Ident name)
// matching a key in names.
func functionCallsAny(fd *ast.FuncDecl, names map[string]bool) bool {
found := false
ast.Inspect(fd.Body, func(n ast.Node) bool {
if found {
return false
}
ce, ok := n.(*ast.CallExpr)
if !ok {
return true
}
switch fun := ce.Fun.(type) {
case *ast.Ident:
if names[fun.Name] {
found = true
return false
}
case *ast.SelectorExpr:
if names[fun.Sel.Name] {
found = true
return false
}
}
return true
})
return found
}
// functionHasAllowlistComment returns true if the function body
// (between fd.Body.Lbrace and fd.Body.Rbrace) contains a comment
// equal to gateAllowlistComment.
func functionHasAllowlistComment(file *ast.File, fd *ast.FuncDecl) bool {
if fd.Body == nil {
return false
}
start := fd.Body.Lbrace
end := fd.Body.Rbrace
for _, cg := range file.Comments {
for _, c := range cg.List {
if c.Pos() < start || c.Pos() > end {
continue
}
if strings.TrimSpace(c.Text) == gateAllowlistComment {
return true
}
}
}
return false
}
// TestClass1_GateFiresOnSyntheticBuggySource — proves the gate actually
// catches the bug shape it's named after. Without this, a regression
// to "always pass" would not be noticed until the leak shipped again.
// Per memory feedback_assert_exact_not_substring.md: tighten the test
// + verify it FAILS on old-shape source before merging.
func TestClass1_GateFiresOnSyntheticBuggySource(t *testing.T) {
const buggySrc = `package handlers
import "context"
type fakeDB struct{}
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
func buggyExpand(db fakeDB, ctx context.Context, children []string) {
for _, child := range children {
// Bug shape: INSERT inside the range body, no preflight.
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", child)
}
}
`
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, "buggy.go", buggySrc, parser.ParseComments)
if err != nil {
t.Fatalf("parse synthetic source: %v", err)
}
for _, decl := range file.Decls {
fd, ok := decl.(*ast.FuncDecl)
if !ok || fd.Name.Name != "buggyExpand" {
continue
}
candidates := findInsertWorkspacesLiterals(fd, []byte(buggySrc), fset)
if len(candidates) != 1 {
t.Fatalf("expected 1 INSERT literal, got %d", len(candidates))
}
c := candidates[0]
if c.enclosingRangeLine == 0 {
t.Errorf("synthetic INSERT inside `for _, child := range` should be detected as enclosed by range, got enclosingRangeLine=0 — gate would miss the bug shape")
}
if c.hasONCONFLICT {
t.Errorf("synthetic INSERT has no ON CONFLICT, gate falsely treated it as idempotent")
}
if functionCallsAny(fd, preflightCallNames) {
t.Errorf("synthetic function does not call lookupExistingChild — gate falsely treated it as preflighted")
}
// All three guards say the gate WOULD fire. Pass.
return
}
t.Fatal("buggyExpand FuncDecl not found in synthetic source")
}
// TestClass1_GateAllowsONCONFLICT — pins that an INSERT with ON
// CONFLICT inside a range body is NOT flagged. registry.go's
// upsert pattern is the prod example.
func TestClass1_GateAllowsONCONFLICT(t *testing.T) {
const safeSrc = `package handlers
import "context"
type fakeDB struct{}
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
func upsertLoop(db fakeDB, ctx context.Context, children []string) {
for _, child := range children {
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2) ON CONFLICT (id) DO UPDATE SET name = $2`" + `, "x", child)
}
}
`
fset := token.NewFileSet()
file, _ := parser.ParseFile(fset, "safe.go", safeSrc, parser.ParseComments)
for _, decl := range file.Decls {
fd, ok := decl.(*ast.FuncDecl)
if !ok || fd.Name.Name != "upsertLoop" {
continue
}
candidates := findInsertWorkspacesLiterals(fd, []byte(safeSrc), fset)
if len(candidates) != 1 {
t.Fatalf("expected 1 candidate, got %d", len(candidates))
}
if !candidates[0].hasONCONFLICT {
t.Errorf("ON CONFLICT clause should be detected, was missed — gate would falsely flag idempotent UPSERTs")
}
}
}
// TestClass1_GateAllowsAllowlistAnnotation — pins the escape hatch
// works. Annotated functions are skipped at the FuncDecl level.
func TestClass1_GateAllowsAllowlistAnnotation(t *testing.T) {
const annotatedSrc = `package handlers
import "context"
type fakeDB struct{}
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
func intentionallyUnpreflighted(db fakeDB, ctx context.Context, children []string) {
// class1-gate: idempotent-by-design
for _, child := range children {
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", child)
}
}
`
fset := token.NewFileSet()
file, _ := parser.ParseFile(fset, "annotated.go", annotatedSrc, parser.ParseComments)
for _, decl := range file.Decls {
fd, ok := decl.(*ast.FuncDecl)
if !ok || fd.Name.Name != "intentionallyUnpreflighted" {
continue
}
if !functionHasAllowlistComment(file, fd) {
t.Error("allowlist comment should be detected for the intentionallyUnpreflighted function — escape hatch not working")
}
}
}

View File

@ -7,6 +7,7 @@ import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"log"
"os"
@ -21,6 +22,7 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/scheduler"
"github.com/google/uuid"
)
@ -61,10 +63,33 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
tier = defaults.Tier
}
if tier == 0 {
tier = 2
// Resolved via the same DefaultTier helper Create + Templates
// use (#2910 PR-E). SaaS → T4 (one container per sibling EC2,
// no neighbour to protect from), self-hosted → T3. Pre-#2910
// this path returned T2 on self-hosted, asymmetric with
// workspace.go's T3 — undocumented drift. Lifting to
// DefaultTier collapses both call sites onto one source of
// truth so a future tier-default change sweeps every entry
// point at once. Templates that want a different floor still
// declare `tier:` in config.yaml or `defaults.tier` in
// org.yaml.
if h.workspace != nil {
tier = h.workspace.DefaultTier()
} else {
tier = 3
}
}
ctxLookup := context.Background()
// 5s timeout bounds the lookup independently of any HTTP request
// context. createWorkspaceTree runs in goroutines spawned from the
// /org/import handler, so plumbing the request context here would
// cascade-cancel into provisionWorkspaceAuto and abort in-flight
// EC2 provisioning if the client disconnected mid-import — that's
// the wrong behaviour. A short bounded timeout protects the
// per-row SELECT against a wedged DB without taking the
// drop-everything-on-disconnect tradeoff.
ctxLookup, cancelLookup := context.WithTimeout(context.Background(), 5*time.Second)
defer cancelLookup()
// Idempotency: if a workspace with the same (parent_id, name) already
// exists, skip the INSERT + canvas_layouts + broadcast + provisioning.
// This is what makes /org/import safe to call multiple times — the
@ -76,12 +101,31 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
// (parent exists, some children missing) backfill the missing children
// instead of either no-op'ing the whole subtree or duplicating the
// existing children.
//
// /org/import is ADDITIVE-ONLY, never destructive. Children present
// in the existing tree but absent from the new template are
// preserved (no DELETE on diff). Skip-path also does NOT propagate
// updates to existing nodes — a re-import that adds an
// initial_memory or schedule to an existing workspace is silently
// dropped (the function bypasses seedInitialMemories, schedule SQL,
// channel config for skipped rows). To force-update an existing
// tree, delete and re-import or use a future /org/sync route.
existingID, existing, lookupErr := h.lookupExistingChild(ctxLookup, ws.Name, parentID)
if lookupErr != nil {
return fmt.Errorf("idempotency check for %s: %w", ws.Name, lookupErr)
}
if existing {
log.Printf("Org import: %q already exists (id=%s) — skipping create+provision, recursing into children for partial-match", ws.Name, existingID)
parentRef := ""
if parentID != nil {
parentRef = *parentID
}
provlog.Event("provision.skip_existing", map[string]any{
"name": ws.Name,
"existing_id": existingID,
"parent_id": parentRef,
"tier": tier,
})
*results = append(*results, map[string]interface{}{
"id": existingID,
"name": ws.Name,
@ -580,6 +624,12 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
//
// On sql.ErrNoRows: returns ("", false, nil) — caller should INSERT.
// On a real DB error: returns ("", false, err) — caller propagates.
//
// errors.Is is used here because it is wrap-safe: if the no-rows error
// ever arrives wrapped (database/sql can wrap driver errors with %w in
// some setups, and a future caller could wrap via fmt.Errorf), a plain
// `err == sql.ErrNoRows` equality check would miss it, sending the
// no-rows happy path down the "real DB error" branch and aborting the
// import. errors.Is unwraps.
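//
// For example, errors.Is(fmt.Errorf("driver-wrapped: %w", sql.ErrNoRows), sql.ErrNoRows)
// reports true, while a bare == comparison against that wrapped error is
// false; TestLookupExistingChild_WrappedNoRows_TreatedAsNotFound pins
// exactly this shape.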
func (h *OrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) {
var existingID string
err := db.DB.QueryRowContext(ctx, `
@ -589,7 +639,7 @@ func (h *OrgHandler) lookupExistingChild(ctx context.Context, name string, paren
AND status != 'removed'
LIMIT 1
`, name, parentID).Scan(&existingID)
if err == sql.ErrNoRows {
if errors.Is(err, sql.ErrNoRows) {
return "", false, nil
}
if err != nil {

View File

@ -1,11 +1,17 @@
package handlers
import (
"bytes"
"context"
"database/sql"
"errors"
"fmt"
"go/ast"
"go/parser"
"go/token"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"testing"
@ -119,6 +125,90 @@ func TestLookupExistingChild_DBError_Propagates(t *testing.T) {
}
}
// TestLookupExistingChild_WrappedNoRows_TreatedAsNotFound — pins the
// wrap-safety of the errors.Is(err, sql.ErrNoRows) check. The previous
// `err == sql.ErrNoRows` equality would fall through to the
// "real DB error" branch on a wrapped no-rows error, aborting the
// import for what is in fact the no-rows happy path. driver/sql
// wrapping is currently a non-issue but a future driver change or a
// caller that wraps the result via fmt.Errorf("…: %w", err) would
// silently break the equality check. errors.Is unwraps.
func TestLookupExistingChild_WrappedNoRows_TreatedAsNotFound(t *testing.T) {
mock := setupTestDB(t)
parent := "parent-1"
wrapped := fmt.Errorf("driver-wrapped: %w", sql.ErrNoRows)
mock.ExpectQuery(`SELECT id FROM workspaces`).
WithArgs("Alpha", &parent).
WillReturnError(wrapped)
h := &OrgHandler{}
id, found, err := h.lookupExistingChild(context.Background(), "Alpha", &parent)
if err != nil {
t.Fatalf("expected wrapped no-rows to be treated as not-found (err=nil), got: %v", err)
}
if found {
t.Errorf("expected found=false on wrapped no-rows, got found=true")
}
if id != "" {
t.Errorf("expected empty id on wrapped no-rows, got %q", id)
}
}
// workspacesInsertRE matches a SQL literal that begins (after optional
// leading whitespace) with `INSERT INTO workspaces` followed by `(` —
// requiring the open-paren rules out lookalikes like
// `INSERT INTO workspaces_audit`, `INSERT INTO workspace_secrets`,
// `INSERT INTO workspace_channels`, `INSERT INTO canvas_layouts`. The
// previous bytes.Index gate accepted `workspaces_audit` as a prefix
// match — see RFC #2872 Important-1 for the silent-false-pass shape.
var workspacesInsertRE = regexp.MustCompile(`(?s)^\s*INSERT\s+INTO\s+workspaces\s*\(`)
// findLookupAndWorkspacesInsertPos walks the AST of `src` and returns
// the source positions of (a) the first call to `lookupExistingChild`
// and (b) the first CallExpr whose argument list contains a STRING
// BasicLit matching workspacesInsertRE. Either may be token.NoPos if
// not found.
//
// Extracted as a helper so the gate logic can be exercised against
// synthetic source — TestGate_FailsWhenLookupAfterInsert below proves
// the gate actually catches the bug shape, not just the happy path.
func findLookupAndWorkspacesInsertPos(t *testing.T, fname string, src []byte) (lookupPos, insertPos token.Pos, fset *token.FileSet) {
t.Helper()
fset = token.NewFileSet()
file, err := parser.ParseFile(fset, fname, src, parser.ParseComments)
if err != nil {
t.Fatalf("parse %s: %v", fname, err)
}
lookupPos, insertPos = token.NoPos, token.NoPos
ast.Inspect(file, func(n ast.Node) bool {
call, ok := n.(*ast.CallExpr)
if !ok {
return true
}
if sel, ok := call.Fun.(*ast.SelectorExpr); ok {
if sel.Sel.Name == "lookupExistingChild" && lookupPos == token.NoPos {
lookupPos = call.Pos()
}
}
for _, arg := range call.Args {
lit, ok := arg.(*ast.BasicLit)
if !ok || lit.Kind != token.STRING {
continue
}
raw := lit.Value
if unq, err := strconv.Unquote(raw); err == nil {
raw = unq
}
if workspacesInsertRE.MatchString(raw) && insertPos == token.NoPos {
insertPos = call.Pos()
}
}
return true
})
return
}
// Source-level guard — pins that org_import.go calls
// h.lookupExistingChild BEFORE its INSERT INTO workspaces.
//
@ -126,6 +216,11 @@ func TestLookupExistingChild_DBError_Propagates(t *testing.T) {
// (idempotency check before INSERT), not just function names. If a
// future refactor reintroduces the un-checked INSERT (the original
// bug shape that leaked 72 workspaces in 4 days), this test fails.
//
// AST-walk implementation closes the silent-false-pass mode that the
// previous bytes.Index gate had — see workspacesInsertRE comment for
// the failure mode (workspaces_audit / workspace_secrets / etc.
// shadowing the real target via prefix match).
func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) {
wd, err := os.Getwd()
if err != nil {
@ -135,17 +230,189 @@ func TestCreateWorkspaceTree_CallsLookupBeforeInsert(t *testing.T) {
if err != nil {
t.Fatalf("read org_import.go: %v", err)
}
lookupPos, insertPos, fset := findLookupAndWorkspacesInsertPos(t, "org_import.go", src)
lookupAt := bytes.Index(src, []byte("h.lookupExistingChild("))
insertAt := bytes.Index(src, []byte("INSERT INTO workspaces"))
if lookupAt < 0 {
t.Fatalf("org_import.go missing call to h.lookupExistingChild — idempotency check removed?")
if lookupPos == token.NoPos {
t.Fatalf("AST: no call to lookupExistingChild in org_import.go — idempotency check removed?")
}
if insertAt < 0 {
t.Fatalf("org_import.go missing INSERT INTO workspaces — schema change?")
if insertPos == token.NoPos {
t.Fatalf("AST: no SQL literal matching `^\\s*INSERT INTO workspaces\\s*\\(` in any CallExpr in org_import.go — schema change or rename?")
}
if lookupAt > insertAt {
t.Errorf("h.lookupExistingChild must come BEFORE INSERT INTO workspaces in org_import.go (lookup@%d, insert@%d) — non-idempotent ordering would re-leak under repeat /org/import calls", lookupAt, insertAt)
if lookupPos > insertPos {
t.Errorf("lookupExistingChild call at %s must come BEFORE INSERT INTO workspaces at %s — non-idempotent ordering would re-leak under repeat /org/import calls",
fset.Position(lookupPos), fset.Position(insertPos))
}
}
// TestGate_FailsWhenLookupAfterInsert proves the gate actually catches
// the bug it's named after — running it against synthetic Go source
// where the lookup call is positioned AFTER the workspaces INSERT must
// produce lookupPos > insertPos, which the production gate flags as
// an ERROR. Without this test the gate could regress to "always pass"
// and we wouldn't notice until the bug shipped again.
//
// Per memory feedback_assert_exact_not_substring.md: verify a
// tightened test FAILS on old code before merging.
func TestGate_FailsWhenLookupAfterInsert(t *testing.T) {
const buggySrc = `package handlers
import "context"
type fakeDB struct{}
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
type fakeOrgHandler struct{}
func (h *fakeOrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) {
return "", false, nil
}
func buggyCreate(h *fakeOrgHandler, db fakeDB, ctx context.Context, name string, parentID *string) {
// Bug shape: INSERT runs FIRST, lookup runs AFTER. This is the
// non-idempotent ordering the gate exists to forbid.
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", name)
h.lookupExistingChild(ctx, name, parentID)
}
`
lookupPos, insertPos, _ := findLookupAndWorkspacesInsertPos(t, "buggy.go", []byte(buggySrc))
if lookupPos == token.NoPos || insertPos == token.NoPos {
t.Fatalf("synthetic buggy source missing expected nodes (lookupPos=%v insertPos=%v) — helper logic regression", lookupPos, insertPos)
}
if lookupPos < insertPos {
t.Fatalf("synthetic bug shape (lookup AFTER insert) returned lookupPos=%d < insertPos=%d — gate would NOT fire on actual bug, regression!", lookupPos, insertPos)
}
// Implicit: lookupPos > insertPos here, which the production gate
// flags via t.Errorf. This proves the gate is live, not vestigial.
}
// TestGate_IgnoresAuditTableShadow proves the regex tightening
// actually ignores `INSERT INTO workspaces_audit` literals — the
// specific shape #2872 cited as the silent-false-pass failure mode
// for the previous bytes.Index gate.
func TestGate_IgnoresAuditTableShadow(t *testing.T) {
// Synthetic source with the audit-table INSERT placed first (under the
// old prefix-match it would be the earliest hit) and the lookup + real
// INSERT at later positions. With the tightened regex, the audit literal is
// ignored: insertPos points at the REAL INSERT, lookup precedes it,
// gate passes correctly.
const src = `package handlers
import "context"
type fakeDB struct{}
func (fakeDB) ExecContext(ctx context.Context, sql string, args ...interface{}) {}
type fakeOrgHandler struct{}
func (h *fakeOrgHandler) lookupExistingChild(ctx context.Context, name string, parentID *string) (string, bool, error) {
return "", false, nil
}
func okCreateWithAudit(h *fakeOrgHandler, db fakeDB, ctx context.Context, name string, parentID *string) {
// Audit-table INSERT — should be IGNORED by the tightened regex.
db.ExecContext(ctx, ` + "`INSERT INTO workspaces_audit (id, action) VALUES ($1, $2)`" + `, "x", "create_attempt")
// Lookup BEFORE real INSERT — correct order.
h.lookupExistingChild(ctx, name, parentID)
// Real INSERT.
db.ExecContext(ctx, ` + "`INSERT INTO workspaces (id, name) VALUES ($1, $2)`" + `, "x", name)
}
`
lookupPos, insertPos, fset := findLookupAndWorkspacesInsertPos(t, "shadow.go", []byte(src))
if lookupPos == token.NoPos || insertPos == token.NoPos {
t.Fatalf("expected to find lookup + real INSERT, got lookupPos=%v insertPos=%v", lookupPos, insertPos)
}
// The audit-table INSERT is at line ~16 (column ~20-ish), the
// lookup is at line 19, the real INSERT is at line 21. If the
// regex regressed to prefix-match, insertPos would point at the
// audit literal at line 16, and the gate would falsely fail
// (lookup at 19 > "insert" at 16). With the tightened regex,
// insertPos correctly points at line 21, and the gate passes.
insertLine := fset.Position(insertPos).Line
lookupLine := fset.Position(lookupPos).Line
if insertLine < lookupLine {
t.Errorf("regex regressed: audit shadow at line %d swallowed real INSERT (lookup at line %d). insertPos should point at the real INSERT (line ~21), not the audit literal.",
insertLine, lookupLine)
}
if lookupPos > insertPos {
t.Errorf("synthetic source has lookup at line %d before real INSERT at line %d, gate should pass (lookupPos < insertPos), got lookupPos=%d > insertPos=%d",
lookupLine, insertLine, lookupPos, insertPos)
}
}
// TestWorkspacesInsertRE_RejectsLookalikes pins the regex that
// discriminates the real workspaces INSERT from prefix-matching
// lookalikes. If this regex regresses to a substring match, the
// AST gate above silently false-passes when a future refactor
// shadows the real INSERT with a workspaces_audit / workspace_secrets
// / canvas_layouts literal placed earlier in source.
func TestWorkspacesInsertRE_RejectsLookalikes(t *testing.T) {
cases := []struct {
sql string
want bool
comment string
}{
{"INSERT INTO workspaces (id, name) VALUES ($1, $2)", true, "real target"},
{"\n\t\tINSERT INTO workspaces (id, name)\n\t\tVALUES ($1, $2)", true, "real target with leading whitespace + newlines (raw string literal shape)"},
{"INSERT INTO workspaces_audit (id) VALUES ($1)", false, "underscore-suffix lookalike (the #2872 specific failure mode)"},
{"INSERT INTO workspace_secrets (key, value) VALUES ($1, $2)", false, "prefix without trailing 's' (workspace_*)"},
{"INSERT INTO workspace_channels (id) VALUES ($1)", false, "another workspace_* prefix"},
{"INSERT INTO canvas_layouts (workspace_id, x, y) VALUES ($1, $2, $3)", false, "unrelated table that contains 'workspace' in a column ref"},
{"UPDATE workspaces SET status='running' WHERE id=$1", false, "UPDATE shouldn't match"},
{"SELECT * FROM workspaces WHERE id=$1", false, "SELECT shouldn't match"},
{"-- comment about INSERT INTO workspaces (\nSELECT 1", false, "comment shouldn't match"},
}
for _, c := range cases {
got := workspacesInsertRE.MatchString(c.sql)
if got != c.want {
t.Errorf("workspacesInsertRE.MatchString(%q) = %v, want %v (%s)", c.sql, got, c.want, c.comment)
}
}
}
// Confirm the regex actually matches the literal currently in
// org_import.go. Pins the shape so `gofmt` reflows or trivial edits
// to the SQL string don't silently disable the gate above.
func TestWorkspacesInsertRE_MatchesActualSourceLiteral(t *testing.T) {
wd, err := os.Getwd()
if err != nil {
t.Fatalf("getwd: %v", err)
}
src, err := os.ReadFile(filepath.Join(wd, "org_import.go"))
if err != nil {
t.Fatalf("read org_import.go: %v", err)
}
// Strip backtick strings, find any whose content matches.
// Walk the source via parser.ParseFile to avoid string-search
// drift if the literal is reflowed.
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, filepath.Join(wd, "org_import.go"), src, parser.ParseComments)
if err != nil {
t.Fatalf("parse org_import.go: %v", err)
}
var matched bool
ast.Inspect(file, func(n ast.Node) bool {
lit, ok := n.(*ast.BasicLit)
if !ok || lit.Kind != token.STRING {
return true
}
raw := lit.Value
if unq, err := strconv.Unquote(raw); err == nil {
raw = unq
}
if workspacesInsertRE.MatchString(raw) {
matched = true
}
return true
})
if !matched {
t.Fatalf("no SQL literal in org_import.go matches workspacesInsertRE — gate is dead. Either the INSERT was renamed (update the regex) or the file was restructured (review the gate logic).")
}
// strings.Contains keeps the test informative: if the regex
// stopped matching but the literal source still contains the
// magic phrase, that's a regex-side failure (test the fix above).
if !strings.Contains(string(src), "INSERT INTO workspaces") {
t.Fatalf("org_import.go has no `INSERT INTO workspaces` substring at all — schema change?")
}
}

View File

@ -0,0 +1,476 @@
//go:build integration
// +build integration
// pending_uploads_integration_test.go — REAL Postgres integration
// tests for the poll-mode chat upload flow (RFC: phases 1–3).
//
// Run with:
//
// docker run --rm -d --name pg-integration \
// -e POSTGRES_PASSWORD=test -e POSTGRES_DB=molecule \
// -p 55432:5432 postgres:15-alpine
// sleep 4
// psql ... < workspace-server/migrations/20260505100000_pending_uploads.up.sql
// cd workspace-server
// INTEGRATION_DB_URL="postgres://postgres:test@localhost:55432/molecule?sslmode=disable" \
// go test -tags=integration ./internal/handlers/ -run Integration_PendingUploads
//
// CI (.github/workflows/handlers-postgres-integration.yml) runs this on
// every PR that touches workspace-server/internal/handlers/** OR
// workspace-server/migrations/**.
//
// Why these are NOT plain unit tests
// ----------------------------------
// The strict-sqlmock unit tests in storage_test.go pin which SQL
// statements fire — they are fast and let us iterate without a DB. But
// sqlmock CANNOT detect bugs that depend on the actual row state after
// the SQL runs. In particular:
//
// - the WITH … DELETE … RETURNING CTE used by Sweep depends on
// Postgres' `make_interval` function and the table's CHECK
// constraints. sqlmock would happily accept a hand-written SQL
// literal that Postgres rejects at runtime.
// - the partial index `idx_pending_uploads_unacked` (created by the
// Phase 1 migration) only catches a wrong WHERE predicate at real-
// query-plan time.
//
// These tests close those gaps by booting a real Postgres, running the
// production helpers, and SELECTing the row to verify the observable
// state matches the expected outcome.
package handlers
import (
"context"
"database/sql"
"os"
"strings"
"testing"
"time"
"github.com/google/uuid"
_ "github.com/lib/pq"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
)
// integrationDB_PendingUploads opens a connection from $INTEGRATION_DB_URL
// (skipping the test if unset), wipes the pending_uploads table for
// isolation, and registers a Cleanup that closes the connection.
//
// NOT SAFE FOR `t.Parallel()` — the wipe assumes each test gets the
// table to itself, so parallel tests would delete each other's rows.
// Mirrors the integrationDB helper in delegation_ledger_integration_test.go
// but kept separate so each table's wipe step is local to its tests.
func integrationDB_PendingUploads(t *testing.T) *sql.DB {
t.Helper()
url := os.Getenv("INTEGRATION_DB_URL")
if url == "" {
t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)")
}
conn, err := sql.Open("postgres", url)
if err != nil {
t.Fatalf("open: %v", err)
}
if err := conn.Ping(); err != nil {
t.Fatalf("ping: %v", err)
}
if _, err := conn.ExecContext(context.Background(), `DELETE FROM pending_uploads`); err != nil {
t.Fatalf("cleanup: %v", err)
}
t.Cleanup(func() { conn.Close() })
return conn
}
func TestIntegration_PendingUploads_PutGetAckRoundTrip(t *testing.T) {
conn := integrationDB_PendingUploads(t)
store := pendinguploads.NewPostgres(conn)
ctx := context.Background()
wsID := uuid.New()
fileID, err := store.Put(ctx, wsID, []byte("hello PDF"), "report.pdf", "application/pdf")
if err != nil {
t.Fatalf("Put: %v", err)
}
// Get reads back the row.
rec, err := store.Get(ctx, fileID)
if err != nil {
t.Fatalf("Get: %v", err)
}
if rec.WorkspaceID != wsID {
t.Errorf("workspace_id = %s, want %s", rec.WorkspaceID, wsID)
}
if string(rec.Content) != "hello PDF" {
t.Errorf("content = %q, want %q", rec.Content, "hello PDF")
}
if rec.Filename != "report.pdf" {
t.Errorf("filename = %q, want %q", rec.Filename, "report.pdf")
}
if rec.AckedAt != nil {
t.Errorf("AckedAt should be nil before Ack, got %v", rec.AckedAt)
}
// MarkFetched stamps fetched_at.
if err := store.MarkFetched(ctx, fileID); err != nil {
t.Fatalf("MarkFetched: %v", err)
}
// Re-read to confirm.
rec2, err := store.Get(ctx, fileID)
if err != nil {
t.Fatalf("Get after MarkFetched: %v", err)
}
if rec2.FetchedAt == nil {
t.Errorf("FetchedAt should be set after MarkFetched")
}
// Ack flips acked_at; subsequent Gets return ErrNotFound (acked rows
// are filtered out at the SELECT predicate).
if err := store.Ack(ctx, fileID); err != nil {
t.Fatalf("Ack: %v", err)
}
if _, err := store.Get(ctx, fileID); err != pendinguploads.ErrNotFound {
t.Errorf("Get after Ack: got %v, want ErrNotFound", err)
}
// Idempotent re-ack succeeds.
if err := store.Ack(ctx, fileID); err != nil {
t.Errorf("re-Ack should be idempotent, got %v", err)
}
}
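// Assumed shape of the SELECT predicate behind Get (the real SQL lives in
// the pendinguploads package; shown only to make "acked rows are filtered
// out at the SELECT predicate" concrete):
//
//	SELECT ... FROM pending_uploads
//	WHERE file_id = $1
//	  AND acked_at IS NULL
//	  AND expires_at > now();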
func TestIntegration_PendingUploads_Sweep_DeletesAckedAfterRetention(t *testing.T) {
conn := integrationDB_PendingUploads(t)
store := pendinguploads.NewPostgres(conn)
ctx := context.Background()
wsID := uuid.New()
fid, err := store.Put(ctx, wsID, []byte("data"), "x.txt", "text/plain")
if err != nil {
t.Fatalf("Put: %v", err)
}
if err := store.Ack(ctx, fid); err != nil {
t.Fatalf("Ack: %v", err)
}
// retention=1h, row was acked just now → not yet eligible.
res, err := store.Sweep(ctx, time.Hour)
if err != nil {
t.Fatalf("Sweep(1h): %v", err)
}
if res.Total() != 0 {
t.Errorf("expected 0 deletions yet, got %+v", res)
}
// retention=0 → row IS eligible immediately.
res, err = store.Sweep(ctx, 0)
if err != nil {
t.Fatalf("Sweep(0): %v", err)
}
if res.Acked != 1 || res.Expired != 0 {
t.Errorf("expected acked=1 expired=0, got %+v", res)
}
// Verify row is actually gone — not just un-fetchable.
var n int
if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE file_id = $1`, fid).Scan(&n); err != nil {
t.Fatalf("count: %v", err)
}
if n != 0 {
t.Errorf("row should be DELETEd, found %d rows", n)
}
}
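// Assumed shape of the Sweep CTE the file header refers to (the real
// statement lives in the pendinguploads package; CTE and column names are
// illustrative). The retention argument enters via make_interval, which is
// why only a real Postgres can validate it:
//
//	WITH victims AS (
//	    DELETE FROM pending_uploads
//	    WHERE (acked_at IS NOT NULL
//	           AND acked_at < now() - make_interval(secs => $1))
//	       OR (acked_at IS NULL AND expires_at < now())
//	    RETURNING acked_at
//	)
//	SELECT
//	    COUNT(*) FILTER (WHERE acked_at IS NOT NULL) AS acked,
//	    COUNT(*) FILTER (WHERE acked_at IS NULL)     AS expired
//	FROM victims;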
func TestIntegration_PendingUploads_Sweep_DeletesExpiredUnacked(t *testing.T) {
conn := integrationDB_PendingUploads(t)
store := pendinguploads.NewPostgres(conn)
ctx := context.Background()
wsID := uuid.New()
fid, err := store.Put(ctx, wsID, []byte("data"), "x.txt", "text/plain")
if err != nil {
t.Fatalf("Put: %v", err)
}
// Manually backdate expires_at so the row IS expired. We don't ack,
// so this exercises the unacked-and-expired branch of the WHERE
// clause specifically.
if _, err := conn.ExecContext(ctx,
`UPDATE pending_uploads SET expires_at = now() - interval '1 minute' WHERE file_id = $1`,
fid,
); err != nil {
t.Fatalf("backdate: %v", err)
}
res, err := store.Sweep(ctx, time.Hour)
if err != nil {
t.Fatalf("Sweep: %v", err)
}
if res.Acked != 0 || res.Expired != 1 {
t.Errorf("expected acked=0 expired=1, got %+v", res)
}
}
func TestIntegration_PendingUploads_Sweep_DeletesBothCategoriesInOneCycle(t *testing.T) {
conn := integrationDB_PendingUploads(t)
store := pendinguploads.NewPostgres(conn)
ctx := context.Background()
wsID := uuid.New()
// Three rows: one acked (eligible at retention=0), one expired
// unacked, one fresh unacked (must NOT be deleted).
ackedFID, err := store.Put(ctx, wsID, []byte("acked"), "a.txt", "text/plain")
if err != nil {
t.Fatalf("Put acked: %v", err)
}
if err := store.Ack(ctx, ackedFID); err != nil {
t.Fatalf("Ack: %v", err)
}
expiredFID, err := store.Put(ctx, wsID, []byte("expired"), "e.txt", "text/plain")
if err != nil {
t.Fatalf("Put expired: %v", err)
}
if _, err := conn.ExecContext(ctx,
`UPDATE pending_uploads SET expires_at = now() - interval '1 minute' WHERE file_id = $1`,
expiredFID,
); err != nil {
t.Fatalf("backdate: %v", err)
}
freshFID, err := store.Put(ctx, wsID, []byte("fresh"), "f.txt", "text/plain")
if err != nil {
t.Fatalf("Put fresh: %v", err)
}
res, err := store.Sweep(ctx, 0) // retention=0 makes the acked row eligible
if err != nil {
t.Fatalf("Sweep: %v", err)
}
if res.Acked != 1 || res.Expired != 1 {
t.Errorf("expected acked=1 expired=1, got %+v", res)
}
// Fresh row survives.
rec, err := store.Get(ctx, freshFID)
if err != nil {
t.Errorf("fresh row should still be Get-able, got err=%v", err)
}
if rec.FileID != freshFID {
t.Errorf("fresh row file_id = %s, want %s", rec.FileID, freshFID)
}
}
func TestIntegration_PendingUploads_PutEnforcesSizeCap(t *testing.T) {
conn := integrationDB_PendingUploads(t)
store := pendinguploads.NewPostgres(conn)
ctx := context.Background()
wsID := uuid.New()
tooBig := make([]byte, pendinguploads.MaxFileBytes+1)
if _, err := store.Put(ctx, wsID, tooBig, "big.bin", "application/octet-stream"); err != pendinguploads.ErrTooLarge {
t.Errorf("expected ErrTooLarge, got %v", err)
}
}
// TestIntegration_PendingUploads_PutBatch_HappyPath_AllRowsCommit pins the
// "all rows commit" leg of the PutBatch atomicity contract against a real
// Postgres. sqlmock can't catch a regression where the Go-side Tx machinery
// silently no-ops the inserts (e.g., wrong driver options on BeginTx); only
// COUNT(*) on the real table can.
func TestIntegration_PendingUploads_PutBatch_HappyPath_AllRowsCommit(t *testing.T) {
conn := integrationDB_PendingUploads(t)
store := pendinguploads.NewPostgres(conn)
ctx := context.Background()
wsID := uuid.New()
// Pre-existing row so the COUNT(*) baseline is non-zero — proves
// PutBatch adds rows incrementally rather than overwriting.
if _, err := store.Put(ctx, wsID, []byte("seed"), "seed.txt", "text/plain"); err != nil {
t.Fatalf("seed Put: %v", err)
}
items := []pendinguploads.PutItem{
{Content: []byte("alpha"), Filename: "alpha.txt", Mimetype: "text/plain"},
{Content: []byte("beta"), Filename: "beta.bin", Mimetype: "application/octet-stream"},
{Content: []byte("gamma"), Filename: "gamma.pdf", Mimetype: "application/pdf"},
}
ids, err := store.PutBatch(ctx, wsID, items)
if err != nil {
t.Fatalf("PutBatch: %v", err)
}
if len(ids) != len(items) {
t.Fatalf("ids length %d, want %d", len(ids), len(items))
}
// Each returned id round-trips through Get with the right content.
for i, id := range ids {
rec, err := store.Get(ctx, id)
if err != nil {
t.Fatalf("Get item %d (%s): %v", i, id, err)
}
if string(rec.Content) != string(items[i].Content) {
t.Errorf("item %d content = %q, want %q", i, rec.Content, items[i].Content)
}
if rec.Filename != items[i].Filename {
t.Errorf("item %d filename = %q, want %q", i, rec.Filename, items[i].Filename)
}
}
var n int
if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&n); err != nil {
t.Fatalf("count: %v", err)
}
if n != 4 {
t.Errorf("workspace row count = %d, want 4 (1 seed + 3 batch)", n)
}
}
// TestIntegration_PendingUploads_PutBatch_AtomicRollback_NoLeakOnFailure
// proves the all-or-nothing contract end-to-end against real Postgres MVCC.
//
// Strategy: build a 3-item batch where item index 1 carries a filename with
// an embedded NUL byte. lib/pq rejects NULs in TEXT columns at the protocol
// layer (`pq: invalid byte sequence for encoding "UTF8": 0x00`), which
// triggers the per-row INSERT error path in PutBatch. The first item's
// INSERT…RETURNING already wrote a row to the Tx's snapshot, so a buggy
// rollback would leave that row visible after PutBatch returns.
//
// Postgres semantics: ROLLBACK is the only way a real DB can guarantee the
// "no leak" contract; a unit test with sqlmock can prove the Go function
// CALLED Rollback, but only this integration test proves Postgres actually
// HONORED it.
func TestIntegration_PendingUploads_PutBatch_AtomicRollback_NoLeakOnFailure(t *testing.T) {
conn := integrationDB_PendingUploads(t)
store := pendinguploads.NewPostgres(conn)
ctx := context.Background()
wsID := uuid.New()
// Baseline COUNT(*) for this workspace — must remain 0 after a failed batch.
var before int
if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&before); err != nil {
t.Fatalf("baseline count: %v", err)
}
if before != 0 {
t.Fatalf("workspace not isolated: baseline = %d, want 0", before)
}
// Item 1 has a NUL byte in the filename — Go-side pre-validation
// (which only checks empty/length) lets it through, so the INSERT
// reaches lib/pq, which rejects it at the protocol level. That's the
// canonical "DB-side error mid-batch" we want to exercise.
items := []pendinguploads.PutItem{
{Content: []byte("ok"), Filename: "ok.txt", Mimetype: "text/plain"},
{Content: []byte("bad"), Filename: "bad\x00name.txt", Mimetype: "text/plain"},
{Content: []byte("never"), Filename: "never.txt", Mimetype: "text/plain"},
}
_, err := store.PutBatch(ctx, wsID, items)
if err == nil {
t.Fatalf("expected error from NUL-byte filename, got nil")
}
// THE assertion this whole test exists for: even though item 0's
// INSERT…RETURNING succeeded inside the Tx, the rollback unwound
// it — zero rows for this workspace, not one (let alone three).
var after int
if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&after); err != nil {
t.Fatalf("post-failure count: %v", err)
}
if after != 0 {
t.Errorf("Tx rollback leaked rows: workspace count = %d, want 0", after)
}
}
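// putBatchTxSketch is a hypothetical sketch of the transaction shape the
// two PutBatch tests above assume. It is NOT the production implementation
// (that lives in the pendinguploads package) and is never called by any
// test; table and column names are illustrative. It shows why a mid-batch
// INSERT error unwinds earlier rows: the deferred Rollback fires on every
// path that skips Commit.
func putBatchTxSketch(ctx context.Context, conn *sql.DB, wsID uuid.UUID, items []pendinguploads.PutItem) ([]uuid.UUID, error) {
	// Pre-validation short-circuit (mirrors the Oversize test below):
	// reject before any Tx opens.
	for _, it := range items {
		if len(it.Content) > int(pendinguploads.MaxFileBytes) {
			return nil, pendinguploads.ErrTooLarge
		}
	}
	tx, err := conn.BeginTx(ctx, nil)
	if err != nil {
		return nil, err
	}
	// Rollback after a successful Commit is a harmless no-op.
	defer func() { _ = tx.Rollback() }()
	ids := make([]uuid.UUID, 0, len(items))
	for _, it := range items {
		var id uuid.UUID
		err := tx.QueryRowContext(ctx,
			`INSERT INTO pending_uploads (workspace_id, content, filename, mimetype)
			 VALUES ($1, $2, $3, $4) RETURNING file_id`,
			wsID, it.Content, it.Filename, it.Mimetype,
		).Scan(&id)
		if err != nil {
			// e.g. lib/pq rejecting a NUL byte: earlier INSERTs are
			// unwound by the deferred Rollback, so no rows leak.
			return nil, err
		}
		ids = append(ids, id)
	}
	if err := tx.Commit(); err != nil {
		return nil, err
	}
	return ids, nil
}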
// TestIntegration_PendingUploads_PutBatch_Oversize_NoTxOpened verifies the
// pre-validation short-circuit: an oversized item is rejected with ErrTooLarge
// BEFORE any Tx opens, so the table is untouched. The unit test (sqlmock
// with zero expectations) catches the Go-side path; this test sanity-checks
// no real DB I/O happens by confirming COUNT(*) doesn't move.
func TestIntegration_PendingUploads_PutBatch_Oversize_NoTxOpened(t *testing.T) {
conn := integrationDB_PendingUploads(t)
store := pendinguploads.NewPostgres(conn)
ctx := context.Background()
wsID := uuid.New()
tooBig := make([]byte, pendinguploads.MaxFileBytes+1)
_, err := store.PutBatch(ctx, wsID, []pendinguploads.PutItem{
{Content: []byte("ok"), Filename: "ok.txt"},
{Content: tooBig, Filename: "too-big.bin"},
})
if err != pendinguploads.ErrTooLarge {
t.Fatalf("expected ErrTooLarge, got %v", err)
}
var n int
if err := conn.QueryRowContext(ctx, `SELECT COUNT(*) FROM pending_uploads WHERE workspace_id = $1`, wsID).Scan(&n); err != nil {
t.Fatalf("count: %v", err)
}
if n != 0 {
t.Errorf("pre-validation did NOT short-circuit: count = %d, want 0", n)
}
}
// TestIntegration_PendingUploads_AckedIndexExists verifies the Phase 5a
// migration (20260505200000_pending_uploads_acked_index.up.sql) actually
// created idx_pending_uploads_acked with the right partial-index predicate.
//
// Why pg_indexes and not EXPLAIN: the planner prefers Seq Scan on tiny
// tables regardless of available indexes — a plan-shape check would be
// flaky under real test loads. The contract we care about is "the index
// exists with the predicate we wrote in the migration"; pg_indexes is
// the canonical source for that, robust to row count and planner version.
func TestIntegration_PendingUploads_AckedIndexExists(t *testing.T) {
conn := integrationDB_PendingUploads(t)
ctx := context.Background()
var indexdef string
err := conn.QueryRowContext(ctx, `
SELECT indexdef FROM pg_indexes
WHERE schemaname = 'public'
AND tablename = 'pending_uploads'
AND indexname = 'idx_pending_uploads_acked'
`).Scan(&indexdef)
if err == sql.ErrNoRows {
t.Fatal("idx_pending_uploads_acked is missing — migration 20260505200000 not applied")
}
if err != nil {
t.Fatalf("pg_indexes query: %v", err)
}
// Pin the partial-index predicate. Without "WHERE acked_at IS NOT NULL"
// we'd be indexing the entire table (defeats the point — most rows are
// unacked), and the existing idx_pending_uploads_unacked already covers
// the inverse predicate.
if !strings.Contains(indexdef, "(acked_at)") {
t.Errorf("index missing acked_at column: %s", indexdef)
}
if !strings.Contains(indexdef, "WHERE (acked_at IS NOT NULL)") {
t.Errorf("index missing partial predicate: %s", indexdef)
}
}
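// For reference, the DDL the assertions above imply (assumed shape; the
// authoritative statement is the migration file itself):
//
//	CREATE INDEX idx_pending_uploads_acked
//	    ON pending_uploads (acked_at)
//	    WHERE acked_at IS NOT NULL;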
func TestIntegration_PendingUploads_GetIgnoresExpiredAndAcked(t *testing.T) {
conn := integrationDB_PendingUploads(t)
store := pendinguploads.NewPostgres(conn)
ctx := context.Background()
wsID := uuid.New()
fid, err := store.Put(ctx, wsID, []byte("data"), "x.txt", "text/plain")
if err != nil {
t.Fatalf("Put: %v", err)
}
// Backdate expires_at — Get must return ErrNotFound, even though the
// row physically exists in the table (Sweep hasn't run).
if _, err := conn.ExecContext(ctx,
`UPDATE pending_uploads SET expires_at = now() - interval '1 minute' WHERE file_id = $1`,
fid,
); err != nil {
t.Fatalf("backdate: %v", err)
}
if _, err := store.Get(ctx, fid); err != pendinguploads.ErrNotFound {
t.Errorf("Get after expiry: got %v, want ErrNotFound", err)
}
}

View File

@ -71,6 +71,20 @@ func (f *fakeStorage) Ack(_ context.Context, fileID uuid.UUID) error {
return nil
}
// Sweep is required by the Storage interface (Phase 3 GC). Not exercised
// by these handler tests — the dedicated sweeper_test.go covers it.
func (f *fakeStorage) Sweep(_ context.Context, _ time.Duration) (pendinguploads.SweepResult, error) {
return pendinguploads.SweepResult{}, nil
}
// PutBatch is required by the Storage interface; the upload handler
// tests live in chat_files_poll_test.go and use a separate fake
// (inMemStorage). Stubbed here because the Get/Ack tests don't drive
// PutBatch, but the interface must be satisfied.
func (f *fakeStorage) PutBatch(_ context.Context, _ uuid.UUID, _ []pendinguploads.PutItem) ([]uuid.UUID, error) {
return nil, nil
}
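// For orientation, the Storage interface these stubs satisfy, collected
// from the calls the handler and sweeper tests make (assumed shape;
// signatures are approximate, and the authoritative definition lives in
// the pendinguploads package):
//
//	type Storage interface {
//	    Put(ctx context.Context, wsID uuid.UUID, content []byte, filename, mimetype string) (uuid.UUID, error)
//	    PutBatch(ctx context.Context, wsID uuid.UUID, items []PutItem) ([]uuid.UUID, error)
//	    Get(ctx context.Context, fileID uuid.UUID) (*Record, error)
//	    MarkFetched(ctx context.Context, fileID uuid.UUID) error
//	    Ack(ctx context.Context, fileID uuid.UUID) error
//	    Sweep(ctx context.Context, retention time.Duration) (SweepResult, error)
//	}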
func newRouter(handler *handlers.PendingUploadsHandler) *gin.Engine {
gin.SetMode(gin.TestMode)
r := gin.New()

View File

@ -0,0 +1,112 @@
package handlers
// provlog_emit_test.go — pins that the structured-logging emit sites
// added for #2867 PR-D actually fire when their boundary is crossed.
//
// These are call-site contract tests, not provlog package tests (those
// live next to the helper). The assertion is "this dispatcher path
// emits this event name" — if a refactor moves the call out of the
// boundary helper, the gate fails. Only a minimal field subset is
// pinned (workspace_id plus the discriminating sync/backend flag); the
// full field set is convenience for ops, not contract for the emit
// point, and pinning it would block additive evolution of the payload
// (see also feedback_behavior_based_ast_gates.md).
import (
"bytes"
"context"
"log"
"strings"
"sync"
"testing"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
)
// captureProvLog redirects the global logger to a buffer for the test
// duration. provlog.Event uses log.Printf, so this is the only seam.
// An internal mutex, shared by the safeWriter and the returned read
// func, guards the buffer against the goroutine fired by
// provisionWorkspaceAuto racing the assertion (the goroutine never
// returns in these tests because Start() is stubbed, but it can still
// write to the buffer while the test reads it).
func captureProvLog(t *testing.T) (read func() string) {
t.Helper()
var buf bytes.Buffer
var mu sync.Mutex
prevWriter := log.Writer()
prevFlags := log.Flags()
log.SetFlags(0)
log.SetOutput(&safeWriter{buf: &buf, mu: &mu})
t.Cleanup(func() {
log.SetOutput(prevWriter)
log.SetFlags(prevFlags)
})
return func() string {
mu.Lock()
defer mu.Unlock()
return buf.String()
}
}
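// The Contains assertions below rely on provlog.Event formatting each
// line roughly as (assumed shape; the authoritative format lives in the
// provlog package):
//
//	log.Printf("evt: %s %s", name, payloadJSON)
//	// e.g. evt: provision.start {"workspace_id":"ws-test-1","sync":true,...}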
// TestProvisionWorkspaceAutoSync_EmitsProvisionStart — sync variant is
// chosen for the assertion path because it returns once the (stubbed)
// Start() has been called, so we know the emit has flushed. The async
// variant would race a goroutine.
func TestProvisionWorkspaceAutoSync_EmitsProvisionStart(t *testing.T) {
read := captureProvLog(t)
h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
// Best-effort: the body will hit DB code under provisionWorkspaceCP
// — we only need the emit at the entry, which fires unconditionally
// before the dispatch. Recovering from any later panic keeps the
// test focused.
defer func() { _ = recover() }()
h.provisionWorkspaceAutoSync("ws-test-1", "tmpl", nil, models.CreateWorkspacePayload{
Name: "n", Tier: 4, Runtime: "claude-code",
})
got := read()
if !strings.Contains(got, "evt: provision.start ") {
t.Fatalf("expected provision.start emit, got log:\n%s", got)
}
if !strings.Contains(got, `"workspace_id":"ws-test-1"`) {
t.Errorf("workspace_id not in payload: %s", got)
}
if !strings.Contains(got, `"sync":true`) {
t.Errorf("sync flag not pinned for sync dispatcher: %s", got)
}
}
// TestStopForRestart_EmitsRestartPreStop — emit fires before the actual
// Stop call, so the trackingCPProv stub doesn't need to be wired for
// real Stop semantics. Backend label "cp" pinned because that's the
// SaaS path; we don't pin "docker" or "none" branches here (separate
// tests would only re-test the trivial branch label switch).
func TestStopForRestart_EmitsRestartPreStop(t *testing.T) {
read := captureProvLog(t)
h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
defer func() { _ = recover() }()
h.stopForRestart(context.Background(), "ws-restart-1")
got := read()
if !strings.Contains(got, "evt: restart.pre_stop ") {
t.Fatalf("expected restart.pre_stop emit, got log:\n%s", got)
}
if !strings.Contains(got, `"workspace_id":"ws-restart-1"`) {
t.Errorf("workspace_id not in payload: %s", got)
}
if !strings.Contains(got, `"backend":"cp"`) {
t.Errorf("backend label missing or wrong: %s", got)
}
}
// TestStopForRestart_EmitsBackendNoneWhenUnwired — pin the no-backend
// branch so a future refactor that drops the label switch is caught.
// This is the silent-Stop case (workspace_dispatchers.go:StopWorkspaceAuto
// returns nil for unwired backends); the emit ensures the operator can
// still see the boundary in the log.
func TestStopForRestart_EmitsBackendNoneWhenUnwired(t *testing.T) {
read := captureProvLog(t)
h := &WorkspaceHandler{} // both nil
h.stopForRestart(context.Background(), "ws-restart-2")
got := read()
if !strings.Contains(got, `"backend":"none"`) {
t.Fatalf("expected backend=none for unwired handler: %s", got)
}
}

View File

@ -0,0 +1,99 @@
package handlers
import (
"strings"
"testing"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
)
// Tests for the SaaS-aware default-tier resolution introduced in #2901
// and hardened in #2910 (the multi-model review of #2901 found the
// original "all green" claim held only because no SaaS-mode test existed).
//
// These tests pin three invariants:
//
// 1. WorkspaceHandler.IsSaaS() returns true when cpProv is wired,
// false otherwise.
// 2. WorkspaceHandler.DefaultTier() returns 4 on SaaS, 3 self-hosted.
// 3. generateDefaultConfig (TemplatesHandler.Import path) writes the
// passed-in tier into the generated config.yaml — pre-#2910 it
// was hardcoded to 3 and silently disagreed with the create-
// handler default on SaaS.
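// A minimal sketch of the two accessors pinned by invariants 1 and 2
// (assumed shape; the real methods live on WorkspaceHandler in the
// handlers package):
//
//	func (h *WorkspaceHandler) IsSaaS() bool { return h.cpProv != nil }
//
//	func (h *WorkspaceHandler) DefaultTier() int {
//	    if h.IsSaaS() {
//	        return 4 // SaaS: each workspace runs on its own sibling EC2
//	    }
//	    return 3 // self-hosted baseline
//	}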
// stubCPProv is a minimal stand-in for the CP provisioner interface.
// It is never invoked by these tests; the SaaS cases below wire the
// established trackingCPProv stub (defined in
// workspace_provision_auto_test.go) instead.
type stubCPProv struct{}
func (stubCPProv) Start(_ interface{}, _ provisioner.WorkspaceConfig) (string, error) {
return "", nil
}
func (stubCPProv) Stop(_ interface{}, _ string) error { return nil }
func (stubCPProv) Restart(_ interface{}, _ provisioner.WorkspaceConfig) (string, error) {
return "", nil
}
func TestIsSaaS_TrueWhenCPProvWired(t *testing.T) {
h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
if !h.IsSaaS() {
t.Errorf("IsSaaS()=false with cpProv wired; expected true")
}
}
func TestIsSaaS_FalseWhenOnlyDocker(t *testing.T) {
// Self-hosted path: cpProv nil. IsSaaS keys only off cpProv, so the
// docker provisioner doesn't need to be wired for this assertion;
// both backends stay nil and the check must still report false.
h := &WorkspaceHandler{provisioner: nil, cpProv: nil}
if h.IsSaaS() {
t.Errorf("IsSaaS()=true with both backends nil; expected false")
}
}
func TestDefaultTier_SaaS_IsT4(t *testing.T) {
h := &WorkspaceHandler{cpProv: &trackingCPProv{}}
if got := h.DefaultTier(); got != 4 {
t.Errorf("SaaS DefaultTier()=%d; expected 4", got)
}
}
func TestDefaultTier_SelfHosted_IsT3(t *testing.T) {
h := &WorkspaceHandler{}
if got := h.DefaultTier(); got != 3 {
t.Errorf("self-hosted DefaultTier()=%d; expected 3", got)
}
}
// generateDefaultConfig — pin that the tier param flows into the
// emitted config.yaml verbatim. Pre-#2910 this was hardcoded "tier: 3"
// regardless of caller intent.
func TestGenerateDefaultConfig_RespectsTierParam(t *testing.T) {
cfg := generateDefaultConfig("Test Agent", map[string]string{"system-prompt.md": ""}, 4)
if !strings.Contains(cfg, "tier: 4\n") {
t.Errorf("expected `tier: 4` in generated config, got:\n%s", cfg)
}
// The pre-#2910 hardcoded `tier: 3` line must NOT appear.
if strings.Contains(cfg, "tier: 3\n") {
t.Errorf("config should not contain `tier: 3` when caller passed 4, got:\n%s", cfg)
}
}
func TestGenerateDefaultConfig_SelfHostedTierT3(t *testing.T) {
cfg := generateDefaultConfig("Test Agent", map[string]string{"system-prompt.md": ""}, 3)
if !strings.Contains(cfg, "tier: 3\n") {
t.Errorf("expected `tier: 3` in generated config, got:\n%s", cfg)
}
}
// Bounds check — caller passes 0 or out-of-range, helper falls back
// to T3 (the safer-of-the-two when deployment mode can't be resolved).
func TestGenerateDefaultConfig_OutOfRangeFallsBackToT3(t *testing.T) {
for _, tier := range []int{0, -1, 99} {
cfg := generateDefaultConfig("X", map[string]string{}, tier)
if !strings.Contains(cfg, "tier: 3\n") {
t.Errorf("invalid tier %d should fall back to T3, got:\n%s", tier, cfg)
}
}
}

View File

@ -71,7 +71,7 @@ func TestSecurity_GetTemplates_NoAuth_Returns401(t *testing.T) {
authDB, authMock := newEnrolledAuthDB(t)
tmpDir := t.TempDir()
tmplh := NewTemplatesHandler(tmpDir, nil)
tmplh := NewTemplatesHandler(tmpDir, nil, nil)
r := gin.New()
r.GET("/templates", middleware.AdminAuth(authDB), tmplh.List)
@ -98,7 +98,7 @@ func TestSecurity_GetTemplates_FreshInstall_FailsOpen(t *testing.T) {
authDB, authMock := newFreshInstallAuthDB(t)
tmpDir := t.TempDir()
tmplh := NewTemplatesHandler(tmpDir, nil)
tmplh := NewTemplatesHandler(tmpDir, nil, nil)
r := gin.New()
r.GET("/templates", middleware.AdminAuth(authDB), tmplh.List)

View File

@ -1,132 +0,0 @@
package handlers
import (
"encoding/json"
"log"
"net/http"
"os"
"path/filepath"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
"github.com/gin-gonic/gin"
"gopkg.in/yaml.v3"
)
// TeamHandler now hosts only Collapse — the visual "expand" action is
// canvas-side and creating children goes through the regular
// WorkspaceHandler.Create path with parent_id set, like any other
// workspace. Every workspace can have children; "team" is just the
// state of having children. The old Expand handler bulk-created
// children by reading sub_workspaces from a parent's config and was
// non-idempotent — calling it N times leaked N×children EC2s, which
// is how tenant-hongming accumulated 72 stale workspaces.
type TeamHandler struct {
wh *WorkspaceHandler
b *events.Broadcaster
}
// NewTeamHandler constructs a TeamHandler. wh is used by Collapse to
// route StopWorkspaceAuto through the backend dispatcher.
func NewTeamHandler(b *events.Broadcaster, wh *WorkspaceHandler, platformURL, configsDir string) *TeamHandler {
return &TeamHandler{wh: wh, b: b}
}
// Collapse handles POST /workspaces/:id/collapse
// Stops and removes all child workspaces.
func (h *TeamHandler) Collapse(c *gin.Context) {
parentID := c.Param("id")
ctx := c.Request.Context()
// Find children
rows, err := db.DB.QueryContext(ctx,
`SELECT id, name FROM workspaces WHERE parent_id = $1 AND status != 'removed'`, parentID)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to query children"})
return
}
defer rows.Close()
removed := make([]string, 0)
for rows.Next() {
var childID, childName string
if rows.Scan(&childID, &childName) != nil {
continue
}
// Stop the workload via the backend dispatcher (CP for SaaS,
// Docker for self-hosted). Pre-2026-05-05 this was
// `if h.provisioner != nil { h.provisioner.Stop(...) }`, which
// silently skipped on every SaaS tenant — child EC2s kept running
// after team-collapse until the orphan sweeper caught them
// (issue #2813).
if err := h.wh.StopWorkspaceAuto(ctx, childID); err != nil {
log.Printf("Team collapse: stop %s failed: %v — orphan sweeper will reconcile", childID, err)
}
// Mark as removed
if _, err := db.DB.ExecContext(ctx,
`UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2`, models.StatusRemoved, childID); err != nil {
log.Printf("Team collapse: failed to remove workspace %s: %v", childID, err)
}
if _, err := db.DB.ExecContext(ctx,
`DELETE FROM canvas_layouts WHERE workspace_id = $1`, childID); err != nil {
log.Printf("Team collapse: failed to delete layout for %s: %v", childID, err)
}
h.b.RecordAndBroadcast(ctx, "WORKSPACE_REMOVED", childID, map[string]interface{}{})
removed = append(removed, childName)
}
h.b.RecordAndBroadcast(ctx, "WORKSPACE_COLLAPSED", parentID, map[string]interface{}{
"removed_children": removed,
})
c.JSON(http.StatusOK, gin.H{
"status": "collapsed",
"removed": removed,
})
}
// findTemplateDirByName resolves a workspace name to its template
// directory. Kept here because callers outside this package may use
// it, even though the in-package consumer (Expand) is gone.
//
// TODO: relocate alongside the templates handler if no other callers
// surface, or delete entirely after a deprecation cycle.
func findTemplateDirByName(configsDir, name string) string {
normalized := normalizeName(name)
candidate := filepath.Join(configsDir, normalized)
if _, err := os.Stat(filepath.Join(candidate, "config.yaml")); err == nil {
return candidate
}
// Fall back to scanning all dirs
entries, err := os.ReadDir(configsDir)
if err != nil {
return ""
}
for _, e := range entries {
if !e.IsDir() {
continue
}
cfgPath := filepath.Join(configsDir, e.Name(), "config.yaml")
data, err := os.ReadFile(cfgPath)
if err != nil {
continue
}
var cfg struct {
Name string `yaml:"name"`
}
if json.Unmarshal(data, &cfg) == nil && cfg.Name == name {
return filepath.Join(configsDir, e.Name())
}
if yaml.Unmarshal(data, &cfg) == nil && cfg.Name == name {
return filepath.Join(configsDir, e.Name())
}
}
return ""
}

View File

@ -1,130 +0,0 @@
package handlers
import (
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// ---------- TeamHandler: Collapse ----------
func TestTeamCollapse_NoChildren(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewTeamHandler(broadcaster, NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", "/tmp/configs")
// No children
mock.ExpectQuery("SELECT id, name FROM workspaces WHERE parent_id").
WithArgs("ws-parent").
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}))
// WORKSPACE_COLLAPSED broadcast
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-parent"}}
c.Request = httptest.NewRequest("POST", "/", nil)
handler.Collapse(c)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
if resp["status"] != "collapsed" {
t.Errorf("expected status 'collapsed', got %v", resp["status"])
}
}
func TestTeamCollapse_WithChildren(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewTeamHandler(broadcaster, NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", "/tmp/configs")
// Two children
mock.ExpectQuery("SELECT id, name FROM workspaces WHERE parent_id").
WithArgs("ws-parent").
WillReturnRows(sqlmock.NewRows([]string{"id", "name"}).
AddRow("child-1", "Worker A").
AddRow("child-2", "Worker B"))
// UPDATE + DELETE + broadcast for child-1
mock.ExpectExec("UPDATE workspaces SET status =").
WithArgs("child-1").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("DELETE FROM canvas_layouts").
WithArgs("child-1").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
// UPDATE + DELETE + broadcast for child-2
mock.ExpectExec("UPDATE workspaces SET status =").
WithArgs("child-2").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("DELETE FROM canvas_layouts").
WithArgs("child-2").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
// WORKSPACE_COLLAPSED broadcast for parent
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-parent"}}
c.Request = httptest.NewRequest("POST", "/", nil)
handler.Collapse(c)
if w.Code != http.StatusOK {
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
json.Unmarshal(w.Body.Bytes(), &resp)
removed, ok := resp["removed"].([]interface{})
if !ok || len(removed) != 2 {
t.Errorf("expected 2 removed children, got %v", resp["removed"])
}
}
// ---------- findTemplateDirByName helper ----------
func TestFindTemplateDirByName_DirectMatch(t *testing.T) {
dir := t.TempDir()
subDir := filepath.Join(dir, "mybot")
os.MkdirAll(subDir, 0755)
os.WriteFile(filepath.Join(subDir, "config.yaml"), []byte("name: MyBot"), 0644)
result := findTemplateDirByName(dir, "mybot")
if result != subDir {
t.Errorf("expected %s, got %s", subDir, result)
}
}
func TestFindTemplateDirByName_NotFound(t *testing.T) {
dir := t.TempDir()
result := findTemplateDirByName(dir, "nonexistent")
if result != "" {
t.Errorf("expected empty string, got %s", result)
}
}
func TestFindTemplateDirByName_InvalidConfigsDir(t *testing.T) {
result := findTemplateDirByName("/nonexistent/path", "anything")
if result != "" {
t.Errorf("expected empty string for invalid dir, got %s", result)
}
}

View File

@ -36,8 +36,14 @@ func normalizeName(name string) string {
return result
}
// generateDefaultConfig creates a config.yaml from detected prompt files and skills.
func generateDefaultConfig(name string, files map[string]string) string {
// generateDefaultConfig creates a config.yaml from detected prompt files
// and skills. tier is the deployment-aware default (caller passes
// h.wh.DefaultTier() — T4 on SaaS, T3 on self-hosted) so the generated
// file matches what POST /workspaces would default to. Pre-#2910 this
// was hardcoded to 3, which split-brained with the create-handler
// default on SaaS (T4) and pinned newly-imported templates at T3 even
// when downstream Create paths picked T4.
func generateDefaultConfig(name string, files map[string]string, tier int) string {
promptFiles := []string{}
skillSet := map[string]bool{}
@ -74,9 +80,15 @@ func generateDefaultConfig(name string, files map[string]string) string {
var cfg strings.Builder
cfg.WriteString(`name: "` + escaped + `"` + "\n")
cfg.WriteString("description: Imported agent\n")
// Default to tier 3 ("Privileged") — matches the workspace.go
// create handler default. See its comment for rationale.
cfg.WriteString("version: 1.0.0\ntier: 3\n")
// Tier is SaaS-aware via the caller's DefaultTier (#2910 PR-B).
// Bounds-checked: invalid input falls back to T3 (the historical
// default + the safer-of-the-two when the deployment mode can't
// be resolved).
if tier < 1 || tier > 4 {
tier = 3
}
cfg.WriteString("version: 1.0.0\n")
cfg.WriteString(fmt.Sprintf("tier: %d\n", tier))
cfg.WriteString("model: anthropic:claude-haiku-4-5-20251001\n")
cfg.WriteString("\nprompt_files:\n")
if len(promptFiles) > 0 {
@ -148,7 +160,11 @@ func (h *TemplatesHandler) Import(c *gin.Context) {
// Auto-generate config.yaml if not provided
if _, exists := body.Files["config.yaml"]; !exists {
cfg := generateDefaultConfig(body.Name, body.Files)
tier := 3
if h.wh != nil {
tier = h.wh.DefaultTier()
}
cfg := generateDefaultConfig(body.Name, body.Files, tier)
if err := os.WriteFile(filepath.Join(destDir, "config.yaml"), []byte(cfg), 0600); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to write config.yaml"})
return
@ -227,7 +243,11 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
if _, exists := body.Files["config.yaml"]; !exists {
// Check if config.yaml exists in container
if _, err := h.execInContainer(ctx, containerName, []string{"test", "-f", "/configs/config.yaml"}); err != nil {
cfg := generateDefaultConfig(wsName, body.Files)
tier := 3
if h.wh != nil {
tier = h.wh.DefaultTier()
}
cfg := generateDefaultConfig(wsName, body.Files, tier)
singleFile := map[string]string{"config.yaml": cfg}
h.copyFilesToContainer(ctx, containerName, "/configs", singleFile)
}

View File

@ -55,7 +55,7 @@ func TestGenerateDefaultConfig_WithFiles(t *testing.T) {
"skills/review/templates.md": "Templates",
}
cfg := generateDefaultConfig("Test Agent", files)
cfg := generateDefaultConfig("Test Agent", files, 3)
// Name is emitted as a double-quoted scalar (#221 sanitizer).
if !strings.Contains(cfg, `name: "Test Agent"`) {
@ -85,7 +85,7 @@ func TestGenerateDefaultConfig_Empty(t *testing.T) {
"data/something.json": `{"key": "value"}`,
}
cfg := generateDefaultConfig("Empty Agent", files)
cfg := generateDefaultConfig("Empty Agent", files, 3)
if !strings.Contains(cfg, `name: "Empty Agent"`) {
t.Errorf("config should contain quoted agent name, got:\n%s", cfg)
@ -134,7 +134,7 @@ func TestGenerateDefaultConfig_YAMLInjection(t *testing.T) {
for _, tc := range adversarialCases {
t.Run(tc.desc, func(t *testing.T) {
cfg := generateDefaultConfig(tc.name, map[string]string{})
cfg := generateDefaultConfig(tc.name, map[string]string{}, 3)
var parsed map[string]interface{}
if err := yaml.Unmarshal([]byte(cfg), &parsed); err != nil {
t.Fatalf("sanitized config does not parse as YAML: %v\n--- config ---\n%s", err, cfg)
@ -205,7 +205,7 @@ func TestImport_Success(t *testing.T) {
setupTestRedis(t)
tmpDir := t.TempDir()
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
body := `{
"name": "New Agent",
@ -245,7 +245,7 @@ func TestImport_MissingName(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
body := `{"files": {"test.md": "content"}}`
@ -265,7 +265,7 @@ func TestImport_TooManyFiles(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
files := make(map[string]string)
for i := 0; i <= maxUploadFiles; i++ {
@ -296,7 +296,7 @@ func TestImport_AlreadyExists(t *testing.T) {
tmpDir := t.TempDir()
os.MkdirAll(filepath.Join(tmpDir, "existing-agent"), 0755)
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
body := `{"name": "Existing Agent", "files": {"test.md": "content"}}`
@ -317,7 +317,7 @@ func TestImport_WithConfigYaml(t *testing.T) {
setupTestRedis(t)
tmpDir := t.TempDir()
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
body := `{
"name": "Custom Agent",
@ -354,7 +354,7 @@ func TestReplaceFiles_MissingBody(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -373,7 +373,7 @@ func TestReplaceFiles_TooManyFiles(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
files := make(map[string]string)
for i := 0; i <= maxUploadFiles; i++ {
@ -398,7 +398,7 @@ func TestReplaceFiles_WorkspaceNotFound(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
// ReplaceFiles now selects (name, instance_id, runtime) for the
// restart-cascade. Match the full column list rather than just the
@ -429,7 +429,7 @@ func TestReplaceFiles_PathTraversal(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
WithArgs("ws-rf-pt").

View File

@ -31,10 +31,20 @@ const maxUploadFiles = 200
type TemplatesHandler struct {
configsDir string
docker *client.Client
// wh is used by Import and ReplaceFiles to call DefaultTier() so a
// generated config.yaml's tier matches the SaaS-vs-self-hosted
// boundary (#2910 PR-B). nil-tolerant — the field is unused when
// the caller doesn't import templates that need a fresh config
// generated.
wh *WorkspaceHandler
}
func NewTemplatesHandler(configsDir string, dockerCli *client.Client) *TemplatesHandler {
return &TemplatesHandler{configsDir: configsDir, docker: dockerCli}
// NewTemplatesHandler constructs a TemplatesHandler. wh may be nil for
// callers that only use the read-only template surfaces (List,
// ReadFile, ListFiles). Import + ReplaceFiles need wh non-nil so the
// generated config.yaml picks the SaaS-aware default tier.
func NewTemplatesHandler(configsDir string, dockerCli *client.Client, wh *WorkspaceHandler) *TemplatesHandler {
return &TemplatesHandler{configsDir: configsDir, docker: dockerCli, wh: wh}
}
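// Illustrative wiring (assumption: the actual route setup lives outside
// this file):
//
//	th := NewTemplatesHandler(configsDir, dockerCli, wh) // wh non-nil: Import/ReplaceFiles pick the SaaS-aware tier
//	ro := NewTemplatesHandler(configsDir, nil, nil)      // read-only surfaces (List/ReadFile/ListFiles) tolerate nil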
// modelSpec describes a single supported model on a template: its id (sent

View File

@ -53,7 +53,7 @@ func TestTemplatesList_EmptyDir(t *testing.T) {
setupTestRedis(t)
tmpDir := t.TempDir()
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -99,7 +99,7 @@ skills:
// Create a directory without config.yaml (should be skipped)
os.MkdirAll(filepath.Join(tmpDir, "no-config"), 0755)
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -160,7 +160,7 @@ skills: []
t.Fatalf("write: %v", err)
}
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@ -237,7 +237,7 @@ skills: []
t.Fatalf("write: %v", err)
}
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@ -315,7 +315,7 @@ skills: []
t.Fatalf("write: %v", err)
}
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@ -434,7 +434,7 @@ skills: []
t.Fatalf("write: %v", err)
}
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@ -512,7 +512,7 @@ skills: []
t.Fatalf("write: %v", err)
}
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@ -555,7 +555,7 @@ skills: []
t.Fatalf("write: %v", err)
}
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@ -589,7 +589,7 @@ skills: []
t.Fatalf("write: %v", err)
}
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@ -661,7 +661,7 @@ skills: []
log.SetOutput(&logBuf)
defer log.SetOutput(prevOutput)
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
@ -698,7 +698,7 @@ func TestTemplatesList_NonexistentDir(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler("/nonexistent/path/to/templates", nil)
handler := NewTemplatesHandler("/nonexistent/path/to/templates", nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -723,7 +723,7 @@ func TestListFiles_InvalidRoot(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -748,7 +748,7 @@ func TestListFiles_WorkspaceNotFound(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
WithArgs("ws-nonexist").
@ -775,7 +775,7 @@ func TestListFiles_FallbackToHost_NoTemplate(t *testing.T) {
setupTestRedis(t)
tmpDir := t.TempDir()
handler := NewTemplatesHandler(tmpDir, nil) // nil docker = no container
handler := NewTemplatesHandler(tmpDir, nil, nil) // nil docker = no container
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
WithArgs("ws-fallback").
@ -815,7 +815,7 @@ func TestListFiles_FallbackToHost_WithTemplate(t *testing.T) {
os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte("name: Test Agent\n"), 0644)
os.WriteFile(filepath.Join(tmplDir, "system-prompt.md"), []byte("# prompt"), 0644)
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
WithArgs("ws-tmpl").
@ -849,7 +849,7 @@ func TestReadFile_PathTraversal(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -870,7 +870,7 @@ func TestReadFile_InvalidRoot(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -892,7 +892,7 @@ func TestReadFile_WorkspaceNotFound(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
WithArgs("ws-nf").
@ -926,7 +926,7 @@ func TestReadFile_FallbackToHost_Success(t *testing.T) {
os.MkdirAll(tmplDir, 0755)
os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte("name: Reader Agent\ntier: 1\n"), 0644)
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
// instance_id="" → SaaS branch skipped → falls through to local
// Docker / template-dir host fallback (the only path the test
@ -967,7 +967,7 @@ func TestReadFile_FallbackToHost_NotFound(t *testing.T) {
setupTestRedis(t)
tmpDir := t.TempDir()
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
WithArgs("ws-nofile").
@ -999,7 +999,7 @@ func TestWriteFile_PathTraversal(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -1023,7 +1023,7 @@ func TestWriteFile_InvalidBody(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -1046,7 +1046,7 @@ func TestWriteFile_WorkspaceNotFound(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
WithArgs("ws-wf-nf").
@ -1080,7 +1080,7 @@ func TestDeleteFile_PathTraversal(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -1101,7 +1101,7 @@ func TestDeleteFile_WorkspaceNotFound(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
mock.ExpectQuery("SELECT name FROM workspaces WHERE id =").
WithArgs("ws-del-nf").
@ -1133,7 +1133,7 @@ func TestResolveTemplateDir_ByNormalizedName(t *testing.T) {
tmplDir := filepath.Join(tmpDir, "my-agent")
os.MkdirAll(tmplDir, 0755)
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
result := handler.resolveTemplateDir("My Agent")
if result != tmplDir {
@ -1143,7 +1143,7 @@ func TestResolveTemplateDir_ByNormalizedName(t *testing.T) {
func TestResolveTemplateDir_NotFound(t *testing.T) {
tmpDir := t.TempDir()
handler := NewTemplatesHandler(tmpDir, nil)
handler := NewTemplatesHandler(tmpDir, nil, nil)
result := handler.resolveTemplateDir("Nonexistent Agent")
if result != "" {
@ -1177,7 +1177,7 @@ func TestCWE78_DeleteFile_TraversalVariants(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
handler := NewTemplatesHandler(t.TempDir(), nil)
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)

View File

@ -148,15 +148,15 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
id := uuid.New().String()
awarenessNamespace := workspaceAwarenessNamespace(id)
if payload.Tier == 0 {
// Default to T3 ("Privileged"). T3 gives agents a read_write
// workspace mount + Docker daemon access — the level most
// templates need to do real work. Lower tiers (T1 sandboxed,
// T2 standard) stay available as explicit opt-ins for
// low-trust agents. Matches the Canvas CreateWorkspaceDialog
// default for self-hosted hosts (SaaS defaults to T4 via
// CreateWorkspaceDialog because each SaaS workspace runs on
// its own sibling EC2).
payload.Tier = 3
// SaaS-aware default. SaaS → T4 (full host access; each
// workspace runs on its own sibling EC2 so the tier boundary
// is a Docker resource limit on the only container present —
// no neighbour to protect from). Self-hosted → T3 (read-write
// workspace mount + Docker daemon access, most templates'
// baseline). Lower tiers (T1 sandboxed, T2 standard) remain
// explicit opt-ins for low-trust agents. Matches the canvas
// CreateWorkspaceDialog defaults so the API and the UI agree.
payload.Tier = h.DefaultTier()
}
// Detect runtime + default model from template config.yaml when the

View File

@ -35,6 +35,7 @@ import (
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
)
// HasProvisioner reports whether either backend (CP or local Docker) is
@ -49,6 +50,32 @@ func (h *WorkspaceHandler) HasProvisioner() bool {
return h.cpProv != nil || h.provisioner != nil
}
// IsSaaS reports whether the CP (EC2) provisioner is wired. Each SaaS
// workspace runs on its own sibling EC2, so the per-workspace tier
// boundary is a Docker resource limit applied to the only container
// on that EC2 — there's no neighbour to protect from. Self-hosted
// runs many workspaces in one Docker daemon on a single host, so
// the lower T3-by-default safe-neighbour-share posture stays.
//
// Tier defaults across Create / OrgImport / canvas EmptyState branch
// on IsSaaS so SaaS users get T4 (full host access) by default and
// self-hosted users keep the lower-trust caps.
func (h *WorkspaceHandler) IsSaaS() bool {
return h.cpProv != nil
}
// DefaultTier is the SaaS-aware default tier. T4 on SaaS (single
// container per EC2 — full host access matches the boundary), T3 on
// self-hosted (read-write workspace mount + Docker daemon access,
// most templates' baseline). Callers default to this when the user
// hasn't explicitly picked a tier.
func (h *WorkspaceHandler) DefaultTier() int {
if h.IsSaaS() {
return 4
}
return 3
}
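// The callers this diff touches (Create above, plus the import path
// mentioned in the IsSaaS comment) share a "0 means unset" convention
// for the requested tier. A minimal sketch of that call shape,
// illustrative only and not code from this commit; the helper name and
// the `requested` parameter are hypothetical.
func defaultTierFor(h *WorkspaceHandler, requested int) int {
	if requested == 0 {
		// Tier not explicitly picked: fall back to the SaaS-aware default.
		return h.DefaultTier()
	}
	return requested
}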
// provisionWorkspaceAuto picks the backend (CP for SaaS, local Docker
// for self-hosted) and starts provisioning in a goroutine. Returns true
// when a backend was kicked off, false when neither is wired.
@ -75,6 +102,14 @@ func (h *WorkspaceHandler) HasProvisioner() bool {
// lives in prepareProvisionContext (shared by both per-backend
// goroutines).
func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
provlog.Event("provision.start", map[string]any{
"workspace_id": workspaceID,
"name": payload.Name,
"tier": payload.Tier,
"runtime": payload.Runtime,
"template": payload.Template,
"sync": false,
})
if h.cpProv != nil {
go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
return true
@ -110,6 +145,14 @@ func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath stri
// Keep these two helpers in sync — when one grows a new arm (third
// backend, retry semantics), the other should too.
func (h *WorkspaceHandler) provisionWorkspaceAutoSync(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
provlog.Event("provision.start", map[string]any{
"workspace_id": workspaceID,
"name": payload.Name,
"tier": payload.Tier,
"runtime": payload.Runtime,
"template": payload.Template,
"sync": true,
})
if h.cpProv != nil {
h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
return true

View File

@ -12,6 +12,7 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
"github.com/gin-gonic/gin"
)
@ -431,6 +432,16 @@ func coalesceRestart(workspaceID string, cycle func()) {
// NPE'd before reaching the reprovision step — which is why every SaaS dead-
// agent incident pre-this-fix required manual restart from canvas.
func (h *WorkspaceHandler) stopForRestart(ctx context.Context, workspaceID string) {
backend := "none"
if h.provisioner != nil {
backend = "docker"
} else if h.cpProv != nil {
backend = "cp"
}
provlog.Event("restart.pre_stop", map[string]any{
"workspace_id": workspaceID,
"backend": backend,
})
if h.provisioner != nil {
h.provisioner.Stop(ctx, workspaceID)
return

View File

@ -0,0 +1,159 @@
package handlers
import (
"go/ast"
"go/parser"
"go/token"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"testing"
)
// TestINSERTworkspacesAllowlist enumerates every function in this
// package that emits an `INSERT INTO workspaces (` SQL literal, and
// pins the result against an explicit allowlist. New entries fail the
// build until a reviewer adds them — forcing the question "what
// makes this INSERT idempotent?" at PR-review time, not after the
// next bulk-create leak.
//
// Pairs with TestCreateWorkspaceTree_CallsLookupBeforeInsert (the
// behavior pin for the one bulk path). Together they close the
// regression class: this test catches "did a new function start
// inserting workspaces?", that test catches "did the existing bulk
// path drop its idempotency check?". Either fires immediately when
// drift happens.
//
// Why allowlist rather than pure behavior gate (per memory
// feedback_behavior_based_ast_gates.md): the bulk-create leak class
// is small + stable (1 path today), and a behavior gate would have
// to disambiguate "iterating a YAML array of workspaces" from the
// many other `for ... range` patterns in a Create handler (config
// lines, secrets map, channels). Type-info-aware AST analysis would
// catch the YAML-iteration shape but is heavy. Allowlisting is the
// minimum-viable pin: any PR that adds a new INSERT site is forced
// to pause, add an entry here, and document the safety mechanism in
// the comment alongside.
//
// RFC #2867 class 1.
func TestINSERTworkspacesAllowlist(t *testing.T) {
// expected[key] = safety mechanism. Keep the comment pinned to
// what makes that function safe — if the safety changes, the
// allowlist must be re-reviewed.
expected := map[string]string{
// org_import.createWorkspaceTree: lookupExistingChild
// before INSERT (#2868 phase 3). Also pinned by
// TestCreateWorkspaceTree_CallsLookupBeforeInsert.
"org_import.go:createWorkspaceTree": "lookup-then-insert via lookupExistingChild",
// registry.Register: external workspace registers itself with
// its known UUID; INSERT is idempotent via ON CONFLICT (id)
// DO UPDATE — re-registration upserts, never duplicates.
"registry.go:Register": "ON CONFLICT (id) DO UPDATE",
// workspace.Create: single-workspace POST /workspaces from a
// human or automation. No iteration; payload describes one
// workspace; UUID is server-generated. Caller intent IS to
// create, so no idempotency check is needed.
"workspace.go:Create": "single-workspace POST, server-generated UUID",
}
actual := map[string]string{}
wd, err := os.Getwd()
if err != nil {
t.Fatalf("getwd: %v", err)
}
entries, err := os.ReadDir(wd)
if err != nil {
t.Fatalf("readdir %s: %v", wd, err)
}
for _, ent := range entries {
name := ent.Name()
if ent.IsDir() {
continue
}
if !strings.HasSuffix(name, ".go") {
continue
}
if strings.HasSuffix(name, "_test.go") {
continue
}
path := filepath.Join(wd, name)
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, path, nil, parser.ParseComments)
if err != nil {
t.Fatalf("parse %s: %v", path, err)
}
// For each top-level FuncDecl, walk its body and check for an
// `INSERT INTO workspaces (` SQL literal in any CallExpr arg.
for _, decl := range file.Decls {
fn, ok := decl.(*ast.FuncDecl)
if !ok || fn.Body == nil {
continue
}
var foundInsert bool
ast.Inspect(fn.Body, func(n ast.Node) bool {
lit, ok := n.(*ast.BasicLit)
if !ok || lit.Kind != token.STRING {
return true
}
raw := lit.Value
if unq, err := strconv.Unquote(raw); err == nil {
raw = unq
}
if workspacesInsertRE.MatchString(raw) {
foundInsert = true
return false
}
return true
})
if foundInsert {
key := name + ":" + fn.Name.Name
actual[key] = "(observed via AST walk)"
}
}
}
// Compute set diffs so failures point at the specific drift.
missing := []string{}
unexpected := []string{}
for k := range expected {
if _, ok := actual[k]; !ok {
missing = append(missing, k)
}
}
for k := range actual {
if _, ok := expected[k]; !ok {
unexpected = append(unexpected, k)
}
}
sort.Strings(missing)
sort.Strings(unexpected)
if len(unexpected) > 0 {
t.Errorf(`new function(s) emit `+"`INSERT INTO workspaces (`"+` and aren't in the allowlist:
%s
If this is a legitimate addition, add an entry to expected[] in this test
with the safety mechanism pinned in the comment alongside (lookup-then-
insert / ON CONFLICT / single-workspace path / etc.). The bulk-create
regression class needs explicit per-handler review, not silent drift.
Reference: RFC #2867 class 1, sibling test
TestCreateWorkspaceTree_CallsLookupBeforeInsert.`,
strings.Join(unexpected, "\n "))
}
if len(missing) > 0 {
t.Errorf(`expected function(s) no longer emit `+"`INSERT INTO workspaces (`"+`:
%s
Either the function was renamed/deleted (update the allowlist) or the
INSERT was moved out (verify the new home is also covered). Don't just
delete the entry; confirm the safety mechanism is still in place
elsewhere or that the workspace-create path was intentionally
restructured.`,
strings.Join(missing, "\n "))
}
}

View File

@ -5,14 +5,15 @@
//
// Exposed metrics:
//
// molecule_http_requests_total{method,path,status} - counter
// molecule_http_request_duration_seconds{method,path} - counter (sum, for avg rate)
// molecule_websocket_connections_active - gauge
// go_goroutines - gauge
// go_memstats_alloc_bytes - gauge
// go_memstats_sys_bytes - gauge
// go_memstats_heap_inuse_bytes - gauge
// go_gc_duration_seconds_total - counter
// molecule_http_requests_total{method,path,status} - counter
// molecule_http_request_duration_seconds{method,path} - counter (sum, for avg rate)
// molecule_websocket_connections_active - gauge
// molecule_pending_uploads_swept_total{outcome} - counter (acked|expired|error)
// go_goroutines - gauge
// go_memstats_alloc_bytes - gauge
// go_memstats_sys_bytes - gauge
// go_memstats_heap_inuse_bytes - gauge
// go_gc_duration_seconds_total - counter
package metrics
import (
@ -38,6 +39,12 @@ var (
reqCounts = map[reqKey]int64{} // molecule_http_requests_total
reqDurSums = map[reqKey]float64{} // sum of durations (seconds)
activeWSConns int64 // molecule_websocket_connections_active
// pendinguploads sweeper counters — atomic so the sweeper goroutine
// doesn't contend with the /metrics handler.
pendingUploadsSweptAcked int64 // molecule_pending_uploads_swept_total{outcome="acked"}
pendingUploadsSweptExpired int64 // molecule_pending_uploads_swept_total{outcome="expired"}
pendingUploadsSweepErrors int64 // molecule_pending_uploads_swept_total{outcome="error"}
)
// Middleware records per-request counts and latency.
@ -76,6 +83,50 @@ func TrackWSConnect() { atomic.AddInt64(&activeWSConns, 1) }
// Call from the WebSocket disconnect / cleanup path.
func TrackWSDisconnect() { atomic.AddInt64(&activeWSConns, -1) }
// phantomBusyResets is the cumulative count of workspace rows the
// phantom-busy sweep reset (a stale active_tasks > 0 forced back to
// 0). Surfaced as molecule_phantom_busy_resets_total — a high
// reset rate signals a regression in task-lifecycle accounting (most
// often: missing env vars cause claude --print to time out, the
// agent loop never decrements active_tasks, and the sweep cleans up
// the counter ~10 min later). Issue #2865.
var phantomBusyResets int64
// TrackPhantomBusyReset increments the phantom-busy reset counter.
// Called from sweepPhantomBusy in workspace-server/internal/scheduler/
// after each row whose active_tasks was reset to 0. Goroutine-safe;
// called once per row per sweep tick.
func TrackPhantomBusyReset() { atomic.AddInt64(&phantomBusyResets, 1) }
// PendingUploadsSwept records a successful sweep cycle. acked/expired
// are added to the per-outcome counters so dashboards can spot the
// stuck-fetch pattern (high expired, low acked) vs healthy churn.
func PendingUploadsSwept(acked, expired int) {
if acked > 0 {
atomic.AddInt64(&pendingUploadsSweptAcked, int64(acked))
}
if expired > 0 {
atomic.AddInt64(&pendingUploadsSweptExpired, int64(expired))
}
}
// PendingUploadsSweepError records a sweeper-cycle failure (transient
// DB error etc). Counted separately so the rate of errored sweeps is
// observable independent of how many rows the successful sweeps deleted.
func PendingUploadsSweepError() {
atomic.AddInt64(&pendingUploadsSweepErrors, 1)
}
// PendingUploadsSweepCounts returns the current (acked, expired, error)
// totals. Exposed for tests that need a deterministic delta probe of
// the sweeper's metric writes — the /metrics endpoint is the production
// observability surface; this is a unit-test escape hatch.
func PendingUploadsSweepCounts() (acked, expired, errored int64) {
return atomic.LoadInt64(&pendingUploadsSweptAcked),
atomic.LoadInt64(&pendingUploadsSweptExpired),
atomic.LoadInt64(&pendingUploadsSweepErrors)
}
// Handler returns a Gin handler that serialises all collected metrics in
// Prometheus text exposition format (v0.0.4). Mount this at GET /metrics.
func Handler() gin.HandlerFunc {
@ -144,6 +195,21 @@ func Handler() gin.HandlerFunc {
writeln(w, "# HELP molecule_websocket_connections_active Number of active WebSocket connections.")
writeln(w, "# TYPE molecule_websocket_connections_active gauge")
fmt.Fprintf(w, "molecule_websocket_connections_active %d\n", atomic.LoadInt64(&activeWSConns))
// ── Molecule AI scheduler ──────────────────────────────────────────────
writeln(w, "# HELP molecule_phantom_busy_resets_total Cumulative count of workspace rows reset by the phantom-busy sweep (active_tasks cleared after >10 min of activity_log silence). High reset rate signals task-lifecycle accounting regressions — see issue #2865.")
writeln(w, "# TYPE molecule_phantom_busy_resets_total counter")
fmt.Fprintf(w, "molecule_phantom_busy_resets_total %d\n", atomic.LoadInt64(&phantomBusyResets))
// ── Pending-uploads sweeper ────────────────────────────────────────────
writeln(w, "# HELP molecule_pending_uploads_swept_total Pending-uploads rows deleted by the GC sweeper, by outcome.")
writeln(w, "# TYPE molecule_pending_uploads_swept_total counter")
fmt.Fprintf(w, "molecule_pending_uploads_swept_total{outcome=\"acked\"} %d\n",
atomic.LoadInt64(&pendingUploadsSweptAcked))
fmt.Fprintf(w, "molecule_pending_uploads_swept_total{outcome=\"expired\"} %d\n",
atomic.LoadInt64(&pendingUploadsSweptExpired))
fmt.Fprintf(w, "molecule_pending_uploads_swept_total{outcome=\"error\"} %d\n",
atomic.LoadInt64(&pendingUploadsSweepErrors))
}
}

View File

@ -0,0 +1,104 @@
package metrics
// Tests for the phantom-busy reset counter wired up by issue #2865.
// The counter is exposed at /metrics as
// molecule_phantom_busy_resets_total. A high steady-state value
// signals task-lifecycle accounting regressions in the agent loop —
// see scheduler.sweepPhantomBusy for the writer.
import (
"net/http/httptest"
"strings"
"sync"
"sync/atomic"
"testing"
"github.com/gin-gonic/gin"
)
// resetForTest zeroes the counter so a single test's TrackPhantomBusyReset
// calls don't compound onto a previous test's run. metrics.go's package-
// level state means every test that touches the counter must reset.
func resetForTest() {
atomic.StoreInt64(&phantomBusyResets, 0)
}
func TestTrackPhantomBusyReset_IncrementsCounter(t *testing.T) {
resetForTest()
for i := 0; i < 7; i++ {
TrackPhantomBusyReset()
}
got := atomic.LoadInt64(&phantomBusyResets)
if got != 7 {
t.Errorf("counter after 7 calls = %d, want 7", got)
}
}
func TestTrackPhantomBusyReset_RaceFreeUnderConcurrentWrites(t *testing.T) {
resetForTest()
var wg sync.WaitGroup
const goroutines = 50
const callsPerGoroutine = 200
wg.Add(goroutines)
for i := 0; i < goroutines; i++ {
go func() {
defer wg.Done()
for j := 0; j < callsPerGoroutine; j++ {
TrackPhantomBusyReset()
}
}()
}
wg.Wait()
want := int64(goroutines * callsPerGoroutine)
got := atomic.LoadInt64(&phantomBusyResets)
if got != want {
t.Errorf("counter under concurrent writes = %d, want %d (lost increments → atomic broken)",
got, want)
}
}
func TestHandler_ExposesPhantomBusyResetsCounter(t *testing.T) {
resetForTest()
for i := 0; i < 3; i++ {
TrackPhantomBusyReset()
}
gin.SetMode(gin.TestMode)
r := gin.New()
r.GET("/metrics", Handler())
w := httptest.NewRecorder()
req := httptest.NewRequest("GET", "/metrics", nil)
r.ServeHTTP(w, req)
body := w.Body.String()
// HELP + TYPE lines must precede the metric (Prometheus text exposition format).
if !strings.Contains(body, "# HELP molecule_phantom_busy_resets_total") {
t.Errorf("metrics output missing HELP line for molecule_phantom_busy_resets_total:\n%s", body)
}
if !strings.Contains(body, "# TYPE molecule_phantom_busy_resets_total counter") {
t.Errorf("metrics output missing TYPE line for molecule_phantom_busy_resets_total:\n%s", body)
}
if !strings.Contains(body, "molecule_phantom_busy_resets_total 3\n") {
t.Errorf("metrics output missing counter value 3:\n%s", body)
}
}
func TestHandler_PhantomBusyResetsZeroByDefault(t *testing.T) {
// Fresh process should report 0 — pin the contract so a future
// refactor that lazy-inits the counter to nil doesn't silently
// drop the metric from /metrics.
resetForTest()
gin.SetMode(gin.TestMode)
r := gin.New()
r.GET("/metrics", Handler())
w := httptest.NewRecorder()
req := httptest.NewRequest("GET", "/metrics", nil)
r.ServeHTTP(w, req)
if !strings.Contains(w.Body.String(), "molecule_phantom_busy_resets_total 0\n") {
t.Errorf("metric must report 0 by default:\n%s", w.Body.String())
}
}
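// A hedged sketch, not part of this diff, of how the pending-uploads
// sweeper counters could be pinned in the same style as the
// phantom-busy tests above. It assumes same-package access to the
// counter variables for the reset, which holds in this file.
func TestPendingUploadsCounters_SketchAccumulateAndExpose(t *testing.T) {
	atomic.StoreInt64(&pendingUploadsSweptAcked, 0)
	atomic.StoreInt64(&pendingUploadsSweptExpired, 0)
	atomic.StoreInt64(&pendingUploadsSweepErrors, 0)
	PendingUploadsSwept(3, 2)
	PendingUploadsSweepError()
	if a, e, x := PendingUploadsSweepCounts(); a != 3 || e != 2 || x != 1 {
		t.Errorf("counts = (%d,%d,%d), want (3,2,1)", a, e, x)
	}
	gin.SetMode(gin.TestMode)
	r := gin.New()
	r.GET("/metrics", Handler())
	w := httptest.NewRecorder()
	r.ServeHTTP(w, httptest.NewRequest("GET", "/metrics", nil))
	if !strings.Contains(w.Body.String(), `molecule_pending_uploads_swept_total{outcome="acked"} 3`) {
		t.Errorf("exposition missing acked counter:\n%s", w.Body.String())
	}
}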

View File

@ -0,0 +1,17 @@
package pendinguploads
import (
"context"
"time"
)
// StartSweeperWithIntervalForTest exposes startSweeperWithInterval to
// the external test package. The production code uses StartSweeper
// (which pins the canonical SweepInterval); tests pin a short interval
// to exercise the ticker-driven cycle without burning real wall-clock
// time. The Go convention `export_test.go` keeps this seam OUT of the
// production binary — files ending in _test.go are stripped at build
// time, so this re-export only exists during `go test`.
func StartSweeperWithIntervalForTest(ctx context.Context, storage Storage, ackRetention, interval time.Duration) {
startSweeperWithInterval(ctx, storage, ackRetention, interval)
}

View File

@ -72,6 +72,28 @@ type Record struct {
ExpiresAt time.Time
}
// SweepResult is the per-cycle accounting from Sweep. Both counts are
// non-negative; Total is just Acked + Expired for log/metrics
// convenience. Phase 3 metrics expose these as separate counters so
// dashboards can spot a stuck-ack pattern (high Expired, low Acked) vs.
// healthy churn (Acked dominates).
type SweepResult struct {
Acked int // rows deleted because acked_at + retention elapsed
Expired int // rows deleted because expires_at < now AND never acked
}
// Total returns the sum of Acked + Expired — convenient for log lines.
func (r SweepResult) Total() int { return r.Acked + r.Expired }
// PutItem is one file in a PutBatch call. Same per-field rules as Put —
// empty content, missing filename, or content > MaxFileBytes is rejected
// up-front so a bad item in the batch doesn't poison the transaction.
type PutItem struct {
Content []byte
Filename string
Mimetype string
}
// Storage is the platform-side persistence boundary for poll-mode chat
// uploads. The Postgres implementation backs all callers today; an S3-
// backed implementation can drop in once RFC #2789 lands by making
@ -86,6 +108,17 @@ type Storage interface {
// content > MaxFileBytes return errors before any DB write.
Put(ctx context.Context, workspaceID uuid.UUID, content []byte, filename, mimetype string) (uuid.UUID, error)
// PutBatch inserts N uploads atomically — either all rows commit or
// none do. Returns assigned file_ids in input order on success;
// returns an error and does NOT insert any row on failure.
//
// Use this from multi-file upload handlers so a per-row failure on
// row K doesn't leave rows 1..K-1 orphaned in the table (a client
// retry would then double-insert them on success). All-or-nothing
// semantics match the multipart request the canvas sends — either
// the whole batch succeeds or the user re-uploads.
PutBatch(ctx context.Context, workspaceID uuid.UUID, items []PutItem) ([]uuid.UUID, error)
// Get returns the full row including content. Returns ErrNotFound
// when the row is absent, acked, or past expires_at. Caller should
// not differentiate the three cases in the response — from the
@ -103,6 +136,18 @@ type Storage interface {
// absent or already expired; on already-acked, returns nil so
// the workspace's at-least-once retry succeeds without an error.
Ack(ctx context.Context, fileID uuid.UUID) error
// Sweep deletes rows past their retention window:
// - acked rows older than ackRetention (give the workspace a
// window to re-fetch in case it processed but failed to write
// the file before crashing — at-least-once behavior).
// - unacked rows past expires_at (the platform's hard TTL — 24h
// by default; a workspace that hasn't fetched by then is
// considered dead from the upload's perspective).
// Returns the per-category deletion counts for observability.
// Errors are surfaced to the caller; a transient DB error must NOT
// crash the sweeper loop (it just retries on the next tick).
Sweep(ctx context.Context, ackRetention time.Duration) (SweepResult, error)
}
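// An illustrative caller sketch for the PutBatch contract above. This
// is not the real chat_files.go handler; the `chatUpload` type and the
// function name are hypothetical. The point is the all-or-nothing
// error handling: on error no rows were committed, so the client can
// retry the whole batch without any reconciliation step.
type chatUpload struct {
	Name, Mimetype string
	Data           []byte
}

func persistChatUploads(ctx context.Context, store Storage, wsID uuid.UUID, files []chatUpload) ([]uuid.UUID, error) {
	items := make([]PutItem, 0, len(files))
	for _, f := range files {
		items = append(items, PutItem{Content: f.Data, Filename: f.Name, Mimetype: f.Mimetype})
	}
	// ids[i] pairs with files[i]; an error means zero rows were inserted.
	ids, err := store.PutBatch(ctx, wsID, items)
	if err != nil {
		return nil, fmt.Errorf("persist chat uploads: %w", err)
	}
	return ids, nil
}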
// PostgresStorage is the production Storage implementation backed by
@ -149,6 +194,64 @@ func (p *PostgresStorage) Put(ctx context.Context, workspaceID uuid.UUID, conten
return fileID, nil
}
// PutBatch inserts every item atomically inside a single Tx. On any
// per-item validation or per-row INSERT error the Tx is rolled back and
// the caller sees the error without any rows committed — no partial
// orphans for a multi-file upload that fails mid-batch.
//
// Validation runs BEFORE BEGIN so a bad input shape (empty content,
// over-cap size) doesn't even open a Tx. Once we're in the Tx, the only
// failures expected are DB-side (broken connection, statement timeout)
// — those abort cleanly via Rollback.
func (p *PostgresStorage) PutBatch(ctx context.Context, workspaceID uuid.UUID, items []PutItem) ([]uuid.UUID, error) {
if len(items) == 0 {
return nil, nil
}
for i, it := range items {
if len(it.Content) == 0 {
return nil, fmt.Errorf("pendinguploads: item %d: empty content", i)
}
if len(it.Content) > MaxFileBytes {
return nil, ErrTooLarge
}
if it.Filename == "" {
return nil, fmt.Errorf("pendinguploads: item %d: empty filename", i)
}
if len(it.Filename) > 100 {
return nil, fmt.Errorf("pendinguploads: item %d: filename exceeds 100 chars", i)
}
}
tx, err := p.db.BeginTx(ctx, nil)
if err != nil {
return nil, fmt.Errorf("pendinguploads: begin tx: %w", err)
}
// Defer-rollback is safe even after a successful Commit — the deferred
// Rollback is then a no-op (database/sql marks the tx done and returns
// ErrTxDone without reaching the driver).
defer func() {
_ = tx.Rollback()
}()
out := make([]uuid.UUID, 0, len(items))
for i, it := range items {
var fid uuid.UUID
err := tx.QueryRowContext(ctx, `
INSERT INTO pending_uploads (workspace_id, content, size_bytes, filename, mimetype)
VALUES ($1, $2, $3, $4, $5)
RETURNING file_id
`, workspaceID, it.Content, int64(len(it.Content)), it.Filename, it.Mimetype).Scan(&fid)
if err != nil {
return nil, fmt.Errorf("pendinguploads: batch insert item %d: %w", i, err)
}
out = append(out, fid)
}
if err := tx.Commit(); err != nil {
return nil, fmt.Errorf("pendinguploads: commit batch: %w", err)
}
return out, nil
}
func (p *PostgresStorage) Get(ctx context.Context, fileID uuid.UUID) (Record, error) {
// The expires_at + acked_at filter in the WHERE clause means a
// caller sees ErrNotFound for absent / acked / expired without
@ -251,3 +354,41 @@ func (p *PostgresStorage) Ack(ctx context.Context, fileID uuid.UUID) error {
// the workspace's intent ("I'm done with this file") was honored.
return nil
}
// Sweep deletes acked rows past their retention window plus any
// unacked rows whose hard TTL has elapsed. Single round-trip: a CTE
// captures the deletion in one DELETE … RETURNING and the outer
// SELECT sums by category. Cheaper and tighter than two round trips,
// and atomic w.r.t. concurrent writes (the WHERE predicate sees a
// consistent snapshot via Postgres MVCC).
//
// ackRetention=0 deletes all acked rows immediately; values <0 are
// clamped to 0 for safety. Caller defaults are documented at
// StartSweeper's DefaultAckRetention.
func (p *PostgresStorage) Sweep(ctx context.Context, ackRetention time.Duration) (SweepResult, error) {
if ackRetention < 0 {
ackRetention = 0
}
// make_interval is passed whole seconds — Postgres would accept a
// fractional value, but we deliberately truncate to whole seconds
// so test fixtures pin a deterministic value across PG versions.
retentionSecs := int64(ackRetention.Seconds())
var acked, expired int
err := p.db.QueryRowContext(ctx, `
WITH deleted AS (
DELETE FROM pending_uploads
WHERE (acked_at IS NOT NULL AND acked_at < now() - make_interval(secs => $1))
OR (acked_at IS NULL AND expires_at < now())
RETURNING (acked_at IS NOT NULL) AS was_acked
)
SELECT
COALESCE(SUM(CASE WHEN was_acked THEN 1 ELSE 0 END), 0)::int AS acked,
COALESCE(SUM(CASE WHEN NOT was_acked THEN 1 ELSE 0 END), 0)::int AS expired
FROM deleted
`, retentionSecs).Scan(&acked, &expired)
if err != nil {
return SweepResult{}, fmt.Errorf("pendinguploads: sweep: %w", err)
}
return SweepResult{Acked: acked, Expired: expired}, nil
}

View File

@ -71,6 +71,18 @@ const (
SELECT acked_at FROM pending_uploads
WHERE file_id = $1 AND expires_at > now()
`
sweepSQL = `
WITH deleted AS (
DELETE FROM pending_uploads
WHERE (acked_at IS NOT NULL AND acked_at < now() - make_interval(secs => $1))
OR (acked_at IS NULL AND expires_at < now())
RETURNING (acked_at IS NOT NULL) AS was_acked
)
SELECT
COALESCE(SUM(CASE WHEN was_acked THEN 1 ELSE 0 END), 0)::int AS acked,
COALESCE(SUM(CASE WHEN NOT was_acked THEN 1 ELSE 0 END), 0)::int AS expired
FROM deleted
`
)
// ----- Put ------------------------------------------------------------------
@ -398,3 +410,324 @@ func TestAck_DBErrorOnDisambiguate_Wrapped(t *testing.T) {
t.Fatalf("expected wrapped disambiguate error, got %v", err)
}
}
// ----- Sweep ----------------------------------------------------------------
func TestSweep_DeletesAckedAndExpired_ReturnsCounts(t *testing.T) {
db, mock := newMockDB(t)
store := pendinguploads.NewPostgres(db)
mock.ExpectQuery(sweepSQL).
WithArgs(int64(3600)). // 1h retention
WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(7, 2))
res, err := store.Sweep(context.Background(), time.Hour)
if err != nil {
t.Fatalf("Sweep: %v", err)
}
if res.Acked != 7 || res.Expired != 2 || res.Total() != 9 {
t.Errorf("got %+v want acked=7 expired=2 total=9", res)
}
}
func TestSweep_NothingToDelete_ReturnsZero(t *testing.T) {
db, mock := newMockDB(t)
store := pendinguploads.NewPostgres(db)
mock.ExpectQuery(sweepSQL).
WithArgs(int64(3600)).
WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(0, 0))
res, err := store.Sweep(context.Background(), time.Hour)
if err != nil {
t.Fatalf("Sweep: %v", err)
}
if res.Total() != 0 {
t.Errorf("got %+v, want zero result", res)
}
}
func TestSweep_NegativeRetentionClampedToZero(t *testing.T) {
db, mock := newMockDB(t)
store := pendinguploads.NewPostgres(db)
// Negative retention must clamp to 0; the SQL gets `secs => 0` so an
// acked-just-now row is eligible for deletion immediately. Pinned
// here because passing the raw negative through `make_interval` would
// push the deletion cutoff into the future as a quirk of SQL interval
// arithmetic; clamping to 0 makes "delete acked rows immediately" an
// explicit, pinned contract instead of an accident a caller relies on.
mock.ExpectQuery(sweepSQL).
WithArgs(int64(0)).
WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(3, 0))
res, err := store.Sweep(context.Background(), -1*time.Second)
if err != nil {
t.Fatalf("Sweep: %v", err)
}
if res.Acked != 3 {
t.Errorf("got %+v want acked=3", res)
}
}
func TestSweep_ZeroRetentionImmediatelyDeletesAcked(t *testing.T) {
db, mock := newMockDB(t)
store := pendinguploads.NewPostgres(db)
mock.ExpectQuery(sweepSQL).
WithArgs(int64(0)).
WillReturnRows(sqlmock.NewRows([]string{"acked", "expired"}).AddRow(5, 1))
res, err := store.Sweep(context.Background(), 0)
if err != nil {
t.Fatalf("Sweep: %v", err)
}
if res.Acked != 5 || res.Expired != 1 {
t.Errorf("got %+v want acked=5 expired=1", res)
}
}
func TestSweep_DBError_Wrapped(t *testing.T) {
db, mock := newMockDB(t)
store := pendinguploads.NewPostgres(db)
mock.ExpectQuery(sweepSQL).
WithArgs(int64(60)).
WillReturnError(errors.New("connection lost"))
_, err := store.Sweep(context.Background(), time.Minute)
if err == nil || !strings.Contains(err.Error(), "sweep") {
t.Fatalf("expected wrapped sweep error, got %v", err)
}
}
func TestSweepResult_TotalSumsCounts(t *testing.T) {
r := pendinguploads.SweepResult{Acked: 4, Expired: 3}
if r.Total() != 7 {
t.Errorf("Total = %d, want 7", r.Total())
}
z := pendinguploads.SweepResult{}
if z.Total() != 0 {
t.Errorf("zero Total = %d, want 0", z.Total())
}
}
// ----- PutBatch -------------------------------------------------------------
//
// PutBatch is the multi-file atomic insert path used by uploadPollMode in
// chat_files.go. The contract that callers rely on:
//
// - Either ALL rows commit, or NONE do — a per-row INSERT failure must
// leave the table unchanged (no orphaned rows from a half-applied batch).
// - Per-item validation runs BEFORE the Tx opens so a bad input shape
// never wastes a BEGIN round-trip.
// - Returned []uuid.UUID is in input order — handler maps response back
// to the multipart Files[i].
//
// sqlmock's ExpectBegin / ExpectQuery / ExpectCommit / ExpectRollback let us
// pin the exact tx-lifecycle shape; if a future refactor swaps Begin for
// BeginTx-with-options, the test fails until we re-pin.
func TestPutBatch_HappyPath_AllCommitInOrder(t *testing.T) {
db, mock := newMockDB(t)
store := pendinguploads.NewPostgres(db)
wsID := uuid.New()
id1, id2, id3 := uuid.New(), uuid.New(), uuid.New()
mock.ExpectBegin()
mock.ExpectQuery(insertSQL).
WithArgs(wsID, []byte("aaa"), int64(3), "a.txt", "text/plain").
WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id1))
mock.ExpectQuery(insertSQL).
WithArgs(wsID, []byte("bbbb"), int64(4), "b.bin", "application/octet-stream").
WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id2))
mock.ExpectQuery(insertSQL).
WithArgs(wsID, []byte("ccccc"), int64(5), "c.pdf", "application/pdf").
WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id3))
mock.ExpectCommit()
// Rollback after Commit is a no-op in database/sql (the tx is marked
// done and ErrTxDone is returned before the driver is reached), so
// sqlmock never sees it and we don't need to expect it.
got, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{
{Content: []byte("aaa"), Filename: "a.txt", Mimetype: "text/plain"},
{Content: []byte("bbbb"), Filename: "b.bin", Mimetype: "application/octet-stream"},
{Content: []byte("ccccc"), Filename: "c.pdf", Mimetype: "application/pdf"},
})
if err != nil {
t.Fatalf("PutBatch: %v", err)
}
if len(got) != 3 || got[0] != id1 || got[1] != id2 || got[2] != id3 {
t.Errorf("ids out of order or missing: got %v want [%s %s %s]", got, id1, id2, id3)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("expectations: %v", err)
}
}
func TestPutBatch_EmptyItems_NoTxNoError(t *testing.T) {
db, _ := newMockDB(t) // zero expectations — must NOT round-trip
store := pendinguploads.NewPostgres(db)
got, err := store.PutBatch(context.Background(), uuid.New(), nil)
if err != nil {
t.Fatalf("expected nil error on empty batch, got %v", err)
}
if got != nil {
t.Errorf("expected nil ids on empty batch, got %v", got)
}
}
func TestPutBatch_RejectsEmptyContent_NoTx(t *testing.T) {
db, _ := newMockDB(t)
store := pendinguploads.NewPostgres(db)
_, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
{Content: []byte("ok"), Filename: "a.txt"},
{Content: nil, Filename: "b.txt"},
})
if err == nil || !strings.Contains(err.Error(), "item 1") || !strings.Contains(err.Error(), "empty content") {
t.Fatalf("expected item-1 empty-content error, got %v", err)
}
}
func TestPutBatch_RejectsOversize_ReturnsErrTooLarge(t *testing.T) {
db, _ := newMockDB(t)
store := pendinguploads.NewPostgres(db)
too := make([]byte, pendinguploads.MaxFileBytes+1)
_, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
{Content: []byte("ok"), Filename: "small.txt"},
{Content: too, Filename: "huge.bin"},
})
if !errors.Is(err, pendinguploads.ErrTooLarge) {
t.Fatalf("expected ErrTooLarge, got %v", err)
}
}
func TestPutBatch_RejectsEmptyFilename_NoTx(t *testing.T) {
db, _ := newMockDB(t)
store := pendinguploads.NewPostgres(db)
_, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
{Content: []byte("hi"), Filename: ""},
})
if err == nil || !strings.Contains(err.Error(), "item 0") || !strings.Contains(err.Error(), "empty filename") {
t.Fatalf("expected item-0 empty-filename error, got %v", err)
}
}
func TestPutBatch_RejectsLongFilename_NoTx(t *testing.T) {
db, _ := newMockDB(t)
store := pendinguploads.NewPostgres(db)
long := strings.Repeat("z", 101)
_, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
{Content: []byte("hi"), Filename: "ok.txt"},
{Content: []byte("hi"), Filename: long},
})
if err == nil || !strings.Contains(err.Error(), "item 1") || !strings.Contains(err.Error(), "exceeds 100 chars") {
t.Fatalf("expected item-1 too-long-filename error, got %v", err)
}
}
func TestPutBatch_BeginTxError_Wrapped(t *testing.T) {
db, mock := newMockDB(t)
store := pendinguploads.NewPostgres(db)
mock.ExpectBegin().WillReturnError(errors.New("conn refused"))
_, err := store.PutBatch(context.Background(), uuid.New(), []pendinguploads.PutItem{
{Content: []byte("hi"), Filename: "a.txt"},
})
if err == nil || !strings.Contains(err.Error(), "begin tx") {
t.Fatalf("expected wrapped begin-tx error, got %v", err)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("expectations: %v", err)
}
}
func TestPutBatch_RollsBackOnPerRowError_NoCommit(t *testing.T) {
// First INSERT succeeds, second errors. PutBatch MUST NOT issue
// Commit; the deferred Rollback unwinds row 1 so neither row commits.
// This is the contract that prevents orphan rows on a failed batch.
db, mock := newMockDB(t)
store := pendinguploads.NewPostgres(db)
wsID := uuid.New()
id1 := uuid.New()
mock.ExpectBegin()
mock.ExpectQuery(insertSQL).
WithArgs(wsID, []byte("aaa"), int64(3), "a.txt", "").
WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id1))
mock.ExpectQuery(insertSQL).
WithArgs(wsID, []byte("bb"), int64(2), "b.txt", "").
WillReturnError(errors.New("statement timeout"))
// Critical: Rollback expected, NOT Commit. If a future refactor
// accidentally swallows the per-row error and Commits anyway, this
// test fails because the unmet ExpectCommit-vs-Rollback shape diverges.
mock.ExpectRollback()
_, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{
{Content: []byte("aaa"), Filename: "a.txt"},
{Content: []byte("bb"), Filename: "b.txt"},
})
if err == nil || !strings.Contains(err.Error(), "batch insert item 1") {
t.Fatalf("expected wrapped per-row insert error, got %v", err)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("expectations (must rollback, no commit): %v", err)
}
}
func TestPutBatch_RollsBackOnFirstRowError(t *testing.T) {
// Edge case: very first INSERT fails. No rows ever staged — but the
// Tx still needs to roll back to release the snapshot.
db, mock := newMockDB(t)
store := pendinguploads.NewPostgres(db)
wsID := uuid.New()
mock.ExpectBegin()
mock.ExpectQuery(insertSQL).
WithArgs(wsID, []byte("oops"), int64(4), "a.txt", "").
WillReturnError(errors.New("constraint violation"))
mock.ExpectRollback()
_, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{
{Content: []byte("oops"), Filename: "a.txt"},
})
if err == nil || !strings.Contains(err.Error(), "batch insert item 0") {
t.Fatalf("expected wrapped item-0 insert error, got %v", err)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("expectations: %v", err)
}
}
func TestPutBatch_CommitError_Wrapped(t *testing.T) {
// Commit fails after every INSERT succeeded. Postgres has already
// rolled back the Tx by this point; we surface the error so the
// handler returns 500 and the client retries.
db, mock := newMockDB(t)
store := pendinguploads.NewPostgres(db)
wsID := uuid.New()
id1 := uuid.New()
mock.ExpectBegin()
mock.ExpectQuery(insertSQL).
WithArgs(wsID, []byte("hi"), int64(2), "a.txt", "").
WillReturnRows(sqlmock.NewRows([]string{"file_id"}).AddRow(id1))
mock.ExpectCommit().WillReturnError(errors.New("commit broken"))
_, err := store.PutBatch(context.Background(), wsID, []pendinguploads.PutItem{
{Content: []byte("hi"), Filename: "a.txt"},
})
if err == nil || !strings.Contains(err.Error(), "commit batch") {
t.Fatalf("expected wrapped commit error, got %v", err)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("expectations: %v", err)
}
}

View File

@ -0,0 +1,129 @@
// sweeper.go — periodic GC for the pending_uploads table.
//
// The platform's poll-mode chat-upload handler creates a row in
// pending_uploads for every chat-attached file the canvas sends to a
// poll-mode workspace. The workspace's inbox poller fetches the bytes
// and acks the row, but two failure modes leak rows long-term:
//
// 1. Workspace fetches but never acks (network hiccup between GET
// /content and POST /ack; workspace crashed between the two).
// Phase 1's Get refuses to re-serve an acked row, but a never-
// acked row could in principle be fetched repeatedly until expires_at.
// Phase 2's workspace-side fetcher is idempotent; the worry is
// only disk usage on the platform side.
//
// 2. Workspace never fetches at all (workspace was offline when the
// row was written; the upload's TTL elapsed).
//
// This sweeper handles both. It runs every SweepInterval, deletes rows
// in either category, and emits structured logs + Prometheus counters
// so a stuck-fetch dashboard can spot the leak class.
//
// Failure isolation: a transient DB error must NOT crash the sweeper.
// We log + continue; the next tick retries. ctx cancellation cleanly
// shuts the loop down for graceful shutdown.
package pendinguploads
import (
"context"
"log"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
)
// SweepInterval is the cadence of the GC loop. 5 minutes is a balance
// between "rows reaped quickly enough that disk usage doesn't surprise
// anyone" and "we don't pay a DELETE round-trip every 30 seconds when
// there are no candidates." Aligned with other low-priority sweepers
// (registry/orphan_sweeper runs at 60s but operates on Docker — much
// more expensive per cycle than a single indexed DELETE).
const SweepInterval = 5 * time.Minute
// DefaultAckRetention is how long an acked row sticks around before the
// sweeper deletes it. 1 hour gives the workspace enough time to retry
// the GET if its first fetch crashed mid-write — at-least-once handoff
// without leaking content for a full 24h after the workspace already
// has a copy.
const DefaultAckRetention = 1 * time.Hour
// sweepDeadline bounds a single sweep cycle. A daemon at the edge of
// timeout shouldn't pile up goroutines; 30s is generous for a single
// indexed DELETE on a table that should rarely have more than a few
// thousand rows in flight.
const sweepDeadline = 30 * time.Second
// StartSweeper runs the GC loop until ctx is cancelled. nil storage
// makes the loop a no-op (matches the handlers' tolerance for an
// unconfigured pendinguploads — some test harnesses run without the
// storage wired).
//
// Pass ackRetention=0 to use DefaultAckRetention. Negative values are
// clamped at the storage layer.
//
// Production callers use SweepInterval (5m). Tests use a short interval
// to exercise the ticker-driven sweep path without burning real wall-
// clock time.
func StartSweeper(ctx context.Context, storage Storage, ackRetention time.Duration) {
startSweeperWithInterval(ctx, storage, ackRetention, SweepInterval)
}
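// A hypothetical wiring sketch, not part of this diff, for the call
// shape described above: the platform's startup path hands the sweeper
// its shutdown-scoped context and passes 0 to select
// DefaultAckRetention. The function name is illustrative.
func startPendingUploadsGC(ctx context.Context, storage Storage) {
	// Runs until ctx is cancelled at graceful shutdown; nil storage
	// simply disables the sweeper (see startSweeperWithInterval).
	go StartSweeper(ctx, storage, 0)
}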
// startSweeperWithInterval is the test-friendly variant of StartSweeper
// — same loop, but the cadence is caller-specified. Production code
// should use StartSweeper to keep the SweepInterval constant pinned.
func startSweeperWithInterval(ctx context.Context, storage Storage, ackRetention, interval time.Duration) {
if storage == nil {
log.Println("pendinguploads sweeper: storage is nil — sweeper disabled")
return
}
if ackRetention == 0 {
ackRetention = DefaultAckRetention
}
log.Printf(
"pendinguploads sweeper started — sweeping every %s; ack retention %s",
interval, ackRetention,
)
ticker := time.NewTicker(interval)
defer ticker.Stop()
// Run once immediately so a platform restart cleans up any rows
// that became eligible while we were down — don't make the
// operator wait 5 minutes for the first sweep.
sweepOnce(ctx, storage, ackRetention)
for {
select {
case <-ctx.Done():
log.Println("pendinguploads sweeper: shutdown")
return
case <-ticker.C:
sweepOnce(ctx, storage, ackRetention)
}
}
}
func sweepOnce(parent context.Context, storage Storage, ackRetention time.Duration) {
ctx, cancel := context.WithTimeout(parent, sweepDeadline)
defer cancel()
res, err := storage.Sweep(ctx, ackRetention)
if err != nil {
// Transient errors: log + continue. The next tick retries; if
// the DB is genuinely down, the rest of the platform is also
// broken and disk usage is the least of the operator's
// problems.
log.Printf("pendinguploads sweeper: Sweep failed: %v", err)
metrics.PendingUploadsSweepError()
return
}
metrics.PendingUploadsSwept(res.Acked, res.Expired)
if res.Total() > 0 {
// Per-cycle structured-ish log (one line per cycle that did
// something). Quiet by design — most cycles delete zero rows
// on a healthy system, and a stream of empty-result lines
// would drown the production log without surfacing a signal.
log.Printf(
"pendinguploads sweeper: deleted acked=%d expired=%d total=%d",
res.Acked, res.Expired, res.Total(),
)
}
}

View File

@ -0,0 +1,294 @@
package pendinguploads_test
import (
"context"
"errors"
"sync/atomic"
"testing"
"time"
"github.com/google/uuid"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads"
)
// fakeSweepStorage is a minimal Storage that records every Sweep call
// and lets each test inject the per-cycle return values. The other
// methods are no-ops — the sweeper goroutine never calls them.
type fakeSweepStorage struct {
calls atomic.Int64
results []pendinguploads.SweepResult
errs []error
cycleDone chan struct{} // closed after each Sweep call (test sync)
gotRetention atomic.Int64 // last ackRetention seen, in seconds
}
func newFakeSweepStorage(results []pendinguploads.SweepResult, errs []error) *fakeSweepStorage {
return &fakeSweepStorage{
results: results,
errs: errs,
cycleDone: make(chan struct{}, 16),
}
}
func (f *fakeSweepStorage) Put(_ context.Context, _ uuid.UUID, _ []byte, _, _ string) (uuid.UUID, error) {
return uuid.Nil, errors.New("not used")
}
func (f *fakeSweepStorage) Get(_ context.Context, _ uuid.UUID) (pendinguploads.Record, error) {
return pendinguploads.Record{}, errors.New("not used")
}
func (f *fakeSweepStorage) MarkFetched(_ context.Context, _ uuid.UUID) error {
return errors.New("not used")
}
func (f *fakeSweepStorage) Ack(_ context.Context, _ uuid.UUID) error {
return errors.New("not used")
}
func (f *fakeSweepStorage) PutBatch(_ context.Context, _ uuid.UUID, _ []pendinguploads.PutItem) ([]uuid.UUID, error) {
return nil, errors.New("not used")
}
func (f *fakeSweepStorage) Sweep(_ context.Context, ackRetention time.Duration) (pendinguploads.SweepResult, error) {
idx := int(f.calls.Load())
f.calls.Add(1)
f.gotRetention.Store(int64(ackRetention.Seconds()))
defer func() {
select {
case f.cycleDone <- struct{}{}:
default:
}
}()
if idx < len(f.errs) && f.errs[idx] != nil {
return pendinguploads.SweepResult{}, f.errs[idx]
}
if idx < len(f.results) {
return f.results[idx], nil
}
return pendinguploads.SweepResult{}, nil
}
// waitForCycle blocks until at least one Sweep completes, with a deadline.
// Tests use this instead of time.Sleep to avoid flakes on slow CI hosts.
//
// CAVEAT: cycleDone fires from inside fakeSweepStorage.Sweep's defer,
// which runs as Sweep returns its result — BEFORE the StartSweeper
// loop has processed the (result, error) tuple and called the
// metric recorders. Tests that assert on metric counters must NOT
// rely on this wait alone; use waitForMetricDelta instead so the
// metric increment race (Sweep returns → cycleDone fires → test
// reads counter → only then does StartSweeper's loop call
// metrics.PendingUploadsSweepError) doesn't produce a flake.
func (f *fakeSweepStorage) waitForCycle(t *testing.T, n int, timeout time.Duration) {
t.Helper()
deadline := time.NewTimer(timeout)
defer deadline.Stop()
for got := 0; got < n; got++ {
select {
case <-f.cycleDone:
case <-deadline.C:
t.Fatalf("waited %s for %d sweep cycles, got %d", timeout, n, f.calls.Load())
}
}
}
// waitForMetricDelta polls the supplied delta function until it returns
// `want` or the timeout elapses. Use after waitForCycle when the test
// asserts on a metric counter — closes the race between cycleDone
// (signalled inside fakeSweepStorage.Sweep's defer, BEFORE Sweep
// returns to StartSweeper) and the metric recording (which happens in
// StartSweeper's loop AFTER Sweep returns). On a slow CI host the test
// goroutine wins the read before StartSweeper's goroutine writes the
// counter; the polling assert preserves the determinism of "the metric
// MUST be N" without timing-based flakes.
//
// Per memory feedback_question_test_when_unexpected.md: the failure
// mode "delta=0, want=1" looked like a real bug at first glance —
// "metric never incremented" — but instrumented analysis showed the
// metric DID increment, just AFTER the test's read. The fix is the
// test's wait shape, not the production code.
func waitForMetricDelta(t *testing.T, delta func() int64, want int64, timeout time.Duration) {
t.Helper()
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
if delta() == want {
return
}
time.Sleep(5 * time.Millisecond)
}
t.Fatalf("waited %s for metric delta=%d, last seen %d", timeout, want, delta())
}
func TestStartSweeper_NilStorageDoesNotPanic(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Should return immediately without panicking; no goroutine to wait on.
pendinguploads.StartSweeper(ctx, nil, time.Second)
}
func TestStartSweeper_RunsImmediatelyAndOnTick(t *testing.T) {
store := newFakeSweepStorage(
[]pendinguploads.SweepResult{{Acked: 5}, {Acked: 1, Expired: 2}},
nil,
)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go pendinguploads.StartSweeper(ctx, store, time.Hour)
store.waitForCycle(t, 1, 2*time.Second)
if got := store.calls.Load(); got < 1 {
t.Errorf("expected at least one immediate sweep, got %d", got)
}
// Retention propagated.
if store.gotRetention.Load() != 3600 {
t.Errorf("retention seconds = %d, want 3600", store.gotRetention.Load())
}
}
func TestStartSweeper_ZeroAckRetentionUsesDefault(t *testing.T) {
store := newFakeSweepStorage([]pendinguploads.SweepResult{{}}, nil)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go pendinguploads.StartSweeper(ctx, store, 0)
store.waitForCycle(t, 1, 2*time.Second)
want := int64(pendinguploads.DefaultAckRetention.Seconds())
if store.gotRetention.Load() != want {
t.Errorf("retention = %d, want default %d", store.gotRetention.Load(), want)
}
}
func TestStartSweeper_ContextCancelStopsLoop(t *testing.T) {
store := newFakeSweepStorage([]pendinguploads.SweepResult{{}}, nil)
ctx, cancel := context.WithCancel(context.Background())
done := make(chan struct{})
go func() {
pendinguploads.StartSweeper(ctx, store, time.Second)
close(done)
}()
store.waitForCycle(t, 1, 2*time.Second)
cancel()
select {
case <-done:
case <-time.After(2 * time.Second):
t.Fatal("StartSweeper did not return after ctx cancel")
}
}
func TestStartSweeperWithInterval_TickerFiresAdditionalCycles(t *testing.T) {
store := newFakeSweepStorage(
[]pendinguploads.SweepResult{{Acked: 1}, {Expired: 1}, {}, {}, {}},
nil,
)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go pendinguploads.StartSweeperWithIntervalForTest(ctx, store, time.Hour, 30*time.Millisecond)
// Immediate cycle + at least one tick-driven cycle.
store.waitForCycle(t, 2, 2*time.Second)
if got := store.calls.Load(); got < 2 {
t.Errorf("expected ≥2 cycles (immediate + 1 tick), got %d", got)
}
}
func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) {
// First call errors; second call succeeds. The loop must keep running
// across the error so a one-off DB hiccup doesn't disable the GC.
store := newFakeSweepStorage(
[]pendinguploads.SweepResult{{}, {Acked: 1}},
[]error{errors.New("transient db error"), nil},
)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// StartSweeper pins the package-level SweepInterval (5m), so this
// test cannot cheaply observe a second, ticker-driven cycle (that
// path is covered by TestStartSweeperWithInterval_TickerFiresAdditionalCycles
// above). The immediate sweep on startup is enough to prove the
// contract here: the errored first cycle must not crash or wedge
// the loop.
go pendinguploads.StartSweeper(ctx, store, time.Hour)
// Wait for the first (errored) cycle.
store.waitForCycle(t, 1, 2*time.Second)
// Cancel to stop the loop. Getting this far already proves the
// error path didn't crash the goroutine: waitForCycle saw the
// errored cycle complete, and a panic in the error handling would
// abort the test binary.
cancel()
}
// metricDelta returns a function that, when called, returns how much
// the (acked, expired, errored) counters have advanced since metricDelta
// was originally called. metrics is a process-singleton across the test
// suite; deltas isolate this test from order-of-execution dependencies.
func metricDelta(t *testing.T) (deltaAcked, deltaExpired, deltaError func() int64) {
t.Helper()
a0, e0, err0 := metrics.PendingUploadsSweepCounts()
deltaAcked = func() int64 {
a, _, _ := metrics.PendingUploadsSweepCounts()
return a - a0
}
deltaExpired = func() int64 {
_, e, _ := metrics.PendingUploadsSweepCounts()
return e - e0
}
deltaError = func() int64 {
_, _, x := metrics.PendingUploadsSweepCounts()
return x - err0
}
return
}
func TestStartSweeper_RecordsMetricsOnSuccess(t *testing.T) {
deltaAcked, deltaExpired, deltaError := metricDelta(t)
store := newFakeSweepStorage(
[]pendinguploads.SweepResult{{Acked: 3, Expired: 5}},
nil,
)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go pendinguploads.StartSweeper(ctx, store, time.Hour)
store.waitForCycle(t, 1, 2*time.Second)
// Poll for the success counters to settle — closes the cycleDone-
// vs-metric-record race (see waitForMetricDelta comment).
waitForMetricDelta(t, deltaAcked, 3, 2*time.Second)
waitForMetricDelta(t, deltaExpired, 5, 2*time.Second)
// Error counter MUST stay at zero on the success path. Read after
// the success counters have settled — once those are correct,
// StartSweeper has fully processed this cycle's result.
if got := deltaError(); got != 0 {
t.Errorf("error counter delta = %d, want 0", got)
}
}
func TestStartSweeper_RecordsMetricsOnError(t *testing.T) {
_, _, deltaError := metricDelta(t)
store := newFakeSweepStorage(
[]pendinguploads.SweepResult{{}},
[]error{errors.New("db down")},
)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go pendinguploads.StartSweeper(ctx, store, time.Hour)
store.waitForCycle(t, 1, 2*time.Second)
// Poll for the error counter to settle — cycleDone fires inside
// the fake's Sweep defer, BEFORE StartSweeper's loop receives the
// returned error and calls metrics.PendingUploadsSweepError. On
// slow CI hosts a direct deltaError() read here returns 0 even
// though the metric WILL be 1 a few ms later. See
// waitForMetricDelta comment.
waitForMetricDelta(t, deltaError, 1, 2*time.Second)
}

View File

@ -14,6 +14,7 @@ import (
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provlog"
)
// CPProvisionerAPI is the contract WorkspaceHandler uses to talk to the
@ -214,6 +215,13 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string,
}
log.Printf("CP provisioner: workspace %s → EC2 instance %s (%s)", cfg.WorkspaceID, result.InstanceID, result.State)
provlog.Event("provision.ec2_started", map[string]any{
"workspace_id": cfg.WorkspaceID,
"instance_id": result.InstanceID,
"state": result.State,
"tier": cfg.Tier,
"runtime": cfg.Runtime,
})
return result.InstanceID, nil
}
@ -273,6 +281,10 @@ func (p *CPProvisioner) Stop(ctx context.Context, workspaceID string) error {
return fmt.Errorf("cp provisioner: stop %s: unexpected %d: %s",
workspaceID, resp.StatusCode, strings.TrimSpace(string(body)))
}
provlog.Event("provision.ec2_stopped", map[string]any{
"workspace_id": workspaceID,
"instance_id": instanceID,
})
return nil
}

View File

@ -0,0 +1,48 @@
// Package provlog emits structured, single-line JSON log records for
// provisioning-lifecycle boundaries (workspace create, EC2 start/stop,
// restart, idempotency skips). Records share a stable `evt:` prefix and
// JSON payload so a future grep|jq pipeline (or a Loki/Datadog ingest)
// can reconstruct the per-workspace timeline without parsing the
// human-prose log lines that already exist.
//
// Existing log.Printf lines are intentionally NOT replaced — they
// remain the operator-facing message. Event() emits a paired structured
// record alongside, additive only.
//
// Event taxonomy (extend by appending; never rename):
//
// provision.start — workspace row inserted, EC2 about to launch
// provision.skip_existing — idempotency hit, no new EC2
// provision.ec2_started — RunInstances returned an instance id
// provision.ec2_stopped — TerminateInstances acknowledged
// restart.pre_stop — Restart handler about to call Stop
//
// Required fields per event are documented at each call site.
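//
// As an illustrative sketch (field values invented here, not taken from a
// real deployment), the payload portion of an ec2_started record looks like:
//
//	evt: provision.ec2_started {"instance_id":"i-0abc123","state":"pending","workspace_id":"ws-1"}
//
// so `grep 'evt: ' platform.log | sed 's/.*evt: [^ ]* //' | jq .` recovers
// just the JSON payloads without touching the prose log lines.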
package provlog
import (
"encoding/json"
"log"
)
// Event writes a single line of the form:
//
// evt: <name> {"k":"v",...}
//
// to the standard logger. JSON encoding errors never reach the caller —
// a logging helper must never panic the request path; the error is
// recorded in a fallback payload instead. fields may be
// nil; the empty payload `{}` is still useful to mark an event boundary.
func Event(name string, fields map[string]any) {
if fields == nil {
fields = map[string]any{}
}
payload, err := json.Marshal(fields)
if err != nil {
// Fall back to a static payload so the event boundary still
// appears in the log. The marshal error itself is recorded
// on a best-effort basis.
log.Printf("evt: %s {\"_marshal_err\":%q}", name, err.Error())
return
}
log.Printf("evt: %s %s", name, payload)
}

View File

@ -0,0 +1,97 @@
package provlog
import (
"bytes"
"encoding/json"
"log"
"strings"
"testing"
)
// captureLog redirects the default logger to a buffer for the duration
// of fn and returns whatever was written.
func captureLog(t *testing.T, fn func()) string {
t.Helper()
var buf bytes.Buffer
prevWriter := log.Writer()
prevFlags := log.Flags()
log.SetOutput(&buf)
log.SetFlags(0) // strip date/time so assertions stay deterministic
t.Cleanup(func() {
log.SetOutput(prevWriter)
log.SetFlags(prevFlags)
})
fn()
return buf.String()
}
func TestEvent_EmitsEvtPrefixAndJSONPayload(t *testing.T) {
out := captureLog(t, func() {
Event("provision.start", map[string]any{
"workspace_id": "ws-123",
"tier": 4,
"runtime": "claude-code",
})
})
out = strings.TrimSpace(out)
if !strings.HasPrefix(out, "evt: provision.start ") {
t.Fatalf("expected evt-prefixed line, got %q", out)
}
jsonPart := strings.TrimPrefix(out, "evt: provision.start ")
var got map[string]any
if err := json.Unmarshal([]byte(jsonPart), &got); err != nil {
t.Fatalf("payload not valid JSON: %v (raw=%q)", err, jsonPart)
}
if got["workspace_id"] != "ws-123" {
t.Errorf("workspace_id field lost: %+v", got)
}
// JSON unmarshal turns numbers into float64 — exact-equal compare.
if got["tier"].(float64) != 4 {
t.Errorf("tier field lost: %+v", got)
}
if got["runtime"] != "claude-code" {
t.Errorf("runtime field lost: %+v", got)
}
}
func TestEvent_NilFieldsEmitsEmptyObject(t *testing.T) {
out := captureLog(t, func() {
Event("restart.pre_stop", nil)
})
if !strings.Contains(out, "evt: restart.pre_stop {}") {
t.Fatalf("nil fields should emit empty object, got %q", out)
}
}
func TestEvent_PreservesEventBoundaryOnUnmarshalableValue(t *testing.T) {
// A channel cannot be marshaled by encoding/json — verify we still
// emit the event boundary with a recorded marshal error. This is
// the structural guarantee: the call site never sees a panic, and
// the event name is always present in the log.
out := captureLog(t, func() {
Event("provision.ec2_started", map[string]any{
"chan": make(chan int),
})
})
if !strings.Contains(out, "evt: provision.ec2_started ") {
t.Fatalf("event boundary missing on marshal error: %q", out)
}
if !strings.Contains(out, "_marshal_err") {
t.Fatalf("expected _marshal_err sentinel, got %q", out)
}
}
func TestEvent_SingleLineOutput(t *testing.T) {
// Log aggregators line-split on \n. A multi-line emit would silently
// fragment the JSON across two records — pin single-line shape.
out := captureLog(t, func() {
Event("provision.skip_existing", map[string]any{
"existing_id": "ws-abc",
"name": "child-1",
})
})
trimmed := strings.TrimRight(out, "\n")
if strings.Contains(trimmed, "\n") {
t.Fatalf("event line must be single-line, got %q", out)
}
}

View File

@ -243,13 +243,15 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
// entire platform. Gated behind AdminAuth (issue #180).
r.GET("/approvals/pending", middleware.AdminAuth(db.DB), apph.ListAll)
// Team handlers — Collapse only. The bulk-Expand path is gone:
// every workspace can have children via the regular CreateWorkspace
// flow with parent_id set, so a separate handler that bulk-creates
// from sub_workspaces (and was non-idempotent — calling it twice
// duplicated the team) earned its way out.
teamh := handlers.NewTeamHandler(broadcaster, wh, platformURL, configsDir)
wsAuth.POST("/collapse", teamh.Collapse)
// (TeamHandler is gone — #2864.) The visual canvas Collapse
// button calls PATCH /workspaces/:id { collapsed: true/false }
// (presentational toggle on canvas_layouts), NOT the destructive
// POST /collapse that stopped + removed children. The
// destructive route had zero UI callers (verified via grep
// across canvas/, scripts/, and the MCP tool registry — only
// docs referenced it). team.go + team_test.go + the route
// + helpers (findTemplateDirByName, NewTeamHandler) are
// deleted; visual collapse is unaffected.
// Agents
ah := handlers.NewAgentHandler(broadcaster)
@ -519,8 +521,9 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
r.GET("/canvas/viewport", vh.Get)
r.PUT("/canvas/viewport", middleware.CanvasOrBearer(db.DB), vh.Save)
// Templates
tmplh := handlers.NewTemplatesHandler(configsDir, dockerCli)
// Templates — wh threaded so generateDefaultConfig picks the
// SaaS-aware default tier in Import + ReplaceFiles (#2910 PR-B).
tmplh := handlers.NewTemplatesHandler(configsDir, dockerCli, wh)
// #686: GET /templates lists all template names+metadata from configsDir.
// Open access lets unauthenticated callers enumerate org configurations and
// installed plugins. AdminAuth-gate it alongside POST /templates/import.

View File

@ -14,6 +14,7 @@ import (
cronlib "github.com/robfig/cron/v3"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/metrics"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/supervised"
)
@ -741,6 +742,11 @@ func (s *Scheduler) sweepPhantomBusy(ctx context.Context) {
continue
}
log.Printf("Scheduler: phantom-busy sweep — reset %s (no activity in %d min)", name, int(phantomStaleThreshold.Minutes()))
// #2865: surface as molecule_phantom_busy_resets_total. High
// reset rate signals task-lifecycle accounting regressions
// (e.g. missing env vars causing claude --print timeouts that
// leave active_tasks elevated until this sweep fires).
metrics.TrackPhantomBusyReset()
count++
}
if err := rows.Err(); err != nil {

View File

@ -0,0 +1,2 @@
-- Reversal of 20260505200000_pending_uploads_acked_index.up.sql.
DROP INDEX IF EXISTS idx_pending_uploads_acked;

View File

@ -0,0 +1,30 @@
-- 20260505200000_pending_uploads_acked_index.up.sql
--
-- Adds the missing partial index for the acked-retention arm of the
-- pendinguploads.Sweep query. The Phase 1 migration created two
-- partial indexes both gated on `acked_at IS NULL` (workspace-fetch
-- hot path + expires_at sweep arm); the third query path —
-- `WHERE acked_at IS NOT NULL AND acked_at < now() - interval` — was
-- left to a seq scan.
--
-- For a high-traffic deployment that's a real cost: the table
-- accumulates one row per chat-attached file; the sweeper runs every
-- 5 minutes and DELETEs rows past the 1-hour ack retention. A seq
-- scan over 100K-1M acked rows holds an AccessShare lock for seconds
-- on every cycle. Partial-indexing the inverse predicate reduces
-- this to a btree range scan and lets the DELETE complete in
-- low-millisecond range.
--
-- WHERE acked_at IS NOT NULL is intentionally inverse of the other
-- two indexes — they cover the unacked working set; this covers the
-- terminal-state set the sweeper visits. The subsets are disjoint, so
-- this new index doesn't overlap the existing pair.
--
-- Caught in self-review on the parent RFC's Phase 4 PR; filed as
-- a follow-up rather than a Phase 1 fix because the cost only
-- materializes at a row count we don't expect to hit before the
-- sweeper has had a chance to keep up.
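--
-- Illustrative shape of the retention arm this index serves (the real
-- statement lives in the Go sweep code; the interval literal below is a
-- sketch of the 1-hour ack retention, not the configured value):
--
--   DELETE FROM pending_uploads
--    WHERE acked_at IS NOT NULL
--      AND acked_at < now() - interval '1 hour';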
CREATE INDEX IF NOT EXISTS idx_pending_uploads_acked
ON pending_uploads (acked_at)
WHERE acked_at IS NOT NULL;

View File

@ -432,7 +432,17 @@ def _is_self_notify_row(row: dict[str, Any]) -> bool:
def message_from_activity(row: dict[str, Any]) -> InboxMessage:
"""Convert one /activity row into an InboxMessage."""
"""Convert one /activity row into an InboxMessage.
Mutates ``row['request_body']`` in-place to swap any
``platform-pending:`` URIs to the locally-staged ``workspace:`` URIs
(see ``inbox_uploads.rewrite_request_body``). By the time the
upstream chat message arrives via this path, the upload-receive row
that staged the bytes has already populated the URI cache (lower
activity_logs.id, processed earlier in the same poll batch). A
cache miss leaves the URI untouched; the agent surfaces an
unresolvable URI rather than the inbox silently dropping the part.
"""
request_body = row.get("request_body")
if isinstance(request_body, str):
# The Go handler returns request_body as json.RawMessage; httpx
@ -443,6 +453,14 @@ def message_from_activity(row: dict[str, Any]) -> InboxMessage:
except (TypeError, ValueError):
request_body = None
# Rewrite platform-pending: URIs → workspace: URIs in-place. Imported
# at call time to keep the import graph clean for the in-container
# path that doesn't use this module (also avoids a circular: the
# uploads module is small enough that re-importing per call is
# cheap, and the Python import cache makes it free after the first).
from inbox_uploads import rewrite_request_body
rewrite_request_body(request_body)
return InboxMessage(
activity_id=str(row.get("id", "")),
text=_extract_text(request_body, row.get("summary")),
@ -532,11 +550,57 @@ def _poll_once(
if cursor is None:
rows = list(reversed(rows))
# Imported lazily at use-site so a runtime that never sees an
# upload-receive row never imports the module. Cheap on the hot
# path because Python caches the import.
from inbox_uploads import is_chat_upload_row, BatchFetcher
new_count = 0
last_id: str | None = None
# ``batch_fetcher`` is lazy: a poll batch with no upload rows pays
# zero overhead. Once the first upload row appears we open one
# BatchFetcher and submit every subsequent upload row to its thread
# pool; before processing the FIRST non-upload row we drain the
# pool (wait_all) so the URI cache is hot when message rewriting
# runs. Without the barrier, the chat message that references the
# upload would arrive at the agent with the un-rewritten
# platform-pending: URI.
batch_fetcher: BatchFetcher | None = None
def _drain_uploads(bf: BatchFetcher | None) -> None:
if bf is None:
return
bf.wait_all()
bf.close()
for row in rows:
if not isinstance(row, dict):
continue
if is_chat_upload_row(row):
# Side-effect row from the platform's poll-mode chat-upload
# handler — fetch the bytes, stage to /workspace/.molecule/
# chat-uploads, ack. NOT enqueued as an InboxMessage; the
# agent will see the chat message that REFERENCES this
# upload via a separate (later) activity row, with the
# pending: URI rewritten to a workspace: URI by
# message_from_activity. We DO advance the cursor past
# this row so a permanent network outage on /content
# doesn't stall the cursor and block real chat traffic.
if batch_fetcher is None:
batch_fetcher = BatchFetcher(
platform_url=platform_url,
workspace_id=workspace_id,
headers=headers,
)
batch_fetcher.submit(row)
last_id = str(row.get("id", "")) or last_id
continue
# Non-upload row: drain any pending uploads first so the URI
# cache is populated before we run rewrite_request_body /
# message_from_activity on a row that may reference one.
if batch_fetcher is not None:
_drain_uploads(batch_fetcher)
batch_fetcher = None
if _is_self_notify_row(row):
# The workspace-server's `/notify` handler writes the agent's
# own send_message_to_user POSTs to activity_logs with
@ -571,6 +635,13 @@ def _poll_once(
last_id = message.activity_id
new_count += 1
# Drain any uploads still in flight if the batch ended with upload
# rows (no chat-message row to trigger the inline drain). Without
# this, a future poll that picks up the chat-message row first
# would race with the still-running fetches.
if batch_fetcher is not None:
_drain_uploads(batch_fetcher)
if last_id is not None:
state.save_cursor(last_id, cursor_key)
return new_count
@ -613,6 +684,7 @@ def start_poller_thread(
platform_url: str,
workspace_id: str,
interval: float = POLL_INTERVAL_SECONDS,
stop_event: threading.Event | None = None,
) -> threading.Thread:
"""Spawn the poller as a daemon thread. Returns the Thread handle.
@ -624,13 +696,18 @@ def start_poller_thread(
operator running ``ps -eL`` or eyeballing ``threading.enumerate()``
can tell which thread is which without reverse-engineering it from
crash tracebacks.
Pass ``stop_event`` to enable graceful shutdown; tests use it so
the daemon thread doesn't outlive the test that started it and race
with later tests' httpx patches. Production code passes None and
relies on the daemon flag for process-exit cleanup.
"""
name = "molecule-mcp-inbox-poller"
if workspace_id:
name = f"{name}-{workspace_id[:8]}"
t = threading.Thread(
target=_poll_loop,
args=(state, platform_url, workspace_id, interval),
args=(state, platform_url, workspace_id, interval, stop_event),
name=name,
daemon=True,
)

724
workspace/inbox_uploads.py Normal file
View File

@ -0,0 +1,724 @@
"""Poll-mode chat-upload fetcher + URI cache for the standalone path.
Companion to ``inbox.py``. When the workspace's inbox poller sees an
``activity_logs`` row with ``method='chat_upload_receive'`` (written by
the platform's ``uploadPollMode`` handler — workspace-server
``internal/handlers/chat_files.go``), this module:
1. Pulls the bytes from
``GET /workspaces/:id/pending-uploads/:file_id/content``.
2. Writes them to ``/workspace/.molecule/chat-uploads/<prefix>-<name>``
(the same on-disk shape as the push-mode handler in
``internal_chat_uploads.py``), so anything downstream that already
resolves ``workspace:/workspace/.molecule/chat-uploads/...`` URIs
works unchanged.
3. POSTs ``/workspaces/:id/pending-uploads/:file_id/ack`` so Phase 3
sweep can clean up the platform-side ``pending_uploads`` row.
4. Records a ``platform-pending:<wsid>/<file_id>`` →
``workspace:/workspace/.molecule/chat-uploads/...`` mapping in a
process-local cache so the chat message that arrives later
(referencing the platform-pending URI) gets rewritten before the
agent sees it.
URI rewrite ordering: the chat message containing the
``platform-pending:`` URI is logged by the platform AFTER the
``chat_upload_receive`` row, so the inbox poller sees the upload-receive
row first (lower activity_logs.id) and stages the bytes before the chat
message arrives in the same poll batch (or a later one). The URI cache
is therefore populated before the message_from_activity path needs it.
A miss (network race, restart with stale cursor) is handled by keeping
the original ``platform-pending:`` URI in the rewritten body; the agent
will see something it can't open, which is preferable to silently
dropping the URI.
Auth: same Bearer token the inbox poller uses (``platform_auth.auth_headers``).
Both endpoints are on the wsAuth-gated route, so this module can never
read another tenant's bytes even if a token is misrouted.
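Illustrative activity row this module consumes (values invented for the
example; real rows carry whatever the platform handler wrote)::

    {"id": "4812", "activity_type": "a2a_receive",
     "method": "chat_upload_receive",
     "request_body": {"file_id": "f-1", "name": "report.pdf",
                      "mimeType": "application/pdf",
                      "uri": "platform-pending:ws-123/f-1"}}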
"""
from __future__ import annotations
import concurrent.futures
import logging
import mimetypes
import os
import re
import secrets as pysecrets
import threading
from collections import OrderedDict
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
# Same on-disk root as internal_chat_uploads.CHAT_UPLOAD_DIR. Kept as a
# duplicated literal rather than imported from internal_chat_uploads to
# avoid pulling in starlette as a transitive dep (this module runs in the
# standalone MCP path, which doesn't ship the in-container HTTP server);
# the duplication risks drift, so keep the two values in sync.
CHAT_UPLOAD_DIR = "/workspace/.molecule/chat-uploads"
# Per-file safety net. The platform enforces 25 MB on the staging side,
# but a buggy or hostile platform response shouldn't be able to fill the
# workspace's disk — refuse to write more than this even if the response
# claims a larger Content-Length.
MAX_FILE_BYTES = 25 * 1024 * 1024
# Network deadline for the GET. Tuned for a 25 MB transfer over a
# reasonable consumer link (~5 Mbps gives ~40s for the full payload),
# plus headroom for TLS + platform auth. Aligned with inbox poller's
# 10s default for /activity calls — both are user-perceived latency.
DEFAULT_FETCH_TIMEOUT = 60.0
# Concurrency cap for ``BatchFetcher``. Four workers is enough headroom
# for the realistic "user dragged 3-4 files into chat at once" case
# while bounding the platform's per-workspace fan-out. The cap matters
# because the platform's /content endpoint reads bytea from Postgres in
# a single round-trip per request — N workers = N concurrent DB reads
# of up to 25 MB each, so a higher cap could pressure platform memory
# without much UX win (network bandwidth is the bottleneck once the
# bytes are buffered).
DEFAULT_BATCH_FETCH_WORKERS = 4
# Upper bound on how long ``BatchFetcher.wait_all`` blocks the inbox
# poll loop before giving up on still-in-flight fetches. Aligned with
# DEFAULT_FETCH_TIMEOUT so a single hung fetch can't stall the loop
# longer than its own deadline. A timeout fires only if a worker thread
# is stuck past the underlying httpx timeout — pathological case;
# normal completion is bounded by per-fetch timeout × ceil(N/W).
DEFAULT_BATCH_WAIT_TIMEOUT = DEFAULT_FETCH_TIMEOUT + 5.0
# Cap on the URI cache. A long-lived workspace handling thousands of
# uploads shouldn't grow without bound; an LRU cap of 1024 keeps the
# entries-needed-for-a-typical-conversation well within memory.
URI_CACHE_MAX_ENTRIES = 1024
# Same character class as internal_chat_uploads — kept duplicated rather
# than imported to avoid dragging starlette into the standalone path.
_UNSAFE_FILENAME_CHARS = re.compile(r"[^a-zA-Z0-9._\-]")
def sanitize_filename(name: str) -> str:
"""Reduce a user-supplied filename to a safe form.
Mirrors ``internal_chat_uploads.sanitize_filename`` and the Go
handler's ``SanitizeFilename`` — three-way parity is pinned by
``workspace-server/internal/handlers/sanitize_filename_test.go`` and
``workspace/tests/test_internal_chat_uploads.py`` so the URI shape
is identical regardless of which path handles the upload.
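Illustrative example (not one of the pinned parity cases):
``sanitize_filename("my report (final).pdf")`` returns
``"my_report__final_.pdf"``; spaces become underscores and the unsafe
parentheses are replaced.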
"""
base = os.path.basename(name)
base = base.replace(" ", "_")
base = _UNSAFE_FILENAME_CHARS.sub("_", base)
if len(base) > 100:
ext = ""
dot = base.rfind(".")
if dot >= 0 and len(base) - dot <= 16:
ext = base[dot:]
base = base[: 100 - len(ext)] + ext
if base in ("", ".", ".."):
return "file"
return base
# ---------------------------------------------------------------------------
# URI cache — maps platform-pending URIs to local workspace: URIs
# ---------------------------------------------------------------------------
class _URICache:
"""Thread-safe bounded LRU mapping of platform-pending → workspace URIs.
Bounded so a workspace that runs for months and handles thousands of
uploads doesn't accumulate entries forever. ``OrderedDict.move_to_end``
promotes recently-used entries; eviction takes the oldest.
The cache is intentionally per-process; there is no persistence
across a workspace restart. A restart with a stale inbox cursor that
re-polls an upload-receive row will re-fetch (the bytes are already
on disk from the prior session; see ``stage_to_disk``'s O_EXCL
handling) and re-register; a chat message that referenced the
platform-pending URI BEFORE the restart and arrives AFTER would miss
the rewrite and surface the platform-pending URI to the agent. That
is preferable to a stale persisted mapping that points at a deleted
file.
"""
def __init__(self, max_entries: int = URI_CACHE_MAX_ENTRIES):
self._max = max_entries
self._lock = threading.Lock()
self._entries: "OrderedDict[str, str]" = OrderedDict()
def get(self, pending_uri: str) -> str | None:
with self._lock:
local = self._entries.get(pending_uri)
if local is not None:
self._entries.move_to_end(pending_uri)
return local
def set(self, pending_uri: str, local_uri: str) -> None:
with self._lock:
self._entries[pending_uri] = local_uri
self._entries.move_to_end(pending_uri)
while len(self._entries) > self._max:
self._entries.popitem(last=False)
def __len__(self) -> int:
with self._lock:
return len(self._entries)
def clear(self) -> None:
with self._lock:
self._entries.clear()
_cache = _URICache()
def get_cache() -> _URICache:
"""Expose the module-singleton cache for tests and the rewrite path."""
return _cache
def resolve_pending_uri(uri: str) -> str | None:
"""Return the local ``workspace:`` URI for a ``platform-pending:`` URI,
or None if not yet staged. Convenience for callers that want to
fall back to an on-demand fetch: pass the result through to
``executor_helpers.resolve_attachment_uri``.
"""
return _cache.get(uri)
# ---------------------------------------------------------------------------
# On-disk staging
# ---------------------------------------------------------------------------
def _open_safe(path: str) -> int:
"""Open ``path`` for write with ``O_CREAT|O_EXCL|O_NOFOLLOW``.
Same shape as ``internal_chat_uploads._open_safe``: refuses to
follow a pre-existing symlink at the target and refuses to overwrite
an existing regular file. The 16-byte random prefix makes a name
collision astronomical, but defense-in-depth costs nothing.
"""
flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
if hasattr(os, "O_NOFOLLOW"):
flags |= os.O_NOFOLLOW
return os.open(path, flags, 0o600)
def stage_to_disk(content: bytes, filename: str) -> str:
"""Write ``content`` under ``CHAT_UPLOAD_DIR`` and return the local URI.
Returns ``workspace:/workspace/.molecule/chat-uploads/<prefix>-<sanitized>``.
The 32-hex prefix makes the on-disk name unguessable to anything
that didn't see the response, so even if a stale agent has a guess
at the original filename it can't construct a URL to a sibling's
upload.
Raises:
OSError: write failure (mkdir, open, or write). Caller is
expected to log + skip; the platform-side pending-upload row stays
unacked for the sweep to reclaim.
ValueError: ``content`` exceeds ``MAX_FILE_BYTES``. Pre-staging
guard, belt-and-braces on top of the platform's staging-side cap.
"""
if len(content) > MAX_FILE_BYTES:
raise ValueError(
f"content size {len(content)} exceeds workspace cap {MAX_FILE_BYTES}"
)
Path(CHAT_UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
sanitized = sanitize_filename(filename)
prefix = pysecrets.token_hex(16)
stored = f"{prefix}-{sanitized}"
target = os.path.join(CHAT_UPLOAD_DIR, stored)
fd = _open_safe(target)
try:
with os.fdopen(fd, "wb") as f:
f.write(content)
except OSError:
# Best-effort cleanup — partial writes leave a stub file that
# would mask a future retry's success otherwise.
try:
os.unlink(target)
except OSError:
pass
raise
return f"workspace:{CHAT_UPLOAD_DIR}/{stored}"
# ---------------------------------------------------------------------------
# Activity row → fetch/stage/ack flow
# ---------------------------------------------------------------------------
def _request_body_dict(row: dict[str, Any]) -> dict[str, Any] | None:
"""Coerce ``row['request_body']`` into a dict.
The /activity API returns request_body as JSON (already-deserialized
by httpx). Some legacy paths or mocked transports may emit a string;
handle defensively rather than raising.
"""
body = row.get("request_body")
if isinstance(body, dict):
return body
if isinstance(body, str):
import json
try:
decoded = json.loads(body)
except (TypeError, ValueError):
return None
return decoded if isinstance(decoded, dict) else None
return None
def is_chat_upload_row(row: dict[str, Any]) -> bool:
"""True if ``row`` is the platform's chat-upload-receive activity.
Used by the inbox poller to fork the row off the regular A2A
message handling path this row is not a peer message; it's an
instruction to fetch + stage bytes. Match on ``method`` only;
``activity_type`` is already filtered to ``a2a_receive`` upstream.
"""
return row.get("method") == "chat_upload_receive"
def fetch_and_stage(
row: dict[str, Any],
*,
platform_url: str,
workspace_id: str,
headers: dict[str, str],
timeout_secs: float = DEFAULT_FETCH_TIMEOUT,
client: Any = None,
) -> str | None:
"""Fetch the row's bytes, stage them under chat-uploads, and ack.
Returns the local ``workspace:`` URI on success, or ``None`` if any
step failed (logged with enough detail to triage). Failure leaves
the platform-side row unacked (the platform's sweep reclaims it
eventually); the inbox does NOT re-poll the activity row, because we
DO advance the cursor past it (the row is "handled" from the inbox's
perspective even on fetch failure; otherwise a permanent network
outage would stall the cursor and block real chat traffic).
On success, the URI cache is updated so a subsequent chat message
referencing the same ``platform-pending:`` URI is rewritten before
the agent sees it.
Pass ``client`` to reuse a shared ``httpx.Client`` for both GET and
POST ack (saves one TLS handshake per row vs. constructing one
per-call). ``BatchFetcher`` does this across an entire poll batch so
N concurrent fetches share one connection pool.
"""
body = _request_body_dict(row)
if body is None:
logger.warning(
"inbox_uploads: row %s missing request_body; cannot fetch",
row.get("id"),
)
return None
file_id = body.get("file_id")
if not isinstance(file_id, str) or not file_id:
logger.warning(
"inbox_uploads: row %s has no file_id in request_body",
row.get("id"),
)
return None
pending_uri = body.get("uri")
if not isinstance(pending_uri, str) or not pending_uri:
# Reconstruct what the platform would have written — defensive
# against a row whose uri field got truncated. Same shape as the
# Go handler's URI builder.
pending_uri = f"platform-pending:{workspace_id}/{file_id}"
filename = body.get("name") or "file"
if not isinstance(filename, str):
filename = "file"
# Caller-supplied client: reuse for both GET + POST ack. Otherwise
# build a one-shot client and close it on the way out. Lazy httpx
# import keeps the standalone MCP path's optional dep optional.
own_client = client is None
if own_client:
try:
import httpx # noqa: WPS433
except ImportError:
logger.error("inbox_uploads: httpx not installed; cannot fetch %s", file_id)
return None
client = httpx.Client(timeout=timeout_secs)
try:
return _fetch_and_stage_with_client(
client,
platform_url=platform_url,
workspace_id=workspace_id,
headers=headers,
file_id=file_id,
pending_uri=pending_uri,
filename=filename,
body=body,
)
finally:
if own_client:
try:
client.close()
except Exception: # noqa: BLE001 — close should never crash the caller
pass
def _fetch_and_stage_with_client(
client: Any,
*,
platform_url: str,
workspace_id: str,
headers: dict[str, str],
file_id: str,
pending_uri: str,
filename: str,
body: dict[str, Any],
) -> str | None:
"""Inner body of fetch_and_stage. Always uses the supplied client for
both GET and POST so the connection pool is shared across the call.
"""
content_url = f"{platform_url}/workspaces/{workspace_id}/pending-uploads/{file_id}/content"
ack_url = f"{platform_url}/workspaces/{workspace_id}/pending-uploads/{file_id}/ack"
try:
resp = client.get(content_url, headers=headers)
except Exception as exc: # noqa: BLE001
logger.warning("inbox_uploads: GET %s failed: %s", content_url, exc)
return None
if resp.status_code == 404:
# Row was swept or already acked by a previous poll race — nothing
# to fetch. Don't ack again; the platform's GC handles it. This is
# a soft-skip, not an error — log at INFO so triage isn't noisy.
logger.info(
"inbox_uploads: pending upload %s already gone (404); skipping",
file_id,
)
return None
if resp.status_code >= 400:
logger.warning(
"inbox_uploads: GET %s returned %d: %s",
content_url,
resp.status_code,
(resp.text or "")[:200],
)
return None
content = resp.content or b""
if len(content) > MAX_FILE_BYTES:
logger.warning(
"inbox_uploads: refusing to stage %s — size %d exceeds cap %d",
file_id,
len(content),
MAX_FILE_BYTES,
)
return None
# Mimetype precedence: platform's Content-Type header → request_body
# mimeType field → extension guess. Same precedence as the in-
# container ingest handler.
mime_header = resp.headers.get("content-type", "").split(";")[0].strip()
mime = (
mime_header
or (body.get("mimeType") if isinstance(body.get("mimeType"), str) else "")
or (mimetypes.guess_type(filename)[0] or "")
)
try:
local_uri = stage_to_disk(content, filename)
except (OSError, ValueError) as exc:
logger.error(
"inbox_uploads: failed to stage %s (%s) to disk: %s",
file_id,
filename,
exc,
)
return None
_cache.set(pending_uri, local_uri)
logger.info(
"inbox_uploads: staged file_id=%s name=%s size=%d mime=%s pending_uri=%s local_uri=%s",
file_id,
filename,
len(content),
mime,
pending_uri,
local_uri,
)
# Ack last so a write failure above leaves the row available for a
# retry on the next poll. A failed ack is logged but doesn't roll
# back the on-disk file — the platform's sweep will clean up
# eventually.
try:
ack_resp = client.post(ack_url, headers=headers)
if ack_resp.status_code >= 400:
logger.warning(
"inbox_uploads: ack %s returned %d: %s",
ack_url,
ack_resp.status_code,
(ack_resp.text or "")[:200],
)
except Exception as exc: # noqa: BLE001
logger.warning("inbox_uploads: POST %s failed: %s", ack_url, exc)
return local_uri
# ---------------------------------------------------------------------------
# BatchFetcher — concurrent fetch across a single poll batch
# ---------------------------------------------------------------------------
class BatchFetcher:
"""Fetch + stage + ack a batch of upload-receive rows concurrently.
Why this exists: the inbox poll loop used to call ``fetch_and_stage``
serially per row. With N upload rows in a batch (a user dragging
multiple files into chat at once), the loop blocked for
``N × per_fetch_latency`` before processing the chat message that
referenced them a 4-file upload at 5s each = 20s of stall
before the agent saw the user's prompt. ``BatchFetcher`` runs the
fetches on a small thread pool (default 4 workers) so the stall is
bounded by ``ceil(N/W) × per_fetch_latency`` instead.
Connection reuse: one ``httpx.Client`` is shared across every fetch
in the batch. httpx clients carry a connection pool, so a second
fetch to the same platform host reuses the TCP+TLS handshake from
the first; a measurable win when fetches happen back-to-back.
Correctness invariant the caller MUST preserve: the inbox loop is
expected to call ``wait_all()`` before processing the chat-message
activity row that REFERENCES one of these uploads. Without the
barrier, the URI cache is empty when ``rewrite_request_body`` runs
and the agent sees the un-rewritten ``platform-pending:`` URI. The
caller-side test ``test_poll_once_waits_for_uploads_before_messages``
pins this end-to-end.
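Typical use, as a sketch (the real caller is ``inbox._poll_once``)::

    with BatchFetcher(platform_url=url, workspace_id=wsid, headers=hdrs) as bf:
        for row in upload_rows:
            bf.submit(row)
        bf.wait_all()  # barrier before any chat-message row is processed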
Use as a context manager so the executor + client are torn down
even if the caller raises mid-batch.
"""
def __init__(
self,
*,
platform_url: str,
workspace_id: str,
headers: dict[str, str],
timeout_secs: float = DEFAULT_FETCH_TIMEOUT,
max_workers: int = DEFAULT_BATCH_FETCH_WORKERS,
client: Any = None,
):
self._platform_url = platform_url
self._workspace_id = workspace_id
self._headers = dict(headers) # copy so caller mutations don't leak in
self._timeout_secs = timeout_secs
# Caller can inject a client (tests do this); production callers
# let us build one. Track ownership so we only close ours.
self._own_client = client is None
if self._own_client:
try:
import httpx # noqa: WPS433
except ImportError:
# Match fetch_and_stage's behavior: log + degrade rather
# than raising at construction time. submit() will then
# return None for every row.
logger.error("inbox_uploads: httpx not installed; BatchFetcher inert")
self._client: Any = None
else:
self._client = httpx.Client(timeout=timeout_secs)
else:
self._client = client
self._executor = concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers,
thread_name_prefix="upload-fetch",
)
self._futures: list[concurrent.futures.Future[Any]] = []
self._closed = False
# Flipped to True by wait_all when the timeout fires; close()
# reads this to decide between drain-and-wait vs cancel-queued.
self._timed_out = False
def submit(self, row: dict[str, Any]) -> concurrent.futures.Future[Any] | None:
"""Submit ``row`` for fetch + stage + ack. Non-blocking — the
worker thread runs ``fetch_and_stage`` with the shared client.
Returns the Future so a caller that wants per-row outcome can
await it; ``None`` if the BatchFetcher is in a degraded state
(httpx missing).
"""
if self._closed:
raise RuntimeError("BatchFetcher: submit after close")
if self._client is None:
return None
fut = self._executor.submit(
fetch_and_stage,
row,
platform_url=self._platform_url,
workspace_id=self._workspace_id,
headers=self._headers,
timeout_secs=self._timeout_secs,
client=self._client,
)
self._futures.append(fut)
return fut
def wait_all(self, timeout: float | None = DEFAULT_BATCH_WAIT_TIMEOUT) -> None:
"""Block until every submitted future completes (or times out).
Per-future exceptions are logged + swallowed; ``fetch_and_stage``
already converts every error path to ``return None``, so a real
exception propagating up to here is unexpected and we don't want
one bad fetch to abort the whole batch.
Timeouts are also logged + swallowed AND flagged on
``self._timed_out`` so ``close`` can cancel queued work instead of
draining it. Without this hand-off, ``close()``'s
``shutdown(wait=True)`` would block on the leaked workers and undo
the user-facing timeout; the inbox poll loop would stall
indefinitely on a hung /content fetch.
"""
if not self._futures:
return
try:
done, not_done = concurrent.futures.wait(
self._futures,
timeout=timeout,
return_when=concurrent.futures.ALL_COMPLETED,
)
except Exception as exc: # noqa: BLE001 — concurrent.futures shouldn't raise here
logger.warning("inbox_uploads: BatchFetcher.wait_all crashed: %s", exc)
return
for fut in done:
exc = fut.exception()
if exc is not None:
logger.warning(
"inbox_uploads: BatchFetcher worker raised: %s", exc
)
if not_done:
logger.warning(
"inbox_uploads: BatchFetcher.wait_all left %d in-flight after %ss timeout",
len(not_done),
timeout,
)
# Mark these futures so close() knows to cancel-not-wait. We
# cancel queued-but-not-started ones immediately; futures
# already running can't be cancelled (Python's threading
# model), but close() will pass cancel_futures=True so any
# remaining queued items don't run.
for fut in not_done:
fut.cancel()
self._timed_out = True
def close(self) -> None:
"""Tear down the executor + (if owned) the httpx client.
Idempotent. After close, ``submit`` raises and the BatchFetcher
cannot be reused; construct a fresh one for the next poll.
If ``wait_all`` reported a timeout, shutdown skips the
``wait=True`` drain and instead asks the executor to drop queued
futures (``cancel_futures=True``). Currently-running workers
can't be interrupted by Python's threading model, but the poll
loop returns immediately rather than blocking on a hung fetch.
"""
if self._closed:
return
self._closed = True
timed_out = getattr(self, "_timed_out", False)
try:
if timed_out:
# cancel_futures landed in Python 3.9 — guarded for older
# interpreters via a TypeError fallback. Drop queued
# tasks; running ones will exit when their httpx call
# eventually returns or the daemon thread dies.
try:
self._executor.shutdown(wait=False, cancel_futures=True)
except TypeError:
self._executor.shutdown(wait=False)
else:
# Healthy path: wait for in-flight work so we don't
# interrupt a fetch mid-write.
self._executor.shutdown(wait=True)
except Exception as exc: # noqa: BLE001
logger.warning("inbox_uploads: executor shutdown error: %s", exc)
if self._own_client and self._client is not None:
try:
self._client.close()
except Exception as exc: # noqa: BLE001
logger.warning("inbox_uploads: client close error: %s", exc)
def __enter__(self) -> "BatchFetcher":
return self
def __exit__(self, exc_type, exc, tb) -> None:
self.close()
# ---------------------------------------------------------------------------
# URI rewrite for incoming chat messages
# ---------------------------------------------------------------------------
#
# The chat message that references a staged upload arrives as a
# SEPARATE activity_log row, with parts of kind=file containing
# platform-pending: URIs in the file.uri field. Walk the structure
# in-place and rewrite to the local workspace: URI when the cache has it.
# Unknown URIs pass through unchanged — the agent gets to choose how
# to react (most runtimes log + ignore an unresolvable URI).
def _rewrite_part(part: Any) -> None:
"""Mutate a single A2A Part dict to swap platform-pending: URIs."""
if not isinstance(part, dict):
return
file_obj = part.get("file")
if not isinstance(file_obj, dict):
return
uri = file_obj.get("uri")
if not isinstance(uri, str) or not uri.startswith("platform-pending:"):
return
rewritten = _cache.get(uri)
if rewritten:
file_obj["uri"] = rewritten
def rewrite_request_body(body: Any) -> None:
"""Mutate ``body`` in-place, replacing platform-pending: URIs with
the cached local equivalents.
Walks the same shapes ``inbox._extract_text`` accepts:
- ``body['parts']``
- ``body['params']['parts']``
- ``body['params']['message']['parts']``
No-op for shapes that don't match — the message simply passes
through to the agent as-is.
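Illustrative before/after for one part, assuming the cache holds the
mapping (the local prefix below is invented for the example)::

    {"kind": "file", "file": {"uri": "platform-pending:ws-123/f-1"}}
    {"kind": "file", "file": {"uri": "workspace:/workspace/.molecule/chat-uploads/<prefix>-report.pdf"}}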
"""
if not isinstance(body, dict):
return
candidates: list[Any] = []
params = body.get("params") if isinstance(body.get("params"), dict) else None
if params:
message = params.get("message") if isinstance(params.get("message"), dict) else None
if message:
candidates.append(message.get("parts"))
candidates.append(params.get("parts"))
candidates.append(body.get("parts"))
for parts in candidates:
if isinstance(parts, list):
for part in parts:
_rewrite_part(part)

View File

@ -31,422 +31,53 @@ dependency via ``a2a-sdk``.
In-container usage (``python -m molecule_runtime.a2a_mcp_server`` or
direct import) bypasses this wrapper; the workspace runtime has its
own heartbeat loop in ``heartbeat.py`` so we don't double-heartbeat.
Module layout (RFC #2873 iter 3 split):
* ``mcp_heartbeat``: register POST + heartbeat loop + auth-failure
escalation + inbound-secret persistence.
* ``mcp_workspace_resolver``: env validation, single + multi-workspace
resolution, operator-help printer, on-disk token-file read.
* ``mcp_inbox_pollers``: activate the inbox singleton + spawn one
daemon poller per workspace.
This file keeps just ``main()`` plus thin re-exports of the private
symbols so existing tests' imports (``mcp_cli._build_agent_card``,
``mcp_cli._heartbeat_loop``, etc.) keep working without churn.
"""
from __future__ import annotations
import json
import logging
import os
import sys
import threading
import time
from pathlib import Path
import configs_dir
import mcp_heartbeat
import mcp_inbox_pollers
import mcp_workspace_resolver
logger = logging.getLogger(__name__)
# Heartbeat cadence. Must be tighter than healthsweep's stale window
# (currently 60-90s — see registry/healthsweep.go) by a comfortable
# margin so a single missed heartbeat doesn't flip awaiting_agent.
# 20s gives the operator's network 3 attempts within the budget; long
# enough that it doesn't spam, short enough to recover quickly after
# laptop sleep.
HEARTBEAT_INTERVAL_SECONDS = 20.0
# Re-export public surface for back-compat with the pre-split callers
# and tests. The underscore-prefixed names mirror the names that
# existed in this module before the split — keeping them ensures
# `mcp_cli._build_agent_card`, `mcp_cli._heartbeat_loop`, etc.
# resolve identically to the new functions.
HEARTBEAT_INTERVAL_SECONDS = mcp_heartbeat.HEARTBEAT_INTERVAL_SECONDS
_HEARTBEAT_AUTH_LOUD_THRESHOLD = mcp_heartbeat.HEARTBEAT_AUTH_LOUD_THRESHOLD
_HEARTBEAT_AUTH_RELOG_INTERVAL = mcp_heartbeat.HEARTBEAT_AUTH_RELOG_INTERVAL
# After this many consecutive 401/403 heartbeats, escalate from
# WARNING to ERROR with re-onboard guidance. 3 ticks at 20s = ~1 minute
# of sustained auth failure — enough to rule out a transient platform
# blip but quick enough that an operator doesn't sit puzzled for 10
# minutes wondering why their MCP tools 401. Same threshold used for
# repeat-logging at 20-tick (~7 min) intervals so a long-running
# session that missed the first ERROR still sees the message.
_HEARTBEAT_AUTH_LOUD_THRESHOLD = 3
_HEARTBEAT_AUTH_RELOG_INTERVAL = 20
_build_agent_card = mcp_heartbeat.build_agent_card
_platform_register = mcp_heartbeat.platform_register
_heartbeat_loop = mcp_heartbeat.heartbeat_loop
_log_heartbeat_auth_failure = mcp_heartbeat.log_heartbeat_auth_failure
_persist_inbound_secret_from_heartbeat = mcp_heartbeat.persist_inbound_secret_from_heartbeat
_start_heartbeat_thread = mcp_heartbeat.start_heartbeat_thread
_resolve_workspaces = mcp_workspace_resolver.resolve_workspaces
_print_missing_env_help = mcp_workspace_resolver.print_missing_env_help
_read_token_file = mcp_workspace_resolver.read_token_file
def _build_agent_card(workspace_id: str) -> dict:
"""Build the ``agent_card`` payload sent to /registry/register.
Three optional env vars override the defaults so an operator can
surface human-readable identity + capabilities to peers and the
canvas Skills tab without code changes:
* ``MOLECULE_AGENT_NAME``: display name (defaults to
``molecule-mcp-{id[:8]}``). Surfaced in canvas workspace cards
and ``list_peers`` output.
* ``MOLECULE_AGENT_DESCRIPTION``: one-liner about the agent's
purpose. Rendered in canvas Details + Skills tabs.
* ``MOLECULE_AGENT_SKILLS``: comma-separated skill names
(e.g. ``research,code-review,memory-curation``). Each name is
expanded to a ``{"name": ...}`` skill object, the minimum
shape that satisfies both ``shared_runtime.summarize_peers``
(uses ``s["name"]``) and the canvas SkillsTab.tsx schema
(id falls back to name when omitted). Empty / whitespace
entries are dropped.
Defaults match the previous hardcoded behaviour exactly so this
is a strict superset; an operator who sets none of the env vars
sees no change.
"""
name = (os.environ.get("MOLECULE_AGENT_NAME") or "").strip()
if not name:
name = f"molecule-mcp-{workspace_id[:8]}"
description = (os.environ.get("MOLECULE_AGENT_DESCRIPTION") or "").strip()
skills_raw = (os.environ.get("MOLECULE_AGENT_SKILLS") or "").strip()
skills: list[dict] = []
if skills_raw:
for s in skills_raw.split(","):
label = s.strip()
if label:
skills.append({"name": label})
card: dict = {"name": name, "skills": skills}
if description:
card["description"] = description
return card
def _platform_register(platform_url: str, workspace_id: str, token: str) -> None:
"""One-shot register at startup; fails fast on auth errors.
Lifts the workspace from ``awaiting_agent`` to ``online`` for
operators who never ran the curl-register snippet. Safe to call
repeatedly: the platform's register handler is an upsert that
just refreshes ``url``, ``agent_card``, and ``status``.
Failure model (post-review):
- 401 / 403: ``sys.exit(3)`` immediately. The operator's
token is wrong; silently looping in a broken state would
make this hard to diagnose because the MCP tools would 401
on every call too. Hard-fail is the kindest option.
- Other 4xx/5xx: log a warning + continue. The heartbeat
thread will surface persistent failures; transient platform
blips shouldn't abort the MCP loop.
- Network / transport errors: log + continue. Same reasoning.
Origin header is required by the SaaS edge WAF; without it
/registry/register currently still works (it's on the WAF
allowlist), but the heartbeat path needs Origin and we want one
consistent header set across both calls.
"""
try:
import httpx
except ImportError:
# httpx is a transitive dep via a2a-sdk; if missing, the MCP
# server won't import either. Let the caller's later import
# surface the real error.
return
payload = {
"id": workspace_id,
"url": "",
"agent_card": _build_agent_card(workspace_id),
"delivery_mode": "poll",
}
headers = {
"Authorization": f"Bearer {token}",
"Origin": platform_url,
"Content-Type": "application/json",
}
try:
with httpx.Client(timeout=10.0) as client:
resp = client.post(
f"{platform_url}/registry/register",
json=payload,
headers=headers,
)
if resp.status_code in (401, 403):
print(
f"molecule-mcp: register rejected with HTTP {resp.status_code}"
f"the token in MOLECULE_WORKSPACE_TOKEN is invalid for workspace "
f"{workspace_id}. Regenerate from the canvas → Tokens tab.",
file=sys.stderr,
)
sys.exit(3)
if resp.status_code >= 400:
logger.warning(
"molecule-mcp: register POST returned HTTP %d: %s",
resp.status_code,
(resp.text or "")[:200],
)
else:
logger.info(
"molecule-mcp: registered workspace %s with platform",
workspace_id,
)
except SystemExit:
raise
except Exception as exc: # noqa: BLE001
logger.warning("molecule-mcp: register POST failed: %s", exc)
def _heartbeat_loop(
platform_url: str,
workspace_id: str,
token: str,
interval: float = HEARTBEAT_INTERVAL_SECONDS,
) -> None:
"""Daemon thread body: POST /registry/heartbeat every ``interval``s.
Failures are logged at WARNING and the loop continues. The thread
exits when the main process does (daemon=True). Each iteration
rebuilds the payload + headers; this is cheap and ensures token rotation
via env var (rare but possible) is picked up on the next tick.
"""
try:
import httpx
except ImportError:
return
start_time = time.time()
consecutive_auth_failures = 0
while True:
body = {
"workspace_id": workspace_id,
"error_rate": 0.0,
"sample_error": "",
"active_tasks": 0,
"uptime_seconds": int(time.time() - start_time),
}
headers = {
"Authorization": f"Bearer {token}",
"Origin": platform_url,
"Content-Type": "application/json",
}
try:
with httpx.Client(timeout=10.0) as client:
resp = client.post(
f"{platform_url}/registry/heartbeat",
json=body,
headers=headers,
)
if resp.status_code in (401, 403):
consecutive_auth_failures += 1
_log_heartbeat_auth_failure(
consecutive_auth_failures, workspace_id, resp.status_code,
)
elif resp.status_code >= 400:
# Non-auth HTTP error — log, but DO NOT touch the
# auth-failure counter (5xx blips, 429, etc. are
# transient and unrelated to token validity).
logger.warning(
"molecule-mcp: heartbeat HTTP %d: %s",
resp.status_code,
(resp.text or "")[:200],
)
else:
consecutive_auth_failures = 0
_persist_inbound_secret_from_heartbeat(resp)
except Exception as exc: # noqa: BLE001
logger.warning("molecule-mcp: heartbeat failed: %s", exc)
time.sleep(interval)
def _log_heartbeat_auth_failure(count: int, workspace_id: str, status_code: int) -> None:
"""Escalate consecutive heartbeat 401/403s from quiet WARNING to
actionable ERROR.
The operator's first sign of trouble shouldn't be "tools 401 with no
explanation" — that was the failure mode that motivated this code,
triggered by a workspace being deleted server-side and its tokens
revoked while the runtime kept heartbeating in silence.
Cadence:
* count < threshold: WARNING per tick (transient; could be a
platform blip, don't shout yet)
* count == threshold: ERROR with re-onboard instructions
(the first signal the operator can't miss)
* count > threshold and (count - threshold) % relog == 0: re-log
ERROR (so a session that started after the first ERROR still
sees the message scrolling past in their logs)
"""
if count < _HEARTBEAT_AUTH_LOUD_THRESHOLD:
logger.warning(
"molecule-mcp: heartbeat HTTP %d (auth failure %d/%d) — "
"token may be revoked. Will retry; if persistent, regenerate "
"from canvas → Tokens.",
status_code, count, _HEARTBEAT_AUTH_LOUD_THRESHOLD,
)
return
# At or past the threshold — this is the loud actionable error.
if count == _HEARTBEAT_AUTH_LOUD_THRESHOLD or (
count - _HEARTBEAT_AUTH_LOUD_THRESHOLD
) % _HEARTBEAT_AUTH_RELOG_INTERVAL == 0:
logger.error(
"molecule-mcp: %d consecutive heartbeat auth failures (HTTP %d) — "
"the token in MOLECULE_WORKSPACE_TOKEN has been REVOKED, likely "
"because workspace %s was deleted server-side. The MCP server is "
"still running but every platform call will fail. Regenerate the "
"workspace + token from the canvas (Tokens tab), update your MCP "
"config, and restart your runtime.",
count, status_code, workspace_id,
)
def _persist_inbound_secret_from_heartbeat(resp: object) -> None:
"""Persist ``platform_inbound_secret`` from a heartbeat response, if any.
The platform's heartbeat handler returns the secret on every beat
(mirroring /registry/register) so a workspace that lazy-healed the
secret on the platform side (the typical recovery path for a workspace
whose row had a NULL ``platform_inbound_secret`` after a partial
bootstrap) picks it up within one heartbeat tick instead of
requiring a runtime restart.
Without this delivery path the chat-upload code path's "secret was
just minted, will pick up on next heartbeat" 503 message is a lie
and the workspace stays 401-forever until the operator restarts
the runtime. Caught 2026-04-30 on hongmingwang tenant.
Failure is non-fatal: if the body isn't JSON, doesn't carry the
field, or the disk write fails, the next heartbeat retries. This
matches the cold-start register flow in main.py:319-323.
"""
try:
body = resp.json()
except Exception: # noqa: BLE001
return
if not isinstance(body, dict):
return
secret = body.get("platform_inbound_secret")
if not secret:
return
try:
from platform_inbound_auth import save_inbound_secret
save_inbound_secret(secret)
except Exception as exc: # noqa: BLE001
logger.warning(
"molecule-mcp: persist inbound secret from heartbeat failed: %s", exc
)
def _start_heartbeat_thread(
platform_url: str,
workspace_id: str,
token: str,
) -> threading.Thread:
"""Start the heartbeat daemon thread. Returns the Thread handle.
The MCP stdio loop runs in the foreground (asyncio); this thread
runs alongside it. ``daemon=True`` so when the operator hits
Ctrl-C / closes the runtime, the heartbeat dies with it instead
of leaking and writing to a stale workspace.
"""
t = threading.Thread(
target=_heartbeat_loop,
args=(platform_url, workspace_id, token),
name="molecule-mcp-heartbeat",
daemon=True,
)
t.start()
return t
def _resolve_workspaces() -> tuple[list[tuple[str, str]], list[str]]:
"""Return the list of ``(workspace_id, token)`` pairs to register.
Resolution order:
1. ``MOLECULE_WORKSPACES`` env var: JSON array of
``{"id": "...", "token": "..."}`` objects. Activates the
multi-workspace external-agent path (one process registered into
N workspaces). When set, ``WORKSPACE_ID`` / ``MOLECULE_WORKSPACE_TOKEN``
are IGNORED; the JSON is the source of truth.
2. Single-workspace fallback: ``WORKSPACE_ID`` env var + token from
``MOLECULE_WORKSPACE_TOKEN`` or ``${CONFIGS_DIR}/.auth_token``.
This is the pre-existing path; back-compat exact.
Returns ``(workspaces, errors)``:
* ``workspaces``: list of ``(workspace_id, token)``; non-empty
on the happy path.
* ``errors``: human-readable strings describing what's missing /
malformed. ``main()`` surfaces these with the same shape as
``_print_missing_env_help`` so the operator's first run gives
actionable output.
Why JSON env (not file): ergonomic for Claude Code MCP config (one
string in ``mcpServers.molecule.env`` instead of a sidecar file)
and for CI / launchers. A separate config-file path can be added
later without breaking this.
"""
raw = os.environ.get("MOLECULE_WORKSPACES", "").strip()
if raw:
try:
parsed = json.loads(raw)
except json.JSONDecodeError as exc:
return [], [
f"MOLECULE_WORKSPACES is not valid JSON ({exc.msg} at pos "
f"{exc.pos}). Expected: '[{{\"id\":\"<wsid>\",\"token\":"
f"\"<tok>\"}},{{...}}]'"
]
if not isinstance(parsed, list) or not parsed:
return [], [
"MOLECULE_WORKSPACES must be a non-empty JSON array of "
"{\"id\":\"...\",\"token\":\"...\"} objects"
]
out: list[tuple[str, str]] = []
seen: set[str] = set()
errors: list[str] = []
for i, entry in enumerate(parsed):
if not isinstance(entry, dict):
errors.append(
f"MOLECULE_WORKSPACES[{i}] is not an object — got {type(entry).__name__}"
)
continue
wsid = str(entry.get("id", "")).strip()
tok = str(entry.get("token", "")).strip()
if not wsid or not tok:
errors.append(
f"MOLECULE_WORKSPACES[{i}] missing 'id' or 'token'"
)
continue
if wsid in seen:
errors.append(
f"MOLECULE_WORKSPACES[{i}] duplicate workspace id {wsid!r}"
)
continue
seen.add(wsid)
out.append((wsid, tok))
if errors:
return [], errors
return out, []
# Single-workspace back-compat path.
wsid = os.environ.get("WORKSPACE_ID", "").strip()
if not wsid:
return [], ["WORKSPACE_ID (or MOLECULE_WORKSPACES) is required"]
tok = os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip()
if not tok:
tok = _read_token_file()
if not tok:
return [], [
"MOLECULE_WORKSPACE_TOKEN (or CONFIGS_DIR/.auth_token) is required"
]
return [(wsid, tok)], []
def _print_missing_env_help(missing: list[str], have_token_file: bool) -> None:
print("molecule-mcp: missing required environment.\n", file=sys.stderr)
print("Set the following before running molecule-mcp:", file=sys.stderr)
print(" WORKSPACE_ID — your workspace UUID (from canvas)", file=sys.stderr)
print(
" PLATFORM_URL — base URL of your Molecule platform "
"(e.g. https://your-tenant.staging.moleculesai.app)",
file=sys.stderr,
)
if not have_token_file:
print(
" MOLECULE_WORKSPACE_TOKEN — bearer token for this workspace "
"(canvas → Tokens tab)",
file=sys.stderr,
)
print("", file=sys.stderr)
print(f"Currently missing: {', '.join(missing)}", file=sys.stderr)
_start_inbox_pollers = mcp_inbox_pollers.start_inbox_pollers
def main() -> None:
@ -558,69 +189,5 @@ def main() -> None:
cli_main()
def _start_inbox_pollers(platform_url: str, workspace_ids: list[str]) -> None:
"""Activate the inbox singleton + spawn one poller daemon thread per workspace.
Done lazily here (not at module import) because importing inbox
pulls in platform_auth, which only resolves cleanly AFTER env
validation succeeds. Activation is idempotent within a process,
so a stray double-call (e.g. test harness re-entering main) is
harmless.
The poller threads are daemon=True, so they die with the main process.
Single-workspace path: one poller, single cursor file at the legacy
location (``.mcp_inbox_cursor``). Cursor-key resolution falls back
to the empty string for back-compat with operators whose existing
on-disk cursor was written by the pre-multi-workspace code.
Multi-workspace path: N pollers, each with its own cursor file
keyed by ``workspace_id[:8]``. Cursors live next to each other in
configs_dir so an operator inspecting state sees all of them
together.
"""
try:
import inbox
except ImportError as exc:
logger.warning("molecule-mcp: inbox module unavailable: %s", exc)
return
if len(workspace_ids) <= 1:
# Back-compat exact: single-workspace mode reuses the legacy
# cursor filename + cursor_path constructor arg, so an existing
# operator's on-disk state isn't invalidated by upgrade.
wsid = workspace_ids[0]
state = inbox.InboxState(cursor_path=inbox.default_cursor_path())
inbox.activate(state)
inbox.start_poller_thread(state, platform_url, wsid)
return
# Multi-workspace: per-workspace cursor file, one shared queue.
cursor_paths = {wsid: inbox.default_cursor_path(wsid) for wsid in workspace_ids}
state = inbox.InboxState(cursor_paths=cursor_paths)
inbox.activate(state)
for wsid in workspace_ids:
inbox.start_poller_thread(state, platform_url, wsid)
def _read_token_file() -> str:
"""Read the token from the resolved configs dir's ``.auth_token`` if
present.
Mirrors platform_auth._token_file's location resolution but without
importing the heavy module here (that import triggers a2a_client's
WORKSPACE_ID guard which is fine after env validation, but cheaper
to inline a 4-line file read than pull in the whole stack just for
the path).
"""
path = configs_dir.resolve() / ".auth_token"
if not path.is_file():
return ""
try:
return path.read_text().strip()
except OSError:
return ""
if __name__ == "__main__": # pragma: no cover
main()

325
workspace/mcp_heartbeat.py Normal file
View File

@ -0,0 +1,325 @@
"""Heartbeat + register thread for the standalone ``molecule-mcp`` wrapper.
Extracted from ``mcp_cli.py`` (RFC #2873 iter 3) so the heartbeat /
register concern lives in its own module. The console-script entry
``mcp_cli:main`` still drives the spawn, but the loop body, auth-failure
escalation, and inbound-secret persistence now live here so they can be
read, tested, and replaced independently of the orchestrator.
Public surface:
* ``HEARTBEAT_INTERVAL_SECONDS``: cadence constant.
* ``build_agent_card(workspace_id)``: payload helper.
* ``platform_register(platform_url, workspace_id, token)``: one-shot
POST /registry/register at startup.
* ``start_heartbeat_thread(platform_url, workspace_id, token)``: spawn
the daemon thread.
"""
from __future__ import annotations
import logging
import os
import sys
import threading
import time
logger = logging.getLogger(__name__)
# Heartbeat cadence. Must be tighter than healthsweep's stale window
# (currently 60-90s — see registry/healthsweep.go) by a comfortable
# margin so a single missed heartbeat doesn't flip awaiting_agent.
# 20s gives the operator's network 3 attempts within the budget; long
# enough that it doesn't spam, short enough to recover quickly after
# laptop sleep.
HEARTBEAT_INTERVAL_SECONDS = 20.0
# After this many consecutive 401/403 heartbeats, escalate from
# WARNING to ERROR with re-onboard guidance. 3 ticks at 20s = ~1 minute
# of sustained auth failure — enough to rule out a transient platform
# blip but quick enough that an operator doesn't sit puzzled for 10
# minutes wondering why their MCP tools 401. Same threshold used for
# repeat-logging at 20-tick (~7 min) intervals so a long-running
# session that missed the first ERROR still sees the message.
HEARTBEAT_AUTH_LOUD_THRESHOLD = 3
HEARTBEAT_AUTH_RELOG_INTERVAL = 20
def build_agent_card(workspace_id: str) -> dict:
"""Build the ``agent_card`` payload sent to /registry/register.
Three optional env vars override the defaults so an operator can
surface human-readable identity + capabilities to peers and the
canvas Skills tab without code changes:
* ``MOLECULE_AGENT_NAME``: display name (defaults to
``molecule-mcp-{id[:8]}``). Surfaced in canvas workspace cards
and ``list_peers`` output.
* ``MOLECULE_AGENT_DESCRIPTION``: one-liner about the agent's
purpose. Rendered in canvas Details + Skills tabs.
* ``MOLECULE_AGENT_SKILLS``: comma-separated skill names
(e.g. ``research,code-review,memory-curation``). Each name is
expanded to a ``{"name": ...}`` skill object, the minimum
shape that satisfies both ``shared_runtime.summarize_peers``
(uses ``s["name"]``) and the canvas SkillsTab.tsx schema
(id falls back to name when omitted). Empty / whitespace
entries are dropped.
Defaults match the previous hardcoded behaviour exactly, so this
is a strict superset: an operator who sets none of the env vars
sees no change.
"""
name = (os.environ.get("MOLECULE_AGENT_NAME") or "").strip()
if not name:
name = f"molecule-mcp-{workspace_id[:8]}"
description = (os.environ.get("MOLECULE_AGENT_DESCRIPTION") or "").strip()
skills_raw = (os.environ.get("MOLECULE_AGENT_SKILLS") or "").strip()
skills: list[dict] = []
if skills_raw:
for s in skills_raw.split(","):
label = s.strip()
if label:
skills.append({"name": label})
card: dict = {"name": name, "skills": skills}
if description:
card["description"] = description
return card
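# Illustrative example (not part of the module; the values are made up):
# with
#   MOLECULE_AGENT_NAME="research-buddy"
#   MOLECULE_AGENT_DESCRIPTION="Long-horizon research agent"
#   MOLECULE_AGENT_SKILLS="research, code-review"
# the card becomes
#   {"name": "research-buddy",
#    "description": "Long-horizon research agent",
#    "skills": [{"name": "research"}, {"name": "code-review"}]}
# and with none of the env vars set it collapses to the legacy default
#   {"name": "molecule-mcp-<id[:8]>", "skills": []}.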
def platform_register(platform_url: str, workspace_id: str, token: str) -> None:
"""One-shot register at startup; fails fast on auth errors.
Lifts the workspace from ``awaiting_agent`` to ``online`` for
operators who never ran the curl-register snippet. Safe to call
repeatedly: the platform's register handler is an upsert that
just refreshes ``url``, ``agent_card``, and ``status``.
Failure model (post-review):
- 401 / 403: ``sys.exit(3)`` immediately. The operator's
token is wrong; silently looping in a broken state would
make this hard to diagnose because the MCP tools would 401
on every call too. Hard-fail is the kindest option.
- Other 4xx/5xx: log a warning + continue. The heartbeat
thread will surface persistent failures; transient platform
blips shouldn't abort the MCP loop.
- Network / transport errors: log + continue. Same reasoning.
Origin header is required by the SaaS edge WAF; without it
/registry/register currently still works (it's on the WAF
allowlist), but the heartbeat path needs Origin and we want one
consistent header set across both calls.
"""
try:
import httpx
except ImportError:
# httpx is a transitive dep via a2a-sdk; if missing, the MCP
# server won't import either. Let the caller's later import
# surface the real error.
return
payload = {
"id": workspace_id,
"url": "",
"agent_card": build_agent_card(workspace_id),
"delivery_mode": "poll",
}
headers = {
"Authorization": f"Bearer {token}",
"Origin": platform_url,
"Content-Type": "application/json",
}
try:
with httpx.Client(timeout=10.0) as client:
resp = client.post(
f"{platform_url}/registry/register",
json=payload,
headers=headers,
)
if resp.status_code in (401, 403):
print(
f"molecule-mcp: register rejected with HTTP {resp.status_code}"
f"the token in MOLECULE_WORKSPACE_TOKEN is invalid for workspace "
f"{workspace_id}. Regenerate from the canvas → Tokens tab.",
file=sys.stderr,
)
sys.exit(3)
if resp.status_code >= 400:
logger.warning(
"molecule-mcp: register POST returned HTTP %d: %s",
resp.status_code,
(resp.text or "")[:200],
)
else:
logger.info(
"molecule-mcp: registered workspace %s with platform",
workspace_id,
)
except SystemExit:
raise
except Exception as exc: # noqa: BLE001
logger.warning("molecule-mcp: register POST failed: %s", exc)
def heartbeat_loop(
platform_url: str,
workspace_id: str,
token: str,
interval: float = HEARTBEAT_INTERVAL_SECONDS,
) -> None:
"""Daemon thread body: POST /registry/heartbeat every ``interval``s.
Failures are logged at WARNING and the loop continues. The thread
exits when the main process does (daemon=True). Each iteration
rebuilds the payload + headers (cheap) and ensures token rotation
via env var (rare but possible) is picked up on the next tick.
"""
try:
import httpx
except ImportError:
return
start_time = time.time()
consecutive_auth_failures = 0
while True:
body = {
"workspace_id": workspace_id,
"error_rate": 0.0,
"sample_error": "",
"active_tasks": 0,
"uptime_seconds": int(time.time() - start_time),
}
headers = {
"Authorization": f"Bearer {token}",
"Origin": platform_url,
"Content-Type": "application/json",
}
try:
with httpx.Client(timeout=10.0) as client:
resp = client.post(
f"{platform_url}/registry/heartbeat",
json=body,
headers=headers,
)
if resp.status_code in (401, 403):
consecutive_auth_failures += 1
log_heartbeat_auth_failure(
consecutive_auth_failures, workspace_id, resp.status_code,
)
elif resp.status_code >= 400:
# Non-auth HTTP error — log, but DO NOT touch the
# auth-failure counter (5xx blips, 429, etc. are
# transient and unrelated to token validity).
logger.warning(
"molecule-mcp: heartbeat HTTP %d: %s",
resp.status_code,
(resp.text or "")[:200],
)
else:
consecutive_auth_failures = 0
persist_inbound_secret_from_heartbeat(resp)
except Exception as exc: # noqa: BLE001
logger.warning("molecule-mcp: heartbeat failed: %s", exc)
time.sleep(interval)
def log_heartbeat_auth_failure(count: int, workspace_id: str, status_code: int) -> None:
"""Escalate consecutive heartbeat 401/403s from quiet WARNING to
actionable ERROR.
The operator's first sign of trouble shouldn't be "tools 401 with no
explanation" — that was the failure mode that motivated this code,
triggered by a workspace being deleted server-side and its tokens
revoked while the runtime kept heartbeating in silence.
Cadence:
* count < threshold: WARNING per tick (transient; could be a
platform blip, don't shout yet)
* count == threshold: ERROR with re-onboard instructions
(the first signal the operator can't miss)
* count > threshold and (count - threshold) % relog == 0: re-log
ERROR (so a session that started after the first ERROR still
sees the message scrolling past in their logs)
"""
if count < HEARTBEAT_AUTH_LOUD_THRESHOLD:
logger.warning(
"molecule-mcp: heartbeat HTTP %d (auth failure %d/%d) — "
"token may be revoked. Will retry; if persistent, regenerate "
"from canvas → Tokens.",
status_code, count, HEARTBEAT_AUTH_LOUD_THRESHOLD,
)
return
# At or past the threshold — this is the loud actionable error.
if count == HEARTBEAT_AUTH_LOUD_THRESHOLD or (
count - HEARTBEAT_AUTH_LOUD_THRESHOLD
) % HEARTBEAT_AUTH_RELOG_INTERVAL == 0:
logger.error(
"molecule-mcp: %d consecutive heartbeat auth failures (HTTP %d) — "
"the token in MOLECULE_WORKSPACE_TOKEN has been REVOKED, likely "
"because workspace %s was deleted server-side. The MCP server is "
"still running but every platform call will fail. Regenerate the "
"workspace + token from the canvas (Tokens tab), update your MCP "
"config, and restart your runtime.",
count, status_code, workspace_id,
)
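# Worked example of the cadence above with the constants as currently
# set (threshold 3, re-log every 20 ticks, 20s heartbeat interval):
#   count 1-2  -> WARNING          (~0-40s of sustained auth failure)
#   count 3    -> first ERROR      (~1 minute in)
#   count 23   -> ERROR re-logged  (~7-8 minutes in), then every 20 ticks
# A single successful heartbeat resets the counter in heartbeat_loop, so
# a recovered token restarts the ladder from WARNING.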
def persist_inbound_secret_from_heartbeat(resp: object) -> None:
"""Persist ``platform_inbound_secret`` from a heartbeat response, if any.
The platform's heartbeat handler returns the secret on every beat
(mirroring /registry/register) so a workspace that lazy-healed the
secret on the platform side (the typical recovery path for a workspace
whose row had a NULL ``platform_inbound_secret`` after a partial
bootstrap) picks it up within one heartbeat tick instead of
requiring a runtime restart.
Without this delivery path the chat-upload code path's "secret was
just minted, will pick up on next heartbeat" 503 message is a lie
and the workspace stays 401-forever until the operator restarts
the runtime. Caught 2026-04-30 on hongmingwang tenant.
Failure is non-fatal: if the body isn't JSON, doesn't carry the
field, or the disk write fails, the next heartbeat retries. This
matches the cold-start register flow in main.py:319-323.
"""
try:
body = resp.json()
except Exception: # noqa: BLE001
return
if not isinstance(body, dict):
return
secret = body.get("platform_inbound_secret")
if not secret:
return
try:
from platform_inbound_auth import save_inbound_secret
save_inbound_secret(secret)
except Exception as exc: # noqa: BLE001
logger.warning(
"molecule-mcp: persist inbound secret from heartbeat failed: %s", exc
)
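# Illustrative heartbeat response body this helper reads (shape assumed
# from the single field it extracts; any other keys are ignored):
#   {"platform_inbound_secret": "<opaque secret string>", ...}
# A non-JSON body, a non-dict body, or a body without the field is a
# no-op; the next tick retries.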
def start_heartbeat_thread(
platform_url: str,
workspace_id: str,
token: str,
) -> threading.Thread:
"""Start the heartbeat daemon thread. Returns the Thread handle.
The MCP stdio loop runs in the foreground (asyncio); this thread
runs alongside it. ``daemon=True`` so when the operator hits
Ctrl-C / closes the runtime, the heartbeat dies with it instead
of leaking and writing to a stale workspace.
"""
t = threading.Thread(
target=heartbeat_loop,
args=(platform_url, workspace_id, token),
name="molecule-mcp-heartbeat",
daemon=True,
)
t.start()
return t
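# Minimal wiring sketch (illustrative only, not part of this module):
# roughly how the console-script entry is expected to compose the
# resolver, register, heartbeat, and inbox pollers after the split.
# mcp_cli.main() remains the authoritative orchestrator; the exit code
# and the PLATFORM_URL handling below are assumptions for the example.
def _example_wiring() -> None:
    import mcp_inbox_pollers
    import mcp_workspace_resolver

    platform_url = os.environ.get("PLATFORM_URL", "").rstrip("/")
    workspaces, errors = mcp_workspace_resolver.resolve_workspaces()
    if errors or not platform_url:
        mcp_workspace_resolver.print_missing_env_help(
            errors or ["PLATFORM_URL"],
            have_token_file=bool(mcp_workspace_resolver.read_token_file()),
        )
        raise SystemExit(2)  # assumed exit code, not pinned by the RFC
    for wsid, token in workspaces:
        platform_register(platform_url, wsid, token)  # sys.exit(3) on 401/403
        start_heartbeat_thread(platform_url, wsid, token)
    mcp_inbox_pollers.start_inbox_pollers(
        platform_url, [wsid for wsid, _tok in workspaces]
    )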

View File

@ -0,0 +1,63 @@
"""Inbox-poller spawn helpers for the standalone ``molecule-mcp`` wrapper.
Extracted from ``mcp_cli.py`` (RFC #2873 iter 3). The poller is the
INBOUND side of the standalone path; without it, the universal MCP
server is outbound-only (can call ``delegate_task`` /
``send_message_to_user``, never observes canvas-user / peer-agent
messages).
Public surface:
* ``start_inbox_pollers(platform_url, workspace_ids)``: activate the
inbox singleton and spawn one daemon poller per workspace.
"""
from __future__ import annotations
import logging
logger = logging.getLogger(__name__)
def start_inbox_pollers(platform_url: str, workspace_ids: list[str]) -> None:
"""Activate the inbox singleton + spawn one poller daemon thread per workspace.
Done lazily here (not at module import) because importing inbox
pulls in platform_auth, which only resolves cleanly AFTER env
validation succeeds. Activation is idempotent within a process,
so a stray double-call (e.g. test harness re-entering main) is
harmless.
The poller threads are daemon=True, so they die with the main process.
Single-workspace path: one poller, single cursor file at the legacy
location (``.mcp_inbox_cursor``). Cursor-key resolution falls back
to the empty string for back-compat with operators whose existing
on-disk cursor was written by the pre-multi-workspace code.
Multi-workspace path: N pollers, each with its own cursor file
keyed by ``workspace_id[:8]``. Cursors live next to each other in
configs_dir so an operator inspecting state sees all of them
together.
"""
try:
import inbox
except ImportError as exc:
logger.warning("molecule-mcp: inbox module unavailable: %s", exc)
return
if len(workspace_ids) <= 1:
# Back-compat exact: single-workspace mode reuses the legacy
# cursor filename + cursor_path constructor arg, so an existing
# operator's on-disk state isn't invalidated by upgrade.
wsid = workspace_ids[0]
state = inbox.InboxState(cursor_path=inbox.default_cursor_path())
inbox.activate(state)
inbox.start_poller_thread(state, platform_url, wsid)
return
# Multi-workspace: per-workspace cursor file, one shared queue.
cursor_paths = {wsid: inbox.default_cursor_path(wsid) for wsid in workspace_ids}
state = inbox.InboxState(cursor_paths=cursor_paths)
inbox.activate(state)
for wsid in workspace_ids:
inbox.start_poller_thread(state, platform_url, wsid)
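# Illustrative on-disk cursor layout (paths assumed; the real names come
# from inbox.default_cursor_path):
#   single workspace -> <configs_dir>/.mcp_inbox_cursor          (legacy name)
#   multi-workspace  -> one cursor file per workspace, keyed by the first
#                       8 chars of the id, e.g. <configs_dir>/.mcp_inbox_cursor.aaaaaaaa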

View File

@ -0,0 +1,146 @@
"""Env validation + workspace resolution for the standalone ``molecule-mcp``.
Extracted from ``mcp_cli.py`` (RFC #2873 iter 3). Deals with the two
shapes ``molecule-mcp`` accepts:
* Single-workspace legacy shape: ``WORKSPACE_ID`` + token from
``MOLECULE_WORKSPACE_TOKEN`` or ``${CONFIGS_DIR}/.auth_token``.
* Multi-workspace JSON shape: ``MOLECULE_WORKSPACES`` env var carries a
JSON array of ``{"id": ..., "token": ...}`` entries.
Public surface:
* ``resolve_workspaces()`` → ``(workspaces, errors)``.
* ``read_token_file()`` → token text or ``""``.
* ``print_missing_env_help(missing, have_token_file)``: operator-help
printer.
"""
from __future__ import annotations
import json
import os
import sys
import configs_dir
def resolve_workspaces() -> tuple[list[tuple[str, str]], list[str]]:
"""Return the list of ``(workspace_id, token)`` pairs to register.
Resolution order:
1. ``MOLECULE_WORKSPACES`` env var: JSON array of
``{"id": "...", "token": "..."}`` objects. Activates the
multi-workspace external-agent path (one process registered into
N workspaces). When set, ``WORKSPACE_ID`` / ``MOLECULE_WORKSPACE_TOKEN``
are IGNORED; the JSON is the source of truth.
2. Single-workspace fallback: ``WORKSPACE_ID`` env var + token from
``MOLECULE_WORKSPACE_TOKEN`` or ``${CONFIGS_DIR}/.auth_token``.
This is the pre-existing path; back-compat exact.
Returns ``(workspaces, errors)``:
* ``workspaces``: list of ``(workspace_id, token)``; non-empty
on the happy path.
* ``errors``: human-readable strings describing what's missing /
malformed. ``main()`` surfaces these with the same shape as
``print_missing_env_help`` so the operator's first run gives
actionable output.
Why JSON env (not file): ergonomic for Claude Code MCP config (one
string in ``mcpServers.molecule.env`` instead of a sidecar file)
and for CI / launchers. A separate config-file path can be added
later without breaking this.
"""
raw = os.environ.get("MOLECULE_WORKSPACES", "").strip()
if raw:
try:
parsed = json.loads(raw)
except json.JSONDecodeError as exc:
return [], [
f"MOLECULE_WORKSPACES is not valid JSON ({exc.msg} at pos "
f"{exc.pos}). Expected: '[{{\"id\":\"<wsid>\",\"token\":"
f"\"<tok>\"}},{{...}}]'"
]
if not isinstance(parsed, list) or not parsed:
return [], [
"MOLECULE_WORKSPACES must be a non-empty JSON array of "
"{\"id\":\"...\",\"token\":\"...\"} objects"
]
out: list[tuple[str, str]] = []
seen: set[str] = set()
errors: list[str] = []
for i, entry in enumerate(parsed):
if not isinstance(entry, dict):
errors.append(
f"MOLECULE_WORKSPACES[{i}] is not an object — got {type(entry).__name__}"
)
continue
wsid = str(entry.get("id", "")).strip()
tok = str(entry.get("token", "")).strip()
if not wsid or not tok:
errors.append(
f"MOLECULE_WORKSPACES[{i}] missing 'id' or 'token'"
)
continue
if wsid in seen:
errors.append(
f"MOLECULE_WORKSPACES[{i}] duplicate workspace id {wsid!r}"
)
continue
seen.add(wsid)
out.append((wsid, tok))
if errors:
return [], errors
return out, []
# Single-workspace back-compat path.
wsid = os.environ.get("WORKSPACE_ID", "").strip()
if not wsid:
return [], ["WORKSPACE_ID (or MOLECULE_WORKSPACES) is required"]
tok = os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip()
if not tok:
tok = read_token_file()
if not tok:
return [], [
"MOLECULE_WORKSPACE_TOKEN (or CONFIGS_DIR/.auth_token) is required"
]
return [(wsid, tok)], []
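# Illustrative shapes (comments only; workspace ids and tokens are made
# up). Multi-workspace, e.g. as the one-line string a Claude Code MCP
# config would carry in mcpServers.molecule.env:
#   MOLECULE_WORKSPACES='[{"id":"ws-a","token":"tok-a"},{"id":"ws-b","token":"tok-b"}]'
#   resolve_workspaces() -> ([("ws-a", "tok-a"), ("ws-b", "tok-b")], [])
# Single-workspace fallback:
#   WORKSPACE_ID=ws-1  MOLECULE_WORKSPACE_TOKEN=tok
#   resolve_workspaces() -> ([("ws-1", "tok")], [])
# Malformed JSON, a missing id/token, or a duplicate id returns
# ([], [error strings]); the caller surfaces those instead of
# half-registering.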
def print_missing_env_help(missing: list[str], have_token_file: bool) -> None:
print("molecule-mcp: missing required environment.\n", file=sys.stderr)
print("Set the following before running molecule-mcp:", file=sys.stderr)
print(" WORKSPACE_ID — your workspace UUID (from canvas)", file=sys.stderr)
print(
" PLATFORM_URL — base URL of your Molecule platform "
"(e.g. https://your-tenant.staging.moleculesai.app)",
file=sys.stderr,
)
if not have_token_file:
print(
" MOLECULE_WORKSPACE_TOKEN — bearer token for this workspace "
"(canvas → Tokens tab)",
file=sys.stderr,
)
print("", file=sys.stderr)
print(f"Currently missing: {', '.join(missing)}", file=sys.stderr)
def read_token_file() -> str:
"""Read the token from the resolved configs dir's ``.auth_token`` if
present.
Mirrors platform_auth._token_file's location resolution but without
importing the heavy module here (that import triggers a2a_client's
WORKSPACE_ID guard which is fine after env validation, but cheaper
to inline a 4-line file read than pull in the whole stack just for
the path).
"""
path = configs_dir.resolve() / ".auth_token"
if not path.is_file():
return ""
try:
return path.read_text().strip()
except OSError:
return ""

View File

@ -555,16 +555,34 @@ def test_poll_once_self_notify_does_not_fire_notification(state: inbox.InboxStat
def test_start_poller_thread_is_daemon(state: inbox.InboxState):
"""Daemon flag is required so the poller dies with the parent
process; a non-daemon poller would leak across `claude` restarts
and write to a stale workspace."""
and write to a stale workspace.
``stop_event`` is plumbed so the thread cleans up at the end of the
test instead of leaking into later tests. Without cleanup, the
daemon's ~10ms tick races with later tests that patch httpx.Client:
the leaked thread sees their patched response and runs an
unwanted iteration of _poll_once that double-counts mocked calls
(caught when test_batch_fetcher_owns_client_when_not_supplied
surfaced this on Python 3.11 CI but not 3.13 local).
"""
resp = _make_response(200, [])
p, _ = _patch_httpx(resp)
stop_event = threading.Event()
with p, patch("platform_auth.auth_headers", return_value={}):
# Use a very short interval so the loop body runs at least once
# before we exit the test.
t = inbox.start_poller_thread(state, "http://platform", "ws-1", interval=0.01)
t = inbox.start_poller_thread(
state, "http://platform", "ws-1", interval=0.01, stop_event=stop_event
)
time.sleep(0.05)
assert t.daemon is True
assert t.is_alive()
assert t.daemon is True
assert t.is_alive()
# Signal shutdown + wait for the thread to actually exit before
# we leave the test scope. Without this join, the leaked thread
# races with later tests' httpx patches.
stop_event.set()
t.join(timeout=2.0)
assert not t.is_alive(), "poller thread did not exit on stop_event"
# ---------------------------------------------------------------------------
@ -577,6 +595,219 @@ def test_default_cursor_path_uses_configs_dir(monkeypatch, tmp_path: Path):
assert inbox.default_cursor_path() == tmp_path / ".mcp_inbox_cursor"
# ---------------------------------------------------------------------------
# Phase 5b — BatchFetcher integration with the poll loop
# ---------------------------------------------------------------------------
#
# These tests pin the cross-module contract between inbox._poll_once and
# inbox_uploads.BatchFetcher: chat_upload_receive rows must be submitted
# to a single BatchFetcher AND drained (URI cache populated) before any
# subsequent message row is processed. Without the drain, the
# rewrite_request_body path inside message_from_activity surfaces the
# un-rewritten ``platform-pending:`` URI to the agent.
def _upload_row(act_id: str, file_id: str) -> dict:
return {
"id": act_id,
"source_id": None,
"method": "chat_upload_receive",
"summary": f"chat_upload_receive: {file_id}.pdf",
"request_body": {
"file_id": file_id,
"name": f"{file_id}.pdf",
"uri": f"platform-pending:ws-1/{file_id}",
"mimeType": "application/pdf",
"size": 3,
},
"created_at": "2026-05-04T10:00:00Z",
}
def _message_row_referencing(act_id: str, file_id: str) -> dict:
return {
"id": act_id,
"source_id": None,
"method": "message/send",
"summary": None,
"request_body": {
"params": {
"message": {
"parts": [
{"kind": "text", "text": "have a look"},
{
"kind": "file",
"file": {
"uri": f"platform-pending:ws-1/{file_id}",
"name": f"{file_id}.pdf",
},
},
]
}
}
},
"created_at": "2026-05-04T10:00:01Z",
}
def _patch_httpx_routing(activity_rows: list[dict], upload_bytes: bytes = b"PDF"):
"""Replace ``httpx.Client`` so:
- GET /activity returns ``activity_rows``
- GET /workspaces/.../content returns ``upload_bytes`` with content-type
- POST /ack returns 200
Returns the patch context manager; tests use ``with p:``. Each new
Client(...) gets a fresh MagicMock so the test can verify
constructor-count expectations without pinning singletons.
"""
def _client_factory(*args, **kwargs):
c = MagicMock()
c.__enter__ = MagicMock(return_value=c)
c.__exit__ = MagicMock(return_value=False)
def _get(url, params=None, headers=None):
if "/activity" in url:
resp = MagicMock()
resp.status_code = 200
resp.json.return_value = activity_rows
resp.text = ""
return resp
if "/pending-uploads/" in url and "/content" in url:
resp = MagicMock()
resp.status_code = 200
resp.content = upload_bytes
resp.headers = {"content-type": "application/pdf"}
resp.text = ""
return resp
resp = MagicMock()
resp.status_code = 404
resp.text = ""
return resp
def _post(url, headers=None):
resp = MagicMock()
resp.status_code = 200
resp.text = ""
return resp
c.get = MagicMock(side_effect=_get)
c.post = MagicMock(side_effect=_post)
c.close = MagicMock()
return c
return patch("httpx.Client", side_effect=_client_factory)
def test_poll_once_drains_uploads_before_processing_message_row(state: inbox.InboxState, tmp_path):
"""The chat-message row's file.uri MUST be rewritten to the local
workspace: URI by the time it lands in the InboxState queue. This
requires BatchFetcher.wait_all() to run before message_from_activity
on the second row.
"""
import inbox_uploads
inbox_uploads.get_cache().clear()
# Sandbox the on-disk staging dir so the test can't pollute the
# workspace's real chat-uploads.
real_dir = inbox_uploads.CHAT_UPLOAD_DIR
inbox_uploads.CHAT_UPLOAD_DIR = str(tmp_path / "chat-uploads")
try:
rows = [
_upload_row("act-1", "file-A"),
_message_row_referencing("act-2", "file-A"),
]
state.save_cursor("act-old")
with _patch_httpx_routing(rows, upload_bytes=b"PDF-bytes"):
n = inbox._poll_once(state, "http://platform", "ws-1", {})
finally:
inbox_uploads.CHAT_UPLOAD_DIR = real_dir
inbox_uploads.get_cache().clear()
assert n == 1, "exactly one message row should be enqueued (the upload row is a side-effect, not a message)"
queued = state.peek(10)
assert len(queued) == 1
# The contract this test exists to pin: the platform-pending: URI
# was rewritten to workspace: BEFORE the message landed in the
# state queue. message_from_activity mutates row['request_body']
# in-place, so the rewritten URI is observable on the row dict
# we passed in.
rewritten_part = rows[1]["request_body"]["params"]["message"]["parts"][1]
assert rewritten_part["file"]["uri"].startswith("workspace:"), (
f"upload barrier broken: file.uri = {rewritten_part['file']['uri']!r}; "
"rewrite_request_body ran before BatchFetcher.wait_all populated the cache"
)
# Cursor advanced past BOTH rows — upload-receive (act-1) is
# acknowledged via the inbox cursor regardless of fetch outcome.
assert state.load_cursor() == "act-2"
def test_poll_once_with_only_upload_rows_drains_at_loop_end(state: inbox.InboxState, tmp_path):
"""End-of-batch drain: a poll that contains ONLY upload rows (no
chat-message row to trigger the inline drain) must still drain the
BatchFetcher before _poll_once returns. Otherwise a future poll
that picks up the corresponding chat-message row would race with
in-flight fetches from the previous batch.
"""
import inbox_uploads
inbox_uploads.get_cache().clear()
real_dir = inbox_uploads.CHAT_UPLOAD_DIR
inbox_uploads.CHAT_UPLOAD_DIR = str(tmp_path / "chat-uploads")
try:
rows = [_upload_row("act-1", "file-A"), _upload_row("act-2", "file-B")]
state.save_cursor("act-old")
with _patch_httpx_routing(rows, upload_bytes=b"PDF"):
n = inbox._poll_once(state, "http://platform", "ws-1", {})
# By the time _poll_once returned, the URI cache must be hot
# for both file_ids — proves the end-of-loop drain ran.
assert inbox_uploads.get_cache().get("platform-pending:ws-1/file-A") is not None
assert inbox_uploads.get_cache().get("platform-pending:ws-1/file-B") is not None
finally:
inbox_uploads.CHAT_UPLOAD_DIR = real_dir
inbox_uploads.get_cache().clear()
# Upload rows are NOT message rows; queue stays empty.
assert n == 0
# Cursor advances past both upload rows.
assert state.load_cursor() == "act-2"
def test_poll_once_no_uploads_does_not_construct_batch_fetcher(state: inbox.InboxState):
"""A batch with no upload-receive rows must not pay the BatchFetcher
construction cost: the executor + httpx client allocation is
deferred until the first upload row appears.
"""
import inbox_uploads
constructed: list[Any] = []
def _patched_init(self, **kwargs):
constructed.append(kwargs)
# Don't actually run __init__; we never hit submit/wait_all.
self._closed = False
self._futures = []
self._executor = MagicMock()
self._client = MagicMock()
self._own_client = False
rows = [
{
"id": "act-1",
"source_id": None,
"method": "message/send",
"summary": None,
"request_body": {"parts": [{"type": "text", "text": "hi"}]},
"created_at": "2026-04-30T22:00:00Z",
},
]
state.save_cursor("act-old")
resp = _make_response(200, rows)
p, _ = _patch_httpx(resp)
with patch.object(inbox_uploads.BatchFetcher, "__init__", _patched_init), p:
n = inbox._poll_once(state, "http://platform", "ws-1", {})
assert n == 1
assert constructed == [], "BatchFetcher must not be constructed when no upload rows are present"
def test_default_cursor_path_falls_back_to_default(tmp_path, monkeypatch):
"""When CONFIGS_DIR is unset, the cursor path resolves through
configs_dir.resolve(): /configs in-container, ~/.molecule-workspace
@ -701,3 +932,165 @@ def test_set_notification_callback_none_clears(state: inbox.InboxState):
state.record(_msg("act-1"))
assert received == []
# ---------------------------------------------------------------------------
# Phase 2 — chat_upload_receive rows route to inbox_uploads.fetch_and_stage
# ---------------------------------------------------------------------------
def test_poll_once_skips_chat_upload_row_from_queue(state: inbox.InboxState, monkeypatch, tmp_path):
"""A row with method='chat_upload_receive' must NOT enqueue as a
chat message; it's a side-effect telling the workspace to fetch
bytes. Pin the contract so a refactor that flattens the row loop
can't silently re-enqueue these as 'empty A2A message' rows."""
import inbox_uploads
monkeypatch.setattr(inbox_uploads, "CHAT_UPLOAD_DIR", str(tmp_path / "chat-uploads"))
inbox_uploads.get_cache().clear()
rows = [
{
"id": "act-1",
"source_id": None,
"method": "chat_upload_receive",
"summary": "chat_upload_receive: foo.pdf",
"request_body": {
"file_id": "abc123",
"name": "foo.pdf",
"mimeType": "application/pdf",
"size": 4,
"uri": "platform-pending:ws-1/abc123",
},
"created_at": "2026-05-04T10:00:00Z",
},
]
resp = _make_response(200, rows)
p, _ = _patch_httpx(resp)
fetch_called = []
def fake_fetch(row, **kwargs):
fetch_called.append((row.get("id"), kwargs["workspace_id"]))
return "workspace:/local/foo.pdf"
with p, patch.object(inbox_uploads, "fetch_and_stage", fake_fetch):
n = inbox._poll_once(state, "http://platform", "ws-1", {})
# Not enqueued + cursor advanced.
assert n == 0
assert state.peek(10) == []
assert state.load_cursor() == "act-1"
# fetch_and_stage was invoked with the row and workspace_id.
assert fetch_called == [("act-1", "ws-1")]
def test_poll_once_chat_upload_row_then_chat_message_rewrites_uri(state: inbox.InboxState, monkeypatch, tmp_path):
"""The classic ordering: upload-receive row first (lower id), chat
message referencing platform-pending: URI second. The chat message
that lands in the inbox must have its URI rewritten to the local
workspace: URI before the agent sees it.
"""
import inbox_uploads
monkeypatch.setattr(inbox_uploads, "CHAT_UPLOAD_DIR", str(tmp_path / "chat-uploads"))
cache = inbox_uploads.get_cache()
cache.clear()
# Pretend the fetch already populated the cache. (The real flow
# populates it inside fetch_and_stage; we patch that to keep the
# test focused on the rewrite contract.)
cache.set("platform-pending:ws-1/abc123", "workspace:/workspace/.molecule/chat-uploads/xx-foo.pdf")
rows = [
{
"id": "act-1",
"source_id": None,
"method": "chat_upload_receive",
"summary": "chat_upload_receive: foo.pdf",
"request_body": {
"file_id": "abc123",
"name": "foo.pdf",
"mimeType": "application/pdf",
"size": 4,
"uri": "platform-pending:ws-1/abc123",
},
"created_at": "2026-05-04T10:00:00Z",
},
{
"id": "act-2",
"source_id": None,
"method": "message/send",
"summary": None,
"request_body": {
"params": {
"message": {
"parts": [
{"kind": "text", "text": "look at this"},
{
"kind": "file",
"file": {
"uri": "platform-pending:ws-1/abc123",
"name": "foo.pdf",
},
},
]
}
}
},
"created_at": "2026-05-04T10:00:01Z",
},
]
resp = _make_response(200, rows)
p, _ = _patch_httpx(resp)
def fake_fetch(row, **kwargs):
return "workspace:/workspace/.molecule/chat-uploads/xx-foo.pdf"
with p, patch.object(inbox_uploads, "fetch_and_stage", fake_fetch):
n = inbox._poll_once(state, "http://platform", "ws-1", {})
# Only the chat message is enqueued.
assert n == 1
queue = state.peek(10)
assert len(queue) == 1
msg = queue[0]
assert msg.activity_id == "act-2"
# The URI in the row's request_body was mutated by message_from_activity
# → rewrite_request_body. Re-extracting reveals the rewritten value.
rewritten = rows[1]["request_body"]["params"]["message"]["parts"][1]["file"]["uri"]
assert rewritten == "workspace:/workspace/.molecule/chat-uploads/xx-foo.pdf"
def test_poll_once_chat_upload_row_advances_cursor_even_on_fetch_failure(
state: inbox.InboxState, monkeypatch, tmp_path
):
"""A permanent network failure on /content must NOT stall the cursor
otherwise one bad upload blocks all real chat traffic for the
workspace. fetch_and_stage returns None on failure, but the row is
still considered handled from the cursor's perspective."""
import inbox_uploads
monkeypatch.setattr(inbox_uploads, "CHAT_UPLOAD_DIR", str(tmp_path / "chat-uploads"))
rows = [
{
"id": "act-broken",
"source_id": None,
"method": "chat_upload_receive",
"summary": "chat_upload_receive: doomed.pdf",
"request_body": {
"file_id": "doom",
"name": "doomed.pdf",
"uri": "platform-pending:ws-1/doom",
},
"created_at": "2026-05-04T10:00:00Z",
},
]
resp = _make_response(200, rows)
p, _ = _patch_httpx(resp)
def fake_fetch(row, **kwargs):
return None # network failure
with p, patch.object(inbox_uploads, "fetch_and_stage", fake_fetch):
inbox._poll_once(state, "http://platform", "ws-1", {})
assert state.peek(10) == []
assert state.load_cursor() == "act-broken"

File diff suppressed because it is too large Load Diff

View File

@ -13,6 +13,7 @@ from pathlib import Path
import pytest
import mcp_cli
import mcp_heartbeat
@pytest.fixture(autouse=True)
@ -739,8 +740,13 @@ def test_heartbeat_loop_calls_persist_on_success(monkeypatch):
def fake_persist(resp):
saw.append(resp)
# Patch on mcp_heartbeat — that's where heartbeat_loop's internal
# name resolution looks up persist_inbound_secret_from_heartbeat
# after the RFC #2873 iter 3 split. The mcp_cli._persist_…_from_heartbeat
# back-compat re-export still exists, but patching it here would not
# affect the loop body.
monkeypatch.setattr(
mcp_cli, "_persist_inbound_secret_from_heartbeat", fake_persist
mcp_heartbeat, "persist_inbound_secret_from_heartbeat", fake_persist
)
class FakeResp:
@ -786,8 +792,8 @@ def test_heartbeat_loop_skips_persist_on_4xx(monkeypatch):
"""Heartbeat 4xx error path must NOT invoke persist (no body to trust)."""
saw: list[object] = []
monkeypatch.setattr(
mcp_cli,
"_persist_inbound_secret_from_heartbeat",
mcp_heartbeat,
"persist_inbound_secret_from_heartbeat",
lambda r: saw.append(r),
)
@ -899,7 +905,7 @@ def test_heartbeat_single_401_logs_warning_not_error(monkeypatch, caplog):
transient platform blip. Log at WARNING; don't shout."""
import logging
caplog.set_level(logging.WARNING, logger="mcp_cli")
caplog.set_level(logging.WARNING, logger="mcp_heartbeat")
_multi_iter_runner(monkeypatch, [401])
@ -923,7 +929,7 @@ def test_heartbeat_three_consecutive_401s_escalates_to_error(monkeypatch, caplog
LOUD ERROR with re-onboard guidance not buried at WARNING."""
import logging
caplog.set_level(logging.WARNING, logger="mcp_cli")
caplog.set_level(logging.WARNING, logger="mcp_heartbeat")
_multi_iter_runner(monkeypatch, [401, 401, 401])
@ -949,7 +955,7 @@ def test_heartbeat_403_treated_same_as_401(monkeypatch, caplog):
not authorized for this workspace). Same escalation path."""
import logging
caplog.set_level(logging.WARNING, logger="mcp_cli")
caplog.set_level(logging.WARNING, logger="mcp_heartbeat")
_multi_iter_runner(monkeypatch, [403, 403, 403])
@ -963,7 +969,7 @@ def test_heartbeat_recovery_resets_consecutive_counter(monkeypatch, caplog):
later should NOT immediately escalate."""
import logging
caplog.set_level(logging.WARNING, logger="mcp_cli")
caplog.set_level(logging.WARNING, logger="mcp_heartbeat")
# Two 401s, then 200, then one 401. If counter resets correctly,
# the final 401 is "1 consecutive" and should NOT escalate.
@ -982,7 +988,7 @@ def test_heartbeat_500_does_not_increment_auth_counter(monkeypatch, caplog):
misleading the operator."""
import logging
caplog.set_level(logging.WARNING, logger="mcp_cli")
caplog.set_level(logging.WARNING, logger="mcp_heartbeat")
_multi_iter_runner(monkeypatch, [500, 500, 500])

View File

@ -0,0 +1,231 @@
"""RFC #2873 iter 3 — drift gate + behavior tests for the post-split surface.
The bulk of the heartbeat / resolver behavior is exercised by
``test_mcp_cli.py`` and ``test_mcp_cli_multi_workspace.py`` through the
``mcp_cli._symbol`` back-compat aliases. This file pins:
1. The split is **behavior-neutral via aliasing**: every previously-
exposed ``mcp_cli._foo`` symbol is the SAME callable as the new
module's authoritative function. If a refactor accidentally drops
an alias or points it at a stale copy, this fails.
2. ``mcp_inbox_pollers.start_inbox_pollers`` works for both single-
workspace (legacy back-compat) and multi-workspace shapes.
``mcp_cli`` had no direct test for this branch before the split.
"""
from __future__ import annotations
import sys
import types
import pytest
import mcp_cli
import mcp_heartbeat
import mcp_inbox_pollers
import mcp_workspace_resolver
# ============== Drift gate: back-compat aliases point at the real fn ==============
class TestBackCompatAliases:
"""Pin that ``mcp_cli._foo is real_fn``. A test that re-implements
the alias would still pass; the ``is`` check guarantees we didn't
create a wrapper that drifts."""
def test_heartbeat_aliases(self):
assert mcp_cli._build_agent_card is mcp_heartbeat.build_agent_card
assert mcp_cli._platform_register is mcp_heartbeat.platform_register
assert mcp_cli._heartbeat_loop is mcp_heartbeat.heartbeat_loop
assert mcp_cli._log_heartbeat_auth_failure is mcp_heartbeat.log_heartbeat_auth_failure
assert (
mcp_cli._persist_inbound_secret_from_heartbeat
is mcp_heartbeat.persist_inbound_secret_from_heartbeat
)
assert mcp_cli._start_heartbeat_thread is mcp_heartbeat.start_heartbeat_thread
def test_resolver_aliases(self):
assert mcp_cli._resolve_workspaces is mcp_workspace_resolver.resolve_workspaces
assert mcp_cli._print_missing_env_help is mcp_workspace_resolver.print_missing_env_help
assert mcp_cli._read_token_file is mcp_workspace_resolver.read_token_file
def test_inbox_pollers_alias(self):
assert mcp_cli._start_inbox_pollers is mcp_inbox_pollers.start_inbox_pollers
def test_constants_match(self):
assert (
mcp_cli.HEARTBEAT_INTERVAL_SECONDS
== mcp_heartbeat.HEARTBEAT_INTERVAL_SECONDS
)
assert (
mcp_cli._HEARTBEAT_AUTH_LOUD_THRESHOLD
== mcp_heartbeat.HEARTBEAT_AUTH_LOUD_THRESHOLD
)
assert (
mcp_cli._HEARTBEAT_AUTH_RELOG_INTERVAL
== mcp_heartbeat.HEARTBEAT_AUTH_RELOG_INTERVAL
)
# ============== mcp_inbox_pollers — both shapes + degraded import ==============
class _FakeInboxState:
def __init__(self, **kwargs):
self.kwargs = kwargs
def _install_fake_inbox(monkeypatch):
"""Inject a fake ``inbox`` module so we observe the spawn calls
without pulling in the real platform_auth dependency tree."""
activations: list[_FakeInboxState] = []
spawned: list[tuple[_FakeInboxState, str, str]] = []
cursor_paths: list[str] = []
def default_cursor_path(wsid=None):
# Mirror the real signature: optional wsid → distinct path per id,
# absent → legacy single path.
path = f"/tmp/.mcp_inbox_cursor.{wsid[:8]}" if wsid else "/tmp/.mcp_inbox_cursor"
cursor_paths.append(path)
return path
def activate(state):
activations.append(state)
def start_poller_thread(state, platform_url, wsid):
spawned.append((state, platform_url, wsid))
fake = types.ModuleType("inbox")
fake.InboxState = _FakeInboxState
fake.activate = activate
fake.default_cursor_path = default_cursor_path
fake.start_poller_thread = start_poller_thread
monkeypatch.setitem(sys.modules, "inbox", fake)
return activations, spawned, cursor_paths
class TestStartInboxPollers:
def test_single_workspace_uses_legacy_cursor_path(self, monkeypatch):
"""Back-compat exact: single-workspace mode reuses the legacy
cursor filename so an existing operator's on-disk state isn't
invalidated by upgrade."""
activations, spawned, cursor_paths = _install_fake_inbox(monkeypatch)
mcp_inbox_pollers.start_inbox_pollers(
"https://test.moleculesai.app", ["ws-only-one"]
)
assert len(activations) == 1, "exactly one inbox.activate call"
assert len(spawned) == 1, "exactly one poller thread spawned"
# Single-workspace path uses default_cursor_path() with no arg —
# the cursor_path captured here must be the legacy filename
# (no per-ws suffix).
assert cursor_paths == ["/tmp/.mcp_inbox_cursor"]
# State carries cursor_path, not cursor_paths
state = activations[0]
assert state.kwargs == {"cursor_path": "/tmp/.mcp_inbox_cursor"}
# Spawned poller is for the right workspace
assert spawned[0] == (state, "https://test.moleculesai.app", "ws-only-one")
def test_multi_workspace_uses_per_workspace_cursor_paths(self, monkeypatch):
"""Multi-workspace path: per-workspace cursor file, one shared
InboxState. N pollers, each pointed at the same state so the
agent's inbox_peek/pop sees a merged view."""
activations, spawned, _ = _install_fake_inbox(monkeypatch)
wsids = ["ws-aaaaaaaa", "ws-bbbbbbbb", "ws-cccccccc"]
mcp_inbox_pollers.start_inbox_pollers(
"https://test.moleculesai.app", wsids
)
# One state, one activate, three pollers
assert len(activations) == 1
assert len(spawned) == 3
state = activations[0]
# Multi-workspace state carries cursor_paths (mapping)
assert "cursor_paths" in state.kwargs
assert set(state.kwargs["cursor_paths"].keys()) == set(wsids)
# All pollers share the same state
for s, _url, _wsid in spawned:
assert s is state
# All workspace ids covered
assert sorted(t[2] for t in spawned) == sorted(wsids)
def test_inbox_module_unavailable_logs_and_returns(self, monkeypatch, caplog):
"""If ``import inbox`` fails (older install or stripped
runtime), spawn must NOT raise; it logs a warning and continues.
The MCP server can still serve outbound tools."""
import logging
# Force ImportError by injecting a module sentinel that raises.
class _Boom:
def __getattr__(self, _name):
raise ImportError("inbox stripped from this build")
# Setting sys.modules["inbox"] to a broken object isn't enough —
# the import statement reads sys.modules first; if the entry is
# truthy, Python returns it. We need to force the import to raise.
# Easiest: pre-poison sys.modules so the `import inbox` line
# raises by setting the entry to None (Python special-cases None
# as "explicit ImportError").
monkeypatch.setitem(sys.modules, "inbox", None)
caplog.set_level(logging.WARNING, logger="mcp_inbox_pollers")
# Should not raise.
mcp_inbox_pollers.start_inbox_pollers(
"https://test.moleculesai.app", ["ws-1"]
)
warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
assert any("inbox module unavailable" in r.message for r in warnings), (
f"expected a 'inbox module unavailable' warning, got: "
f"{[r.message for r in warnings]}"
)
# ============== mcp_heartbeat.build_agent_card — short direct tests ==============
class TestBuildAgentCardDirect:
"""Spot-check the new module's public surface; the full test matrix
lives in ``test_mcp_cli.py`` reaching through ``mcp_cli._build_agent_card``.
"""
def test_default_card_shape(self, monkeypatch):
for v in ("MOLECULE_AGENT_NAME", "MOLECULE_AGENT_DESCRIPTION", "MOLECULE_AGENT_SKILLS"):
monkeypatch.delenv(v, raising=False)
card = mcp_heartbeat.build_agent_card("8dad3e29-c32a-4ec7-9ea7-94fe2d2d98ec")
assert card == {"name": "molecule-mcp-8dad3e29", "skills": []}
def test_skills_csv_split_and_trim(self, monkeypatch):
monkeypatch.setenv("MOLECULE_AGENT_SKILLS", "research, , code-review,memory-curation, ")
card = mcp_heartbeat.build_agent_card("ws-1")
assert card["skills"] == [
{"name": "research"},
{"name": "code-review"},
{"name": "memory-curation"},
]
# ============== mcp_workspace_resolver — short direct tests ==============
class TestResolveWorkspacesDirect:
@pytest.fixture(autouse=True)
def _isolate(self, monkeypatch, tmp_path):
for v in ("WORKSPACE_ID", "MOLECULE_WORKSPACE_TOKEN", "MOLECULE_WORKSPACES"):
monkeypatch.delenv(v, raising=False)
monkeypatch.setenv("CONFIGS_DIR", str(tmp_path))
yield
def test_single_workspace_via_env(self, monkeypatch):
monkeypatch.setenv("WORKSPACE_ID", "ws-1")
monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN", "tok")
out, errors = mcp_workspace_resolver.resolve_workspaces()
assert out == [("ws-1", "tok")]
assert errors == []
def test_multi_workspace_via_json_env(self, monkeypatch):
monkeypatch.setenv(
"MOLECULE_WORKSPACES",
'[{"id":"ws-a","token":"a"},{"id":"ws-b","token":"b"}]',
)
out, errors = mcp_workspace_resolver.resolve_workspaces()
assert out == [("ws-a", "a"), ("ws-b", "b")]
assert errors == []

View File

@ -63,7 +63,7 @@ async def test_commit_memory_success(monkeypatch):
mcp = _load_mcp()
client = FakeClient()
monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client)
monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client)
result = await mcp.handle_tool_call("commit_memory", {
"content": "Architecture decision: use Go for backend",
@ -92,7 +92,7 @@ async def test_commit_memory_default_scope(monkeypatch):
mcp = _load_mcp()
client = FakeClient()
monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client)
monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client)
result = await mcp.handle_tool_call("commit_memory", {
"content": "Some note",
@ -108,7 +108,7 @@ async def test_recall_memory_success(monkeypatch):
mcp = _load_mcp()
client = FakeClient()
monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client)
monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client)
result = await mcp.handle_tool_call("recall_memory", {"query": "architecture"})
@ -127,7 +127,7 @@ async def test_recall_memory_empty(monkeypatch):
async def get(self, url, params=None, headers=None, **kwargs):
return FakeResponse(200, [])
monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: EmptyClient())
monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: EmptyClient())
result = await mcp.handle_tool_call("recall_memory", {})
assert "No memories found" in result
@ -139,7 +139,7 @@ async def test_recall_memory_with_scope_filter(monkeypatch):
mcp = _load_mcp()
client = FakeClient()
monkeypatch.setattr("a2a_tools.httpx.AsyncClient", lambda **kw: client)
monkeypatch.setattr("a2a_tools_memory.httpx.AsyncClient", lambda **kw: client)
await mcp.handle_tool_call("recall_memory", {"scope": "TEAM"})

View File

@ -357,7 +357,7 @@ class TestA2AToolCommitMemoryRedactsSecrets:
fake_client.post = _capture
with patch("a2a_tools.httpx.AsyncClient", return_value=fake_client):
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=fake_client):
await a2a_tools.tool_commit_memory(content_with_secret)
stored = captured.get("content", "")
@ -385,7 +385,7 @@ class TestA2AToolCommitMemoryRedactsSecrets:
fake_client.post = _capture
with patch("a2a_tools.httpx.AsyncClient", return_value=fake_client):
with patch("a2a_tools_memory.httpx.AsyncClient", return_value=fake_client):
await a2a_tools.tool_commit_memory(f"key={key}")
stored = captured.get("content", "")