34179e64a3
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 8s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 10s
CI / Python Lint & Test (pull_request) Successful in 7s
CI / Detect changes (pull_request) Successful in 9s
E2E API Smoke Test / detect-changes (pull_request) Successful in 9s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Successful in 7s
E2E Chat / detect-changes (pull_request) Successful in 11s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 13s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 55s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 47s
Harness Replays / detect-changes (pull_request) Successful in 3s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 7s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Successful in 5s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 12s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 16s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Successful in 1m16s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 1m15s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (pull_request) Successful in 5s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m3s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 6s
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Successful in 1m28s
gate-check-v3 / gate-check (pull_request) Successful in 4s
qa-review / approved (pull_request) Successful in 4s
security-review / approved (pull_request) Successful in 3s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 3s
sop-checklist / review-refire (pull_request) Has been skipped
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Successful in 1m19s
sop-tier-check / tier-check (pull_request) Successful in 4s
E2E Staging External Runtime / E2E Staging External Runtime (pull_request) Successful in 5m10s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Successful in 5m2s
Ops Scripts Tests / Ops scripts (unittest) (pull_request) Successful in 1m34s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 24s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m46s
E2E Chat / E2E Chat (pull_request) Successful in 4s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 2s
Harness Replays / Harness Replays (pull_request) Successful in 4s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m58s
CI / Platform (Go) (pull_request) Successful in 5m11s
CI / Canvas (Next.js) (pull_request) Successful in 7m3s
CI / all-required (pull_request) Successful in 18m55s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
audit-force-merge / audit (pull_request) Successful in 6s
194 lines
8.8 KiB
TypeScript
194 lines
8.8 KiB
TypeScript
import { getTenantSlug } from "./tenant";
|
|
|
|
/**
 * Base URL prepended to every platform API path.
 *
 * When NEXT_PUBLIC_PLATFORM_URL is set to "" (empty string), the canvas
 * uses relative paths — correct for the combined tenant image where the
 * Go platform + canvas run on the same port via reverse proxy. The `??`
 * operator preserves "" as a valid value; `||` would fall through to
 * the localhost default.
 */
export const PLATFORM_URL =
  process.env.NEXT_PUBLIC_PLATFORM_URL ?? "http://localhost:8080";
|
|
|
|
/**
 * Default client-side timeout for a platform request, in milliseconds.
 *
 * 35s is long enough for the slowest server-side path (EIC SSH tunnel
 * for tenant EC2 file operations, bounded server-side by
 * `eicFileOpTimeout = 30 * time.Second` in
 * workspace-server/internal/handlers/template_files_eic.go) so the
 * canvas surfaces the server's real error instead of aborting first
 * with a generic timeout. Shorter values caused "Save & Restart" to
 * time out at the client before the backend returned its 5xx.
 *
 * The abort still propagates through the request's abort signal so
 * React components can render a retry affordance. Callers that know an
 * endpoint is intentionally slow (org import walks a tree of workspaces
 * with server-side pacing) can pass `timeoutMs` to override.
 */
const DEFAULT_TIMEOUT_MS = 35_000;
|
|
|
|
/** Per-call overrides accepted by every method on `api`. */
export interface RequestOptions {
  /**
   * Client-side timeout in milliseconds. When omitted, the module-level
   * default timeout is used.
   */
  timeoutMs?: number;
  /**
   * Extra request headers. These are merged after the JSON Content-Type
   * and the platform auth headers, so a key given here replaces the
   * built-in value of the same name.
   */
  headers?: Record<string, string>;
}
|
|
|
|
/**
 * Build the platform auth header set used by every authenticated fetch
 * from the canvas. Returns a fresh object so callers can mutate (e.g.
 * append `Content-Type` for JSON requests, omit it for FormData).
 *
 * SaaS cross-origin shape:
 *   - `X-Molecule-Org-Slug` — derived from `window.location.hostname`
 *     by `getTenantSlug()`. Control plane uses it for fly-replay
 *     routing. Empty on localhost / non-tenant hosts — safe to omit.
 *   - `Authorization: Bearer <token>` — `NEXT_PUBLIC_ADMIN_TOKEN` baked
 *     into the canvas build (see canvas/Dockerfile L8/L11). Required by
 *     the workspace-server when `ADMIN_TOKEN` is set on the server side
 *     (Tier-2b AdminAuth gate, wsauth_middleware.go ~L245). Empty when
 *     no admin token was provisioned — the Tier-1 session-cookie path
 *     handles that case via `credentials:"include"`.
 *
 * Why a shared helper: the two-line "read env, attach bearer; read
 * slug, attach header" pattern was duplicated across `request()` and
 * 7 raw-fetch callsites (chat uploads/download + 5 Attachment*
 * components) before this consolidation. A new poller or raw fetch
 * that forgets one of the two headers silently 401s against
 * workspace-server when ADMIN_TOKEN is set — the exact bug shape
 * called out in #178 / closes the post-#176 self-review gap.
 *
 * Callers that want JSON Content-Type should spread this and add it
 * themselves; FormData callers should NOT add Content-Type (the
 * browser sets the multipart boundary). Centralizing the auth pair
 * but leaving Content-Type up to the caller is the minimum viable
 * shared shape.
 *
 * @returns A new header map containing only the headers whose source
 *   value is non-empty; it may be an empty object.
 */
export function platformAuthHeaders(): Record<string, string> {
  const headers: Record<string, string> = {};

  // Tenant slug is falsy on non-tenant hosts; skip the header entirely
  // rather than sending an empty value.
  const slug = getTenantSlug();
  if (slug) headers["X-Molecule-Org-Slug"] = slug;

  // Keep the `process.env.NEXT_PUBLIC_*` access spelled out in full so
  // build-time env inlining can still match it.
  const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
  if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;

  return headers;
}
|
|
|
|
/**
 * Send one JSON request to the platform and decode the JSON response.
 *
 * Behaviour, in order:
 *   1. Builds headers (JSON Content-Type, platform auth pair, then any
 *      caller-supplied `options.headers`, which win on key collisions)
 *      and issues the fetch with cookies included and a client-side
 *      timeout (`options.timeoutMs`, else `DEFAULT_TIMEOUT_MS`).
 *   2. On a first 429 for a GET, waits (Retry-After seconds capped at
 *      20, or 2s when the header is absent/unparseable) and retries
 *      exactly once.
 *   3. On 401, decides between "session dead → redirect to login" and
 *      "endpoint refused this token → throw" (details inline).
 *   4. On any other non-2xx, throws; a 503 whose JSON body carries
 *      `code: "platform_unavailable"` becomes `PlatformUnavailableError`.
 *   5. On success, resolves the parsed JSON body, or `undefined` for
 *      204/205 responses, which carry no body to parse.
 *
 * @param method HTTP method, e.g. "GET".
 * @param path Path appended verbatim to `PLATFORM_URL`.
 * @param body JSON-serialisable payload. `undefined`/`null` send no
 *   body; every other value (including `0`, `false`, `""`) is serialised.
 * @param retryCount Internal 429 retry counter; external callers pass 0.
 * @param options Per-call timeout / header overrides.
 * @returns The decoded JSON response body.
 * @throws PlatformUnavailableError for the structured 503 described above.
 * @throws Error for every other non-2xx status, and after triggering
 *   the login redirect. Rejections from `fetch` itself (network failure,
 *   timeout abort) propagate unchanged.
 */
async function request<T>(
  method: string,
  path: string,
  body?: unknown,
  retryCount = 0,
  options?: RequestOptions,
): Promise<T> {
  // JSON-bodied request — Content-Type is JSON. Auth pair comes from
  // the shared helper; see its doc comment for the SaaS-shape rationale.
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
    ...platformAuthHeaders(),
    ...(options?.headers ?? {}),
  };
  // Re-read slug locally for the 401 handler below — `headers` already
  // has it, but the 401 branch needs the bare value to gate the
  // session-probe + redirect logic on tenant context.
  const slug = getTenantSlug();

  const res = await fetch(`${PLATFORM_URL}${path}`, {
    method,
    headers,
    // Only null/undefined mean "no body". A truthiness check would
    // silently drop legitimate JSON payloads such as 0, false or "".
    body: body == null ? undefined : JSON.stringify(body),
    credentials: "include",
    signal: AbortSignal.timeout(options?.timeoutMs ?? DEFAULT_TIMEOUT_MS),
  });

  // Transient rate-limit recovery. A single IP bucket can momentarily
  // spike on page load (several panels hydrate simultaneously). Instead
  // of bubbling up a 429 that blanks the Canvas, wait the
  // Retry-After window and try once — any further 429 surfaces normally.
  // GET / idempotent methods only; never auto-retry mutations.
  if (res.status === 429 && retryCount === 0 && method === "GET") {
    const retryAfterHeader = res.headers.get("Retry-After");
    const retryAfter = retryAfterHeader ? parseInt(retryAfterHeader, 10) : NaN;
    // Clamp to [0, 20] seconds so a negative or oversized header value
    // can neither produce a negative timer delay nor stall the UI.
    // Non-numeric values (e.g. the HTTP-date form) use the 2s default.
    const delayMs = Number.isFinite(retryAfter)
      ? Math.max(0, Math.min(retryAfter, 20)) * 1000
      : 2000;
    await new Promise((resolve) => setTimeout(resolve, delayMs));
    return request<T>(method, path, body, retryCount + 1, options);
  }

  if (res.status === 401) {
    // Distinguish "session is dead" from "this endpoint refused this
    // token." Old behaviour blanket-redirected on every 401, so a
    // single transient 401 from a workspace-scoped endpoint
    // (/workspaces/:id/peers, /plugins, etc. that need a workspace
    // token rather than the tenant admin bearer) yanked the user
    // back to AuthKit even when their session was perfectly fine.
    // That broke the staging-tabs E2E for the entire 2026-04-25
    // night; #2073/#2074 worked around the symptom in the test by
    // mocking 401→200 for every fetch, but the user-facing bug
    // stayed.
    //
    // The canonical "session is dead" signal is /cp/auth/me
    // returning 401. For any 401 on a non-auth path, probe
    // /cp/auth/me before deciding to redirect:
    //   - probe 401 → session is actually dead → redirect
    //   - probe 200 → session is fine, the endpoint just refused
    //                 our specific token → throw a real error,
    //                 caller renders an error state
    //   - probe network error → assume session-fine (conservative;
    //                 better to throw than to redirect on a
    //                 transient probe failure)
    //
    // Self-hosted / localhost / reserved subdomains still throw
    // without redirecting (slug is empty in those cases) — same
    // policy as before.
    const isAuthPath = path.startsWith("/cp/auth/");
    let sessionDead = isAuthPath;
    if (!isAuthPath && slug) {
      try {
        const probe = await fetch(`${PLATFORM_URL}/cp/auth/me`, {
          credentials: "include",
          signal: AbortSignal.timeout(5000),
        });
        sessionDead = probe.status === 401;
      } catch {
        // Probe failed (network/timeout) — fall through to throw.
      }
    }
    if (sessionDead && slug) {
      // Loaded lazily, only on the redirect path.
      const { redirectToLogin } = await import("./auth");
      redirectToLogin("sign-in");
      throw new Error("Session expired — redirecting to login");
    }
    throw new Error(`API ${method} ${path}: 401 ${await res.text()}`);
  }

  if (!res.ok) {
    const text = await res.text();
    // Recognise the platform's structured "datastore unreachable"
    // shape (returned by wsauth_middleware.abortAuthLookupError when
    // Postgres/Redis is down). Surface as a typed error so callers
    // can render a dedicated diagnostic instead of a generic toast.
    if (res.status === 503 && text) {
      let parsed: { code?: string; error?: string } | null = null;
      try {
        parsed = JSON.parse(text) as { code?: string; error?: string } | null;
      } catch {
        // Body was not JSON — fall through to the generic Error below.
      }
      if (parsed?.code === "platform_unavailable") {
        throw new PlatformUnavailableError(parsed.error || "platform datastore unavailable");
      }
    }
    throw new Error(`API ${method} ${path}: ${res.status} ${text}`);
  }

  // 204 No Content / 205 Reset Content never carry a body, so parsing
  // would reject with a SyntaxError and turn a successful call into a
  // failure. Resolve with undefined instead.
  if (res.status === 204 || res.status === 205) {
    return undefined as T;
  }
  return res.json();
}
|
|
|
|
/**
 * Thrown when the platform reports its datastore (Postgres/Redis) is
 * unreachable. Surface with a dedicated diagnostic UI rather than a
 * generic API-error toast — the user's next action is to check local
 * services, not to retry the API call.
 */
export class PlatformUnavailableError extends Error {
  /** Fixed discriminator; equals the `code` field of the 503 body that triggers this error. */
  readonly code = "platform_unavailable" as const;

  /**
   * @param message Human-readable description, used as the error message.
   */
  constructor(message: string) {
    super(message);
    // Override the inherited "Error" so logs and `err.name` checks can
    // tell this type apart.
    this.name = "PlatformUnavailableError";
  }
}
|
|
|
|
/**
 * Typed convenience wrappers around `request()`, one per HTTP verb.
 *
 * Every wrapper starts with a retry count of 0. `get` and `del` never
 * send a body; `post`, `patch` and `put` forward the optional `body`.
 * `T` is the caller's expected shape of the decoded response; it is
 * not validated at runtime.
 */
export const api = {
  get: <T>(path: string, options?: RequestOptions) =>
    request<T>("GET", path, undefined, 0, options),
  post: <T>(path: string, body?: unknown, options?: RequestOptions) =>
    request<T>("POST", path, body, 0, options),
  patch: <T>(path: string, body?: unknown, options?: RequestOptions) =>
    request<T>("PATCH", path, body, 0, options),
  put: <T>(path: string, body?: unknown, options?: RequestOptions) =>
    request<T>("PUT", path, body, 0, options),
  del: <T>(path: string, options?: RequestOptions) =>
    request<T>("DELETE", path, undefined, 0, options),
};
|