Merge pull request #993 from Molecule-AI/staging

promote: staging → main — canary infra + /orgs + env refresh + perf
Hongming Wang 2026-04-19 04:26:13 -07:00 committed by GitHub
commit 43880f580b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 1145 additions and 14 deletions

.github/workflows/canary-verify.yml
View File

@@ -0,0 +1,113 @@
name: canary-verify
# Runs the canary smoke suite against the staging canary tenant fleet
# after a new :staging-<sha> image lands in GHCR. On green, promotes
# :staging-<sha> → :latest so the prod tenant fleet's 5-minute
# auto-updater picks up the verified digest. On red, :latest stays
# on the prior known-good digest and prod is untouched.
#
# Dependencies:
# - publish-workspace-server-image.yml publishes :staging-<sha>
# (NOT :latest) on main merge
# - canary tenants are configured to pull :staging-<sha> as their
# tenant image (set TENANT_IMAGE=ghcr.io/…:staging-<sha> on the
# canary provisioner code path OR rotate via an admin endpoint)
# - Repo secrets CANARY_TENANT_URLS / CANARY_ADMIN_TOKENS /
# CANARY_CP_SHARED_SECRET are populated
on:
workflow_run:
workflows: ["publish-workspace-server-image"]
types: [completed]
workflow_dispatch:
permissions:
contents: read
packages: write
actions: read
env:
IMAGE_NAME: ghcr.io/molecule-ai/platform
TENANT_IMAGE_NAME: ghcr.io/molecule-ai/platform-tenant
jobs:
canary-smoke:
# Skip when the upstream workflow failed — no image to test against.
if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }}
runs-on: ubuntu-latest
outputs:
sha: ${{ steps.compute.outputs.sha }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Compute sha
  id: compute
  env:
    # For workflow_run triggers, GITHUB_SHA can point at a newer
    # default-branch head than the commit that published the image.
    # Prefer the triggering run's head sha; fall back for dispatch.
    HEAD_SHA: ${{ github.event.workflow_run.head_sha || github.sha }}
  run: echo "sha=${HEAD_SHA::7}" >> "$GITHUB_OUTPUT"
- name: Wait for canary tenants to pick up :staging-<sha>
# Tenant auto-updater runs every 5 min. Sleep 6 min to give every
# canary time to pull + restart. Cheaper than polling.
run: sleep 360
- name: Run canary smoke suite
env:
CANARY_TENANT_URLS: ${{ secrets.CANARY_TENANT_URLS }}
CANARY_ADMIN_TOKENS: ${{ secrets.CANARY_ADMIN_TOKENS }}
CANARY_CP_BASE_URL: https://staging-api.moleculesai.app
CANARY_CP_SHARED_SECRET: ${{ secrets.CANARY_CP_SHARED_SECRET }}
run: bash scripts/canary-smoke.sh
- name: Summary on failure
if: ${{ failure() }}
run: |
{
echo "## Canary smoke FAILED"
echo
echo "Canary tenants rejected image \`staging-${{ steps.compute.outputs.sha }}\`."
echo ":latest stays pinned to the prior good digest — prod is untouched."
echo
echo "Fix forward and merge again, or investigate the specific failed"
echo "assertions in the canary-smoke step log above."
} >> "$GITHUB_STEP_SUMMARY"
promote-to-latest:
# On green, retag :staging-<sha> → :latest for BOTH images.
# crane is a lightweight registry client (no Docker daemon needed on
# the runner) that can retag remotely with a single API call each.
needs: canary-smoke
if: ${{ needs.canary-smoke.result == 'success' }}
runs-on: ubuntu-latest
steps:
- name: Install crane
run: |
curl -fsSL https://github.com/google/go-containerregistry/releases/download/v0.20.2/go-containerregistry_Linux_x86_64.tar.gz | \
tar xz -C /usr/local/bin crane
- name: GHCR login
run: |
echo "${{ secrets.GITHUB_TOKEN }}" | \
crane auth login ghcr.io -u "${{ github.actor }}" --password-stdin
- name: Retag platform :staging-<sha> → :latest
run: |
crane tag \
"${IMAGE_NAME}:staging-${{ needs.canary-smoke.outputs.sha }}" \
latest
- name: Retag tenant :staging-<sha> → :latest
run: |
crane tag \
"${TENANT_IMAGE_NAME}:staging-${{ needs.canary-smoke.outputs.sha }}" \
latest
- name: Summary
run: |
{
echo "## Canary verified — :latest promoted"
echo
echo "- \`${IMAGE_NAME}:staging-${{ needs.canary-smoke.outputs.sha }}\` → \`${IMAGE_NAME}:latest\`"
echo "- \`${TENANT_IMAGE_NAME}:staging-${{ needs.canary-smoke.outputs.sha }}\` → \`${TENANT_IMAGE_NAME}:latest\`"
echo
echo "Prod tenant fleet will pick up the new digest on its next 5-min auto-update cycle."
} >> "$GITHUB_STEP_SUMMARY"

.github/workflows/publish-workspace-server-image.yml
View File

@@ -55,7 +55,17 @@ jobs:
run: |
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
- name: Build & push platform image to GHCR
# Canary-gated release: we publish :staging-<sha> ONLY here. The
# :latest tag (which existing prod tenants auto-pull every 5 min)
# is promoted by .github/workflows/canary-verify.yml after the
# staging canary fleet green-lights this digest.
# That means:
# - Every main merge produces a :staging-<sha> image
# - Canary tenants (configured to pull :staging-<sha>) pick it up
# - canary-verify.yml runs smoke tests against them
# - On green → canary-verify retags :staging-<sha> → :latest
# - On red → :latest stays on the prior good digest, prod is safe
- name: Build & push platform image to GHCR (staging-<sha> only)
uses: docker/build-push-action@v6
with:
context: .
@@ -63,16 +73,15 @@
platforms: linux/amd64
push: true
tags: |
${{ env.IMAGE_NAME }}:latest
${{ env.IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
${{ env.IMAGE_NAME }}:staging-${{ steps.tags.outputs.sha }}
cache-from: type=gha
cache-to: type=gha,mode=max
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.description=Molecule AI platform (Go API server)
org.opencontainers.image.description=Molecule AI platform (Go API server) — pending canary verify
- name: Build & push tenant image to GHCR
- name: Build & push tenant image to GHCR (staging-<sha> only)
uses: docker/build-push-action@v6
with:
context: .
@@ -80,11 +89,10 @@
platforms: linux/amd64
push: true
tags: |
${{ env.TENANT_IMAGE_NAME }}:latest
${{ env.TENANT_IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
${{ env.TENANT_IMAGE_NAME }}:staging-${{ steps.tags.outputs.sha }}
cache-from: type=gha
cache-to: type=gha,mode=max
labels: |
org.opencontainers.image.source=https://github.com/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.description=Molecule AI tenant platform + canvas (one EC2 instance per org)
org.opencontainers.image.description=Molecule AI tenant platform + canvas — pending canary verify

View File

@@ -0,0 +1,278 @@
"use client";
// /orgs — the post-signup landing page.
//
// The control plane's Callback handler (authorized via WorkOS) redirects
// every new session to APP_URL/orgs after login/signup succeeds. Before
// this route existed that redirect 404'd and new users were stranded.
// Now:
// - Signed-out browsers are bounced back to /cp/auth/login
// - Zero-org users see a slug-picker → POST /cp/orgs → refresh
// - `awaiting_payment` orgs get a "Complete payment" CTA → /pricing
// - `running` orgs show a link to the tenant URL
// - `provisioning` / `failed` surface the state so the user knows
// why their tenant isn't available yet
//
// Everything here is intentionally server-light: one GET /cp/orgs,
// zero WebSocket, no canvas store hydration — the whole point is a
// quick bounce between signup and either Checkout or the tenant UI.
import { useEffect, useState } from "react";
import { fetchSession, redirectToLogin, type Session } from "@/lib/auth";
import { PLATFORM_URL } from "@/lib/api";
type OrgStatus = "awaiting_payment" | "provisioning" | "running" | "failed" | string;
interface Org {
id: string;
slug: string;
name: string;
plan: string;
status: OrgStatus;
created_at: string;
updated_at: string;
}
export default function OrgsPage() {
const [session, setSession] = useState<Session | null | "loading">("loading");
const [orgs, setOrgs] = useState<Org[] | null>(null);
const [error, setError] = useState<string | null>(null);
useEffect(() => {
let cancelled = false;
(async () => {
try {
const sess = await fetchSession();
if (cancelled) return;
if (!sess) {
redirectToLogin();
return;
}
setSession(sess);
const res = await fetch(`${PLATFORM_URL}/cp/orgs`, {
credentials: "include",
signal: AbortSignal.timeout(15_000),
});
if (!res.ok) {
throw new Error(`GET /cp/orgs: ${res.status}`);
}
const body = (await res.json()) as { orgs?: Org[] } | Org[];
const list = Array.isArray(body) ? body : body.orgs ?? [];
if (!cancelled) setOrgs(list);
} catch (err) {
if (!cancelled) {
setError(err instanceof Error ? err.message : String(err));
}
}
})();
return () => {
cancelled = true;
};
}, []);
if (session === "loading" || (orgs === null && error === null)) {
return <Shell><p className="text-zinc-400">Loading…</p></Shell>;
}
if (error) {
return (
<Shell>
<p className="text-red-400">Error: {error}</p>
<button
onClick={() => window.location.reload()}
className="mt-4 rounded bg-zinc-800 px-4 py-2 text-sm text-zinc-200 hover:bg-zinc-700"
>
Retry
</button>
</Shell>
);
}
if (!orgs || orgs.length === 0) {
return <EmptyState />;
}
return (
<Shell>
<ul className="space-y-3">
{orgs.map((o) => (
<OrgRow key={o.id} org={o} />
))}
</ul>
<div className="mt-8 border-t border-zinc-800 pt-6">
<CreateOrgForm
onCreated={(slug) => {
// Refresh the list so the new org appears + its CTA fires.
window.location.reload();
void slug;
}}
/>
</div>
</Shell>
);
}
function Shell({ children }: { children: React.ReactNode }) {
return (
<main className="min-h-screen bg-zinc-950 text-zinc-100">
<div className="mx-auto max-w-2xl px-6 pt-20 pb-12">
<h1 className="text-3xl font-bold text-white">Your organizations</h1>
<p className="mt-2 text-zinc-400">
Each org is an isolated Molecule workspace.
</p>
<div className="mt-8">{children}</div>
</div>
</main>
);
}
function OrgRow({ org }: { org: Org }) {
return (
<li className="rounded-lg border border-zinc-800 bg-zinc-900 p-4">
<div className="flex items-center justify-between">
<div>
<div className="font-medium text-white">{org.name}</div>
<div className="text-sm text-zinc-400">
{org.slug} · <StatusLabel status={org.status} /> · {org.plan || "free"}
</div>
</div>
<OrgCTA org={org} />
</div>
</li>
);
}
function StatusLabel({ status }: { status: OrgStatus }) {
const cls =
status === "running"
? "text-emerald-400"
: status === "awaiting_payment"
? "text-amber-400"
: status === "failed"
? "text-red-400"
: "text-sky-400";
const label =
status === "awaiting_payment"
? "awaiting payment"
: status;
return <span className={cls}>{label}</span>;
}
function OrgCTA({ org }: { org: Org }) {
if (org.status === "running") {
const host = typeof window !== "undefined" ? window.location.hostname : "moleculesai.app";
const appDomain = host.endsWith(".moleculesai.app")
? host.split(".").slice(-2).join(".")
: "moleculesai.app";
const href = `https://${org.slug}.${appDomain}`;
return (
<a
href={href}
className="rounded bg-emerald-600 px-4 py-2 text-sm font-medium text-white hover:bg-emerald-500"
>
Open
</a>
);
}
if (org.status === "awaiting_payment") {
return (
<a
href={`/pricing?org=${encodeURIComponent(org.slug)}`}
className="rounded bg-amber-600 px-4 py-2 text-sm font-medium text-white hover:bg-amber-500"
>
Complete payment
</a>
);
}
if (org.status === "failed") {
return (
<a
href="mailto:support@moleculesai.app"
className="rounded bg-zinc-700 px-4 py-2 text-sm font-medium text-zinc-200 hover:bg-zinc-600"
>
Contact support
</a>
);
}
// provisioning / unknown — non-interactive
return <span className="text-sm text-zinc-500">{org.status}</span>;
}
function EmptyState() {
return (
<Shell>
<p className="text-zinc-300">
You don&apos;t have any organizations yet. Create one to get started;
your workspace spins up automatically once billing is set up.
</p>
<div className="mt-6">
<CreateOrgForm
onCreated={() => {
window.location.reload();
}}
/>
</div>
</Shell>
);
}
function CreateOrgForm({ onCreated }: { onCreated: (slug: string) => void }) {
const [slug, setSlug] = useState("");
const [name, setName] = useState("");
const [submitting, setSubmitting] = useState(false);
const [err, setErr] = useState<string | null>(null);
async function submit(e: React.FormEvent) {
e.preventDefault();
setSubmitting(true);
setErr(null);
try {
const res = await fetch(`${PLATFORM_URL}/cp/orgs`, {
method: "POST",
credentials: "include",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ slug, name }),
signal: AbortSignal.timeout(15_000),
});
if (!res.ok) {
const body = await res.text();
throw new Error(`${res.status}: ${body}`);
}
onCreated(slug);
} catch (e) {
setErr(e instanceof Error ? e.message : String(e));
setSubmitting(false);
}
}
return (
<form onSubmit={submit} className="space-y-3">
<label className="block">
<span className="text-sm text-zinc-300">Slug (URL)</span>
<input
value={slug}
onChange={(e) => setSlug(e.target.value.toLowerCase())}
pattern="^[a-z][a-z0-9-]{2,31}$"
placeholder="acme"
required
className="mt-1 w-full rounded border border-zinc-700 bg-zinc-800 px-3 py-2 text-sm text-zinc-100"
/>
</label>
<label className="block">
<span className="text-sm text-zinc-300">Display name</span>
<input
value={name}
onChange={(e) => setName(e.target.value)}
placeholder="Acme Corp"
required
className="mt-1 w-full rounded border border-zinc-700 bg-zinc-800 px-3 py-2 text-sm text-zinc-100"
/>
</label>
{err && <p className="text-sm text-red-400">{err}</p>}
<button
type="submit"
disabled={submitting}
className="rounded bg-blue-600 px-4 py-2 text-sm font-medium text-white hover:bg-blue-500 disabled:opacity-50"
>
{submitting ? "Creating…" : "Create organization"}
</button>
</form>
);
}

View File

@@ -8,6 +8,12 @@ import { getTenantSlug } from "./tenant";
export const PLATFORM_URL =
process.env.NEXT_PUBLIC_PLATFORM_URL ?? "http://localhost:8080";
// 15s is long enough for slow CP queries but short enough that a
// hung backend doesn't leave the UI spinning forever. On timeout
// AbortSignal.timeout aborts the fetch with a TimeoutError, which
// React components can catch and turn into a retry affordance.
const DEFAULT_TIMEOUT_MS = 15_000;
async function request<T>(
method: string,
path: string,
@@ -28,6 +34,7 @@ async function request<T>(
headers,
body: body ? JSON.stringify(body) : undefined,
credentials: "include",
signal: AbortSignal.timeout(DEFAULT_TIMEOUT_MS),
});
if (!res.ok) {
const text = await res.text();

View File

@@ -0,0 +1,79 @@
# Canary release pipeline
How a workspace-server code change reaches the prod tenant fleet — and how to stop it if something's wrong.
## The loop
```
PR merged to staging → main
publish-workspace-server-image.yml ← pushes :staging-<sha> ONLY
│ (NOT :latest — prod is untouched)
Canary tenants auto-update to :staging-<sha>
│ (5-min auto-updater cycle on each canary EC2)
canary-verify.yml waits 6 min, runs scripts/canary-smoke.sh
├─► GREEN → crane tag :staging-<sha> → :latest
│ │
│ ▼
│ Prod tenants auto-update within 5 min
└─► RED → :latest stays on prior good digest
GitHub Step Summary flags the rejected sha
Ops fixes forward OR rolls back manually
```
## Canary fleet
Lives in a separate AWS account (`molecule-canary`, `004947743811`) via an assumed role (`MoleculeStagingProvisioner`). The CP's `is_canary` org flag routes provisioning there; every other org goes to the default staging account. See `docs/architecture/saas-prod-migration-2026-04-19.md` for the account bootstrap.
Canary tenants are configured to pull `:staging-<sha>` (not `:latest`) via `TENANT_IMAGE` on their provisioner, so they ingest each new build before prod does.
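For illustration only, the env-var route could be pinned like this on the provisioner code path (the sha is a placeholder; the admin rotate endpoint mentioned in the `canary-verify.yml` header is the alternative):

```bash
# Pin the canary provisioner's tenant image to one staging build.
# The sha is a placeholder, not a real build.
export TENANT_IMAGE="ghcr.io/molecule-ai/platform-tenant:staging-4c1d56e"
```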
## Smoke suite
`scripts/canary-smoke.sh` hits each canary tenant (URL + ADMIN_TOKEN pair) and asserts:
- `/admin/liveness` returns a subsystems map (tenant booted, AdminAuth reachable)
- `/workspaces` returns a JSON array (wsAuth + DB healthy)
- `/memories/commit` + `/memories/search` round-trip (encryption + scrubber)
- `/events` admin read (C4 fail-closed proof)
- `/admin/liveness` without bearer → 401 (C4 regression gate)
Expand by editing the script — each `check "name" "expected" "$response"` call is one line.
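For example, a hypothetical extra assertion drops into the per-tenant loop using the script's own `acurl`/`check` helpers (`/admin/version` is an invented endpoint for illustration):

```bash
# Inside the per-tenant for-loop in scripts/canary-smoke.sh — sketch only.
# /admin/version is a hypothetical endpoint used for illustration.
resp=$(acurl "$base" "$token" /admin/version || true)
check "version endpoint reports a sha" '"sha"' "$resp"
```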
## Adding a canary tenant
1. `POST /cp/orgs` — create the org normally (is_canary defaults to false)
2. `POST /cp/admin/orgs/<slug>/canary` with `{"is_canary": true}` — admin only, refuses to flip if already provisioned
3. Re-trigger provision (or delete + recreate if the org was already provisioned into staging) — the fresh EC2 lands in account `004947743811`
Then set repo secrets:
- `CANARY_TENANT_URLS` — append the new tenant's URL
- `CANARY_ADMIN_TOKENS` — append its ADMIN_TOKEN in the same position
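A sketch of step 2, assuming `https://staging-api.moleculesai.app` as the CP base URL and a hypothetical `$CP_ADMIN_TOKEN` bearer gating `/cp/admin/*` (neither is a documented value; verify against the CP's actual admin auth):

```bash
# Flip the canary flag before the org is provisioned (step 2 above).
# $CP_ADMIN_TOKEN and the base URL are assumptions, not documented values.
curl -sS -X POST "https://staging-api.moleculesai.app/cp/admin/orgs/acme-canary/canary" \
  -H "Authorization: Bearer $CP_ADMIN_TOKEN" \
  -H "Content-Type: application/json" \
  -d '{"is_canary": true}'
```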
## Rolling back `:latest`
If canary was green but a regression surfaces post-promotion, retag `:latest` to a prior digest:
```bash
export GITHUB_TOKEN=ghp_... # write:packages
scripts/rollback-latest.sh 4c1d56e # retags both platform + tenant images
```
`scripts/rollback-latest.sh` pre-checks that `:staging-<sha>` exists before moving `:latest`, and verifies the digest after the move. Prod tenants pick up the rolled-back image on their next 5-min auto-update.
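If the script itself is unavailable, the same moves by hand follow the crane sequence documented in its header (the sha below is a placeholder):

```bash
# Per image — platform shown; repeat for platform-tenant.
crane digest ghcr.io/molecule-ai/platform:staging-4c1d56e   # verify the target exists
crane tag    ghcr.io/molecule-ai/platform:staging-4c1d56e latest
crane digest ghcr.io/molecule-ai/platform:latest            # confirm the move
```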
A post-mortem should always include:
- the commit sha that broke
- why canary didn't catch it (new code path the smoke suite doesn't exercise?)
- whether the smoke suite should grow a new check to prevent the same class of bug
## What this gate doesn't catch
- Bugs that only surface under prod-only data (customer workloads with scale or shape canary doesn't produce). Canary uses real traffic shapes but can't simulate weeks of accumulated state.
- Config drift between canary and prod (different env-var values, different feature flags). Keep canary's config deltas minimal and documented.
- Cross-tenant interactions — canary tenants run in their own AWS account, so a bug that only appears when two tenants compete for a shared resource won't reproduce here.
When these miss, `rollback-latest.sh` is the escape hatch.

docs/architecture/saas-prod-migration-2026-04-19.md
View File

@@ -0,0 +1,72 @@
# SaaS prod migration — 2026-04-19
Promoted staging → main on both `Molecule-AI/molecule-controlplane` and `Molecule-AI/molecule-core`. This note captures the prod cutover deltas so ops can cross-check against the running system.
## What changed
Eleven PRs landed, split across the two repos:
**Control plane (`molecule-controlplane`)**
- PR #50 — C1/C2/C3: bearer auth on `/cp/workspaces/*`, shell-escape tenant user-data, per-tenant security group
- PR #51 — H1/H2: crash-safe `SECRETS_ENCRYPTION_KEY` log, dropped `admin_token` from `/instance` SELECT
- PR #52 — SSRF guard on `platform_url`
- PR #53 — CP injects `MOLECULE_CP_SHARED_SECRET` + `MOLECULE_CP_URL` into tenant env
- PR #54 — Stripe webhook body capped at 1 MiB
**Core (`molecule-core` / this repo)**
- PR #978 — H3/H4: LimitReader on Discord webhook + workspace config PATCH
- PR #979 — C4: `AdminAuth` fail-closed on fresh install when `ADMIN_TOKEN` is set
- PR #980 — log-scrub: dropped token prefix logging, stopped logging raw upstream response bodies
- PR #981 — tenant `CPProvisioner` attaches the CP bearer on every outbound `/cp/workspaces/*` call
- PR #982 — Canvas API fetch timeout (15s)
- PR #984 — E2E smoke test sync for #966 (public GET no longer exposes `current_task`)
## New prod env vars (Railway, project `molecule-platform`, env `production`)
Set before the CP merge landed:
| Variable | Value shape | Purpose |
|---|---|---|
| `PROVISION_SHARED_SECRET` | 32-byte hex | Gates `/cp/workspaces/*` on CP. Routes refuse to mount when unset — C1 fail-closed. |
| `EC2_VPC_ID` | `vpc-…` | Enables per-tenant SG creation (C3). Shared-SG fallback emits a startup warning. |
| `CP_BASE_URL` | `https://api.moleculesai.app` | Injected into newly-provisioned tenant containers as `MOLECULE_CP_URL`. |
The live prod `PROVISION_SHARED_SECRET` value is held only in Railway; not committed anywhere. Rotate by `railway variables --set` + redeploy.
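A rotation sketch; `railway variables --set` comes from the note above, while any service/environment targeting flags vary by railway CLI version, so verify locally:

```bash
# Mint a fresh 32-byte hex secret and set it on the prod CP service.
NEW_SECRET="$(openssl rand -hex 32)"
railway variables --set "PROVISION_SHARED_SECRET=${NEW_SECRET}"
# Then redeploy so the CP process picks up the new value.
```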
## Existing-tenant migration (the sharp edge)
Tenants provisioned **before** this cutover are still running the previous workspace-server image. When they pull the new image on their next boot or auto-update cycle, their `CPProvisioner` will start expecting `MOLECULE_CP_SHARED_SECRET` in the container env — but the existing tenant EC2s don't have that variable in their user-data (the CP only started injecting it from PR #53 onward).
**Symptom**: a pre-cutover tenant can still serve its users' existing workspaces, but any attempt to **provision a new workspace** from inside the tenant UI will hit the CP's new bearer gate and get `401` or `404` back, surfacing as "workspace provision failed" with a generic error.
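The symptom is reproducible from outside; it is the same bearer-less probe as the verification checklist below:

```bash
# No Authorization header, exactly as an un-migrated tenant sends it.
curl -i -X POST https://api.moleculesai.app/cp/workspaces/provision \
  -H "Content-Type: application/json" -d '{}'
# Expect 401 once PROVISION_SHARED_SECRET is live CP-side.
# A 404 would mean the routes never mounted (C1 fail-closed).
```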
**Fix per existing tenant (pick one)**:
1. **SSH in + add the env var**
- Copy `PROVISION_SHARED_SECRET` from Railway prod env.
- `ssh ubuntu@<tenant-ip>` and append to the running container's env (`docker stop && docker run … -e MOLECULE_CP_SHARED_SECRET='…' -e MOLECULE_CP_URL=https://api.moleculesai.app …`). Rolling this into an auto-update hook is follow-up work.
2. **Re-provision the tenant**
- `DELETE /cp/orgs/:slug` → re-create via normal signup flow. Tenant-level data survives only if the tenant's own Postgres volume is preserved; workspace_id values change. This is the heavy hammer — only for tenants where existing data can be recreated easily.
3. **Wait for the auto-update cycle + startup env refresh**
   - Tenant auto-updater (cron, 5-minute cadence) pulls the new container image but **does not refresh env vars** — those are frozen from the initial user-data. The new image closes that gap itself: on boot, `refreshEnvFromCP` fetches `MOLECULE_CP_SHARED_SECRET` from the CP using the tenant's existing `ADMIN_TOKEN` + `MOLECULE_ORG_ID`. Tenants whose user-data predates even those variables still need option 1 or 2.
Script at `scripts/migrate-tenant-cp-secret.sh` (follow-up) will automate option 1 across all running tenants in the prod AWS account.
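A plausible shape for that follow-up, shown as a sketch: the container name, IP list file, and run flags are assumptions, and the real script must carry over each tenant's original ports, volumes, and remaining env (elided in option 1 above):

```bash
#!/bin/bash
# migrate-tenant-cp-secret.sh (sketch): fleet-wide option 1.
# Assumptions: PROVISION_SHARED_SECRET exported from Railway prod,
# tenant IPs one per line in tenant-ips.txt, container named
# "molecule-tenant". All hypothetical until the real script lands;
# the real run must also preserve the tenant's original
# ports/volumes/env flags, omitted here.
set -euo pipefail
: "${PROVISION_SHARED_SECRET:?copy from Railway prod env first}"
while read -r ip; do
  echo "── ${ip} ──"
  # -n keeps ssh from consuming the rest of tenant-ips.txt on stdin.
  ssh -n "ubuntu@${ip}" "
    docker stop molecule-tenant && docker rm molecule-tenant &&
    docker run -d --name molecule-tenant --restart unless-stopped \
      -e MOLECULE_CP_SHARED_SECRET='${PROVISION_SHARED_SECRET}' \
      -e MOLECULE_CP_URL=https://api.moleculesai.app \
      ghcr.io/molecule-ai/platform-tenant:latest
  "
done < tenant-ips.txt
```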
## Post-deploy verification checklist
- [ ] Railway prod deploy for `controlplane` lands on the new commit (check `https://railway.com/project/7ccc…/service/ae76…`)
- [ ] `curl https://api.moleculesai.app/health` → 200 `{service: molecule-cp, status: ok}`
- [ ] `curl -X POST https://api.moleculesai.app/cp/workspaces/provision` (no bearer) → 401 (**not** 404 — proves the env var is live and routes mounted)
- [ ] GHCR publishes new `workspace-server` image for the core main commit
- [ ] Vercel canvas prod deploy lands
## Rollback
If prod is on fire:
1. Revert PR #46 on `Molecule-AI/molecule-controlplane` via GitHub's Revert button on the merged PR (or `git revert -m 1` on its merge commit). This backs out the CP bundle in one go.
2. Revert PR #983 on `Molecule-AI/molecule-core` the same way to back out the core bundle.
3. Both reverts auto-deploy via Railway / GHCR / Vercel.
Existing tenants aren't affected by a rollback — they're running whichever tenant image tag they booted with. Only newly-provisioned tenants pick up the reverted control plane code.

scripts/canary-smoke.sh (executable)
View File

@@ -0,0 +1,120 @@
#!/bin/bash
# canary-smoke.sh — runs the post-deploy smoke suite against the
# staging canary tenant fleet. Called by the canary-verify.yml GitHub
# Actions workflow after a new workspace-server image gets pushed to
# GHCR; exits non-zero on any failure so the workflow can skip the
# :staging-sha → :latest retag that would otherwise release broken
# code to the prod tenant fleet.
#
# Environment:
# CANARY_TENANT_URLS space-sep list of canary tenant base URLs
# (e.g. "https://canary-pm.staging.moleculesai.app
# https://canary-mcp.staging.moleculesai.app")
# CANARY_ADMIN_TOKENS space-sep list of ADMIN_TOKENs, positionally
# matched to CANARY_TENANT_URLS. Canary tenants
# are provisioned with known ADMIN_TOKENs so CI
# can hit their admin-gated endpoints.
# CANARY_CP_BASE_URL CP base URL the canaries call back to
# (https://staging-api.moleculesai.app)
# CANARY_CP_SHARED_SECRET matches CP's PROVISION_SHARED_SECRET so this
# script can also exercise /cp/workspaces/* via
# the canary's own CPProvisioner identity.
#
# Exit codes: 0 = all green, 1 = assertion failure, 2 = setup/env problem.
set -euo pipefail
# ── Setup ────────────────────────────────────────────────────────────────
: "${CANARY_TENANT_URLS:?space-sep list of canary base URLs required}"
: "${CANARY_ADMIN_TOKENS:?space-sep list of ADMIN_TOKENs required, same order as URLs}"
: "${CANARY_CP_BASE_URL:?CP base URL required}"
read -r -a URLS <<< "$CANARY_TENANT_URLS"
read -r -a TOKENS <<< "$CANARY_ADMIN_TOKENS"
if [ "${#URLS[@]}" -ne "${#TOKENS[@]}" ]; then
echo "ERROR: URLS(${#URLS[@]}) and TOKENS(${#TOKENS[@]}) length mismatch" >&2
exit 2
fi
if [ "${#URLS[@]}" -eq 0 ]; then
echo "ERROR: no canary URLs configured" >&2
exit 2
fi
PASS=0
FAIL=0
# ── Helpers ──────────────────────────────────────────────────────────────
check() {
local desc="$1" expected="$2" actual="$3"
if echo "$actual" | grep -qF "$expected"; then
printf " PASS %s\n" "$desc"
PASS=$((PASS + 1))
else
printf " FAIL %s\n expected to contain: %s\n got: %s\n" "$desc" "$expected" "$actual" >&2
FAIL=$((FAIL + 1))
fi
}
# acurl does an admin-authenticated GET/POST/etc. against a canary tenant.
# Takes BASE_URL, ADMIN_TOKEN, and PATH as its first three positional
# args; the rest are passed through to curl. Keeping base and token
# paired means the wrong tenant never gets the wrong token.
acurl() {
  local base="$1" token="$2" path="$3"; shift 3
  curl -sS --max-time 20 -H "Authorization: Bearer $token" "$@" -- "$base$path"
}
# ── Checks (run per canary tenant) ───────────────────────────────────────
for i in "${!URLS[@]}"; do
base="${URLS[$i]}"
token="${TOKENS[$i]}"
printf "\n── %s ──\n" "$base"
# 1. Liveness — the tenant is up and responding to admin auth.
CANARY_ACURL_PATH="/admin/liveness" resp=$(acurl "$base" "$token" || true)
check "liveness returns a subsystems map" '"subsystems"' "$resp"
# 2. CP env refresh — the workspace-server fetched MOLECULE_CP_SHARED_SECRET
# from CP on startup. We can't read env directly, but we can assert the
# liveness + workspace list both work, which together imply the binary
# booted without crashing on the refresh call. A startup failure in
# refreshEnvFromCP logs but still boots (best-effort semantics), so
# this is a sanity check, not a proof.
CANARY_ACURL_PATH="/workspaces" resp=$(acurl "$base" "$token" || true)
check "workspace list is JSON array" "[" "$resp"
# 3. Memory commit round-trip — scope=LOCAL so test data stays on this
# tenant. Verifies encryption + scrubber + retrieval end-to-end.
probe_id="canary-smoke-$(date +%s)-$i"
body=$(printf '{"scope":"LOCAL","namespace":"canary-smoke","content":"probe-%s"}' "$probe_id")
CANARY_ACURL_PATH="/memories/commit" resp=$(curl -sS --max-time 20 \
-X POST -H "Content-Type: application/json" -H "Authorization: Bearer $token" \
--data "$body" "$base/memories/commit" || true)
check "memory commit accepted" '"id"' "$resp"
CANARY_ACURL_PATH="/memories/search?query=probe-${probe_id}" \
resp=$(curl -sS --max-time 20 -H "Authorization: Bearer $token" \
"$base/memories/search?query=probe-${probe_id}" || true)
check "memory search finds the probe" "probe-${probe_id}" "$resp"
# 4. Events admin read — AdminAuth path (C4 fail-closed proof on SaaS).
CANARY_ACURL_PATH="/events" resp=$(acurl "$base" "$token" || true)
check "events endpoint returns JSON" "[" "$resp"
# 5. Negative: unauth'd admin call must 401 (C4 regression gate).
unauth_code=$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 "$base/admin/liveness" || echo "000")
check "unauth'd /admin/liveness returns 401" "401" "$unauth_code"
done
# ── Summary ──────────────────────────────────────────────────────────────
printf "\n=== CANARY SMOKE RESULTS ===\n"
printf " PASS: %d\n FAIL: %d\n" "$PASS" "$FAIL"
if [ "$FAIL" -gt 0 ]; then
exit 1
fi

scripts/rollback-latest.sh (executable)
View File

@@ -0,0 +1,80 @@
#!/bin/bash
# rollback-latest.sh — moves the :latest tag on ghcr.io/molecule-ai/platform
# (and the matching tenant image) back to a prior :staging-<sha> digest
# without rebuilding anything. Prod tenants auto-pull :latest every 5
# min, so this is the fast path when a canary-verified image turns out
# to have a runtime regression that canary didn't catch.
#
# Usage:
# scripts/rollback-latest.sh <sha>
# scripts/rollback-latest.sh 4c1d56e
#
# Prereqs:
# - crane on $PATH (brew install crane OR download from
# https://github.com/google/go-containerregistry/releases)
# - GHCR token exported as GITHUB_TOKEN with write:packages scope
#
# What it does (per image — platform + tenant):
# crane digest ghcr.io/…:<sha> # verify the target sha exists
# crane tag ghcr.io/…:<sha> latest # retag remotely, single API call
# crane digest ghcr.io/…:latest # confirm the move
#
# Exit codes: 0 = both retagged, 1 = tag missing / crane error, 2 = bad args.
set -euo pipefail
if [ "${1:-}" = "" ]; then
echo "usage: $0 <staging-sha>" >&2
echo " e.g. $0 4c1d56e — retags :latest to :staging-4c1d56e" >&2
exit 2
fi
TARGET_SHA="$1"
PLATFORM=ghcr.io/molecule-ai/platform
TENANT=ghcr.io/molecule-ai/platform-tenant
if ! command -v crane >/dev/null; then
echo "ERROR: crane not installed. brew install crane" >&2
exit 1
fi
if [ -z "${GITHUB_TOKEN:-}" ]; then
echo "ERROR: GITHUB_TOKEN unset. export it with write:packages scope." >&2
exit 1
fi
# Log in once. crane stores creds in a config file keyed by registry;
# re-running is cheap.
printf '%s\n' "$GITHUB_TOKEN" | crane auth login ghcr.io -u "${GITHUB_ACTOR:-$(whoami)}" --password-stdin >/dev/null
roll() {
local image="$1"
local src="$image:staging-$TARGET_SHA"
local dst="$image:latest"
echo "$image"
# Abort rollout if the target tag doesn't exist in the registry.
# Otherwise crane tag would error anyway, but a pre-check gives a
# clearer message for ops.
if ! crane digest "$src" >/dev/null 2>&1; then
echo " FAIL: $src not found in registry. Did you type the wrong sha?" >&2
return 1
fi
src_digest=$(crane digest "$src")
crane tag "$src" latest
new_digest=$(crane digest "$dst")
if [ "$new_digest" != "$src_digest" ]; then
echo " FAIL: $dst digest $new_digest does not match expected $src_digest" >&2
return 1
fi
echo " OK $dst$new_digest"
}
roll "$PLATFORM"
roll "$TENANT"
echo
echo "=== ROLLBACK COMPLETE ==="
echo "Both images now point :latest at staging-$TARGET_SHA."
echo "Prod tenants will pick up the rollback within their 5-min auto-update cycle."

.gitignore
View File

@@ -1 +1,2 @@
server
# The compiled binary, not the cmd/server package.
/server

View File

@@ -0,0 +1,107 @@
package main
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"time"
)
// refreshEnvFromCP pulls the tenant's current config-plane env vars
// from the control plane and applies them via os.Setenv BEFORE any
// other code calls os.Getenv on them.
//
// Why:
// - user-data on the tenant EC2 bakes env vars into `docker run` at
// provision time. Those values are frozen. When we rotate a secret
// on CP (e.g. PROVISION_SHARED_SECRET) there's no way to push the
// new value into already-provisioned tenants.
// - the Docker image auto-updater already pulls the latest workspace-
// server image every 5 min. If THAT image knows how to refresh its
// own env from the CP on startup, every tenant heals itself within
// the update cycle — no ssh, no re-provision, no ops toil.
//
// Contract (paired with cp-side GET /cp/tenants/config):
// Request: GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
// Authorization: Bearer <ADMIN_TOKEN>
// X-Molecule-Org-Id: <MOLECULE_ORG_ID>
// Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
// 401 on bearer mismatch or unknown org
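//
// Example probe of that contract (values illustrative):
//
//   curl -sS -H "Authorization: Bearer $ADMIN_TOKEN" \
//        -H "X-Molecule-Org-Id: org-abc" \
//        "${MOLECULE_CP_URL:-https://api.moleculesai.app}/cp/tenants/config"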
//
// Best-effort: any failure logs and returns — main() keeps booting.
// Self-hosted deploys without MOLECULE_ORG_ID or ADMIN_TOKEN set
// short-circuit silently so this function is a no-op there.
func refreshEnvFromCP() error {
orgID := os.Getenv("MOLECULE_ORG_ID")
adminToken := os.Getenv("ADMIN_TOKEN")
if orgID == "" || adminToken == "" {
// Not a SaaS tenant (self-hosted dev or not yet provisioned).
return nil
}
base := os.Getenv("MOLECULE_CP_URL")
if base == "" {
// Default to prod for any tenant that lost track of its CP URL
// (e.g. older user-data that only set MOLECULE_ORG_ID).
base = "https://api.moleculesai.app"
}
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
req, err := http.NewRequestWithContext(ctx, "GET", base+"/cp/tenants/config", nil)
if err != nil {
return fmt.Errorf("build request: %w", err)
}
req.Header.Set("Authorization", "Bearer "+adminToken)
req.Header.Set("X-Molecule-Org-Id", orgID)
resp, err := http.DefaultClient.Do(req)
if err != nil {
return fmt.Errorf("do request: %w", err)
}
defer resp.Body.Close()
// 64 KiB cap — the CP only returns small JSON blobs here. An
// unbounded read would be weaponizable if a compromised upstream
// ever echoed back a gigabyte.
body, err := io.ReadAll(io.LimitReader(resp.Body, 64<<10))
if err != nil {
return fmt.Errorf("read body: %w", err)
}
if resp.StatusCode != http.StatusOK {
// 401 on first boot-after-restart is expected for tenants still
// running under old user-data where admin_token on-disk hasn't
// had its corresponding row seeded. Don't treat as fatal — just
// log so operators can spot repeat offenders in logs.
return fmt.Errorf("cp returned %d", resp.StatusCode)
}
var cfg map[string]string
if err := json.Unmarshal(body, &cfg); err != nil {
return fmt.Errorf("decode: %w", err)
}
// Apply only strings; reject oversized values defensively. An
// operator-supplied config should never exceed 4 KiB per key —
// workspace-server env vars are URLs, hex secrets, short identifiers.
const maxValueBytes = 4 << 10
applied := 0
for k, v := range cfg {
if k == "" || len(v) > maxValueBytes {
continue
}
if err := os.Setenv(k, v); err != nil {
log.Printf("CP env refresh: setenv %s: %v", k, err)
continue
}
applied++
}
log.Printf("CP env refresh: applied %d values from %s/cp/tenants/config", applied, base)
return nil
}

View File

@@ -0,0 +1,100 @@
package main
import (
"fmt"
"net/http"
"net/http/httptest"
"os"
"testing"
)
// TestRefreshEnvFromCP_NoopWhenNotSaaS: without MOLECULE_ORG_ID or
// ADMIN_TOKEN, the function short-circuits silently — self-hosted dev
// must not fail or log spam here.
func TestRefreshEnvFromCP_NoopWhenNotSaaS(t *testing.T) {
t.Setenv("MOLECULE_ORG_ID", "")
t.Setenv("ADMIN_TOKEN", "")
if err := refreshEnvFromCP(); err != nil {
t.Errorf("expected nil on non-SaaS, got %v", err)
}
}
// TestRefreshEnvFromCP_AppliesCPResponse: wire a stub CP, run refresh,
// confirm the returned env vars ended up in os.Environ().
func TestRefreshEnvFromCP_AppliesCPResponse(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if got := r.Header.Get("Authorization"); got != "Bearer tenant-admin-token" {
t.Errorf("bearer: got %q", got)
}
if got := r.Header.Get("X-Molecule-Org-Id"); got != "org-abc" {
t.Errorf("org id header: got %q", got)
}
w.Header().Set("Content-Type", "application/json")
fmt.Fprint(w, `{"MOLECULE_CP_SHARED_SECRET":"new-secret","MOLECULE_CP_URL":"https://api.moleculesai.app"}`)
}))
defer srv.Close()
t.Setenv("MOLECULE_ORG_ID", "org-abc")
t.Setenv("ADMIN_TOKEN", "tenant-admin-token")
t.Setenv("MOLECULE_CP_URL", srv.URL)
t.Setenv("MOLECULE_CP_SHARED_SECRET", "") // clear before refresh
if err := refreshEnvFromCP(); err != nil {
t.Fatalf("refreshEnvFromCP: %v", err)
}
if got := os.Getenv("MOLECULE_CP_SHARED_SECRET"); got != "new-secret" {
t.Errorf("SHARED_SECRET: want new-secret, got %q", got)
}
}
// TestRefreshEnvFromCP_CPUnreachableDoesNotFailBoot: network errors must
// return non-nil BUT main.go treats that as warn-and-continue. We assert
// the function returns an error (not a panic) so the caller can log.
func TestRefreshEnvFromCP_CPUnreachableDoesNotFailBoot(t *testing.T) {
t.Setenv("MOLECULE_ORG_ID", "org-abc")
t.Setenv("ADMIN_TOKEN", "t")
t.Setenv("MOLECULE_CP_URL", "http://127.0.0.1:1") // closed port
err := refreshEnvFromCP()
if err == nil {
t.Error("expected an error when CP is unreachable")
}
}
// TestRefreshEnvFromCP_NonOKPropagates: CP returns 500 → error.
func TestRefreshEnvFromCP_NonOKPropagates(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Error(w, "boom", http.StatusInternalServerError)
}))
defer srv.Close()
t.Setenv("MOLECULE_ORG_ID", "org-abc")
t.Setenv("ADMIN_TOKEN", "t")
t.Setenv("MOLECULE_CP_URL", srv.URL)
if err := refreshEnvFromCP(); err == nil {
t.Error("expected error on 500, got nil")
}
}
// TestRefreshEnvFromCP_RejectsOversizedValue: a single-value-over-4KiB
// payload must NOT poison the environment.
func TestRefreshEnvFromCP_RejectsOversizedValue(t *testing.T) {
giant := make([]byte, 5<<10)
for i := range giant {
giant[i] = 'x'
}
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
fmt.Fprintf(w, `{"MOLECULE_CP_SHARED_SECRET":%q}`, string(giant))
}))
defer srv.Close()
t.Setenv("MOLECULE_ORG_ID", "org-abc")
t.Setenv("ADMIN_TOKEN", "t")
t.Setenv("MOLECULE_CP_URL", srv.URL)
t.Setenv("MOLECULE_CP_SHARED_SECRET", "original")
if err := refreshEnvFromCP(); err != nil {
t.Fatalf("refreshEnvFromCP: %v", err)
}
if got := os.Getenv("MOLECULE_CP_SHARED_SECRET"); got != "original" {
t.Errorf("oversized value was applied — want %q, got %d bytes",
"original", len(got))
}
}

View File

@@ -30,6 +30,16 @@ import (
)
func main() {
// CP self-refresh: pull any operator-rotated config (e.g. a new
// MOLECULE_CP_SHARED_SECRET) before any other code reads env.
// Best-effort — if the CP is unreachable we keep booting with the
// env we were provisioned with. Older SaaS tenants predate PR #53
// and can arrive here with MOLECULE_CP_SHARED_SECRET unset; this
// is how they heal without SSH.
if err := refreshEnvFromCP(); err != nil {
log.Printf("CP env refresh: %v (continuing with baked-in env)", err)
}
// Secrets encryption. In MOLECULE_ENV=prod, boot refuses to start
// without a valid SECRETS_ENCRYPTION_KEY (fail-secure — Top-5 #5).
// In any other environment, missing keys just log a warning and

View File

@@ -0,0 +1,150 @@
package provisioner
import (
"context"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
)
// TestNewCPProvisioner_RequiresOrgID — self-hosted deployments don't
// have a MOLECULE_ORG_ID, and the provisioner must refuse to construct
// rather than silently phone home to the prod CP with an empty tenant.
func TestNewCPProvisioner_RequiresOrgID(t *testing.T) {
t.Setenv("MOLECULE_ORG_ID", "")
if _, err := NewCPProvisioner(); err == nil {
t.Error("want error when MOLECULE_ORG_ID is unset, got nil")
}
}
// TestNewCPProvisioner_FallsBackToProvisionSharedSecret — operators
// may set PROVISION_SHARED_SECRET on both sides of the wire with a
// single value; the tenant accepts that name as a fallback for
// MOLECULE_CP_SHARED_SECRET. The fallback is documented in
// NewCPProvisioner; this test is the regression gate.
func TestNewCPProvisioner_FallsBackToProvisionSharedSecret(t *testing.T) {
t.Setenv("MOLECULE_ORG_ID", "org-abc")
t.Setenv("MOLECULE_CP_SHARED_SECRET", "")
t.Setenv("PROVISION_SHARED_SECRET", "from-fallback")
p, err := NewCPProvisioner()
if err != nil {
t.Fatalf("NewCPProvisioner: %v", err)
}
if p.sharedSecret != "from-fallback" {
t.Errorf("sharedSecret = %q, want %q", p.sharedSecret, "from-fallback")
}
}
// TestAuthHeader_NoopWhenSecretEmpty — the self-hosted path that
// doesn't gate /cp/workspaces/* must not add a stray Authorization
// header (bearer-like content would be surprising to non-bearer
// intermediaries).
func TestAuthHeader_NoopWhenSecretEmpty(t *testing.T) {
p := &CPProvisioner{sharedSecret: ""}
req := httptest.NewRequest("GET", "http://x/", nil)
p.authHeader(req)
if got := req.Header.Get("Authorization"); got != "" {
t.Errorf("Authorization set to %q with empty secret; want unset", got)
}
}
// TestAuthHeader_SetsBearerWhenSecretSet — happy path.
func TestAuthHeader_SetsBearerWhenSecretSet(t *testing.T) {
p := &CPProvisioner{sharedSecret: "the-secret"}
req := httptest.NewRequest("GET", "http://x/", nil)
p.authHeader(req)
if got := req.Header.Get("Authorization"); got != "Bearer the-secret" {
t.Errorf("Authorization = %q, want %q", got, "Bearer the-secret")
}
}
// TestStart_HappyPath — Start posts to the stubbed CP, passes the
// bearer, and parses the returned instance_id.
func TestStart_HappyPath(t *testing.T) {
var sawBearer string
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
sawBearer = r.Header.Get("Authorization")
if r.URL.Path != "/cp/workspaces/provision" {
t.Errorf("unexpected path %s", r.URL.Path)
}
// Verify the request body round-trips our fields
var body cpProvisionRequest
_ = json.NewDecoder(r.Body).Decode(&body)
if body.WorkspaceID != "ws-1" || body.Runtime != "python" {
t.Errorf("body mismatch: %+v", body)
}
w.WriteHeader(http.StatusCreated)
_, _ = io.WriteString(w, `{"instance_id":"i-abc123","state":"pending"}`)
}))
defer srv.Close()
p := &CPProvisioner{
baseURL: srv.URL,
orgID: "org-1",
sharedSecret: "s3cret",
httpClient: srv.Client(),
}
id, err := p.Start(context.Background(), WorkspaceConfig{
WorkspaceID: "ws-1", Runtime: "python", Tier: 1, PlatformURL: "http://tenant",
})
if err != nil {
t.Fatalf("Start: %v", err)
}
if id != "i-abc123" {
t.Errorf("instance id = %q, want i-abc123", id)
}
if sawBearer != "Bearer s3cret" {
t.Errorf("server saw Authorization = %q, want Bearer s3cret", sawBearer)
}
}
// TestStart_Non201ReturnsStructuredError — when CP returns 401 with a
// structured {"error":"..."} body, Start surfaces that error message.
// Verifies the defense against log-leaking raw upstream bodies.
func TestStart_Non201ReturnsStructuredError(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusUnauthorized)
_, _ = io.WriteString(w, `{"error":"invalid credentials"}`)
}))
defer srv.Close()
p := &CPProvisioner{baseURL: srv.URL, orgID: "org-1", httpClient: srv.Client()}
_, err := p.Start(context.Background(), WorkspaceConfig{WorkspaceID: "ws-1", Runtime: "py"})
if err == nil {
t.Fatal("expected error on 401, got nil")
}
if !strings.Contains(err.Error(), "invalid credentials") {
t.Errorf("error message %q should include upstream error field", err.Error())
}
}
// TestStart_NoStructuredErrorFallsBackToSize — the anti-leak path:
// when upstream returns non-JSON, we refuse to log the body and
// report only the byte count, preventing Authorization header echoes
// from landing in our logs.
func TestStart_NoStructuredErrorFallsBackToSize(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
_, _ = io.WriteString(w, "raw proxy error page, could contain echoed headers")
}))
defer srv.Close()
p := &CPProvisioner{baseURL: srv.URL, orgID: "org-1", httpClient: srv.Client()}
_, err := p.Start(context.Background(), WorkspaceConfig{WorkspaceID: "ws-1", Runtime: "py"})
if err == nil {
t.Fatal("expected error on 500, got nil")
}
if strings.Contains(err.Error(), "raw proxy error") {
t.Errorf("error leaked raw body: %q", err.Error())
}
if !strings.Contains(err.Error(), "<unstructured body") {
t.Errorf("expected byte-count fallback, got %q", err.Error())
}
}

View File

@@ -310,14 +310,20 @@ func (s *Scheduler) fireSchedule(ctx context.Context, sched scheduleRow) {
// consecutive empties and escalate to 'stale' after 3 in a row.
isEmpty := isEmptyResponse(respBody)
if lastStatus == "ok" && isEmpty {
db.DB.ExecContext(ctx, `
// One query instead of UPDATE-then-SELECT: RETURNING hands back
// the post-increment value so the stale-threshold check doesn't
// cost a second roundtrip. This handler fires once per cron tick
// per schedule; at 100 tenants × dozens of schedules the saved
// query matters.
var consecEmpty int
if err := db.DB.QueryRowContext(ctx, `
UPDATE workspace_schedules
SET consecutive_empty_runs = consecutive_empty_runs + 1,
updated_at = now()
WHERE id = $1`, sched.ID)
// Check if we've crossed the stale threshold
var consecEmpty int
db.DB.QueryRowContext(ctx, `SELECT consecutive_empty_runs FROM workspace_schedules WHERE id = $1`, sched.ID).Scan(&consecEmpty)
WHERE id = $1
RETURNING consecutive_empty_runs`, sched.ID).Scan(&consecEmpty); err != nil {
log.Printf("Scheduler: '%s' empty-run bump failed: %v", sched.Name, err)
}
if consecEmpty >= 3 {
lastStatus = "stale"
lastError = fmt.Sprintf("empty response %d consecutive times — agent may be phantom-producing (#795)", consecEmpty)