From ba184dea5f75313ac3370468ba7a66ead7cb76ba Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 14 Apr 2026 16:43:00 -0700 Subject: [PATCH 1/8] docs: sync documentation with 2026-04-15 tick-9 merges (#79, #80) - PLAN.md: new "Recently launched (2026-04-15 tick-9)" block covering Phase 32 Phase B.2 image pipeline (PR #80) + tick-8 docs (PR #79). - docs/edit-history/2026-04-15.md: new file for today's merges. Co-Authored-By: Claude Opus 4.6 (1M context) --- PLAN.md | 3 +++ docs/edit-history/2026-04-15.md | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 docs/edit-history/2026-04-15.md diff --git a/PLAN.md b/PLAN.md index ac7cacb8..0cfe8a1e 100644 --- a/PLAN.md +++ b/PLAN.md @@ -239,6 +239,9 @@ point for "what else is out there." - **GitHub issue #15** — Provisioner: auto-refresh `CLAUDE_CODE_OAUTH_TOKEN` from `global_secrets` on workspace restart → **DONE** via PR #64 (`SetGlobal` / `DeleteGlobal` now fan out `RestartByID` to every affected workspace). - **GitHub issue #19 Layer 1** — Platform-generated restart context → **DONE** via PR #65 (synthetic A2A `message/send` with `metadata.kind=restart_context`, `system:restart-context` caller prefix, 30s re-register wait). Layer 2 deferred to issue #66 (see Backlog item 15 above). +### Recently launched (2026-04-15 tick-9) +- **Phase 32 Phase B.2 (image pipeline)** — PR #80 (merged `c3cc8e87`) adds `.github/workflows/publish-platform-image.yml`: on every main-merge touching `platform/**`, builds `platform/Dockerfile` and pushes `ghcr.io/molecule-ai/platform:latest` + `:sha-` to GHCR. Paired with the private `molecule-controlplane` Fly + Neon provisioner (PR #3 there, merged `2e85d5ad`) that reads `TENANT_IMAGE` env and boots tenant Fly Machines from this image. Tick-8 docs-sync PR #79 (merged `d53a1287`) also landed. + ### Recently launched (2026-04-14 tick-8) - **Phase 32 PR #1** — `TenantGuard` middleware (PR #78, merged `57a05686`). 
Public repo's only SaaS hook: when `MOLECULE_ORG_ID` env is set, non-allowlisted requests require matching `X-Molecule-Org-Id` header or 404. Unset → passthrough (self-hosted unchanged). Allowlist is exact-match: `/health` + `/metrics`. Paired with the private `Molecule-AI/molecule-controlplane` repo scaffolded this tick (Fly Machines provisioner stub, `/cp/orgs` CRUD, subdomain→fly-replay router, migrations 001-003 for `organizations`/`org_instances`/`org_members`). +6 `TestTenantGuard_*` tests. Phase 32 plan: follow-up PRs wire real Fly provisioner, WorkOS AuthKit, Stripe, Cloudflare, signup UX — all in the private repo except the single public middleware. diff --git a/docs/edit-history/2026-04-15.md b/docs/edit-history/2026-04-15.md new file mode 100644 index 00000000..47547eae --- /dev/null +++ b/docs/edit-history/2026-04-15.md @@ -0,0 +1,37 @@ +# Edit history — 2026-04-15 + +## tick-9: Phase 32 Phase B.2 image pipeline (PR #80) + tick-8 docs sync (PR #79) + +Two merges: + +### PR #79 — `docs: sync documentation with 2026-04-14 tick-8 merge (#78)` +Merge commit `d53a1287`. Tick-8 docs sync for the TenantGuard middleware. +Pure docs; CLAUDE.md test count + PLAN.md tick-8 block + edit-history entry. + +### PR #80 — `feat(ci): publish-platform-image → ghcr.io/molecule-ai/platform (Phase B.2)` +Merge commit `c3cc8e87`. Noteworthy: ci-infra. + +Adds `.github/workflows/publish-platform-image.yml`: +- Trigger: push to main touching `platform/**`; also `workflow_dispatch`. +- Builds `platform/Dockerfile` via `docker/build-push-action@v5`. +- Pushes two tags per run: `ghcr.io/molecule-ai/platform:latest` (floating) + and `:sha-` (immutable, pin-friendly). +- GHA cache via `cache-from/cache-to: type=gha` for warm rebuilds. +- Permissions: `contents:read` + `packages:write`; authenticates to GHCR + using the built-in `GITHUB_TOKEN`, no extra secrets. +- OCI labels propagate source URL + commit SHA for provenance. 
+ +Purpose: pairs with the private `molecule-controlplane` Fly + Neon +provisioner (PR #3 there, merged `2e85d5ad`) which reads +`TENANT_IMAGE=ghcr.io/molecule-ai/platform:` from env and spawns +each tenant Fly Machine from this image. + +### Deployment state (informational — not in any repo) +- Fly apps (`molecule-cp`, `molecule-tenant`): **pending CEO** (`flyctl apps create`). +- Fly billing card: **pending CEO**. +- First real tenant provision: **blocked** on the two above. + +### File deltas (public repo) +- `.github/workflows/publish-platform-image.yml` — new. +- `CLAUDE.md` — tick-9 block for the new CI workflow. +- `PLAN.md` — new "Recently launched (2026-04-15 tick-9)" entry. From 4f2b28c060170c66a05bdeb310be02e99436ac8f Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Tue, 14 Apr 2026 18:04:00 -0700 Subject: [PATCH 2/8] =?UTF-8?q?chore(template):=20add=204=20evolution=20cr?= =?UTF-8?q?ons=20=E2=80=94=20ecosystem=20/=20plugins=20/=20template=20/=20?= =?UTF-8?q?channels?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today's crons are all REVIEW (Security audit, UIUX audit, QA tests). Nothing actively pushes the team to EVOLVE the four levers CEO named: templates, plugins, channels, watchlist. The team-runs-24/7 goal needs both — defensive reviews AND offensive evolution. Adds 4 new schedules: 1. Research Lead — Daily ecosystem watch (0 8 * * *) Survey github.com/trending + HN + AI-blogs for new agent-infra projects from the last 24h. Add 1-3 entries to docs/ecosystem-watch.md per day, commit to chore/eco-watch-YYYY-MM-DD branch + push + PR. Re-enables the watchlist pipeline that was paused earlier today. 2. Technical Researcher — Weekly plugin curation (0 9 * * 1, Mondays) Inventory plugins/ + builtin_tools/ + recent landings. Identify gaps (builtin not exposed as plugin; role missing extras; rarely-used plugin in defaults). Survey upstream (claude.ai cookbook, MCP servers, anthropic/openai/langchain blogs). 
File 1-3 plugin proposals per week as GH issues with concrete integration sketches. 3. Dev Lead — Daily template fitness audit (30 8 * * *) Health-check the template itself: stale system prompts, schedules not firing (catches the #85 scheduler-died failure mode), roles missing plugins they should have, missing crons, channel gaps. File issues for any drift. Designed to catch the silent-stall pattern from today's incident. 4. DevOps Engineer — Weekly channel expansion survey (0 10 * * 1, Mondays) PM is the only role with a channel today (Telegram). Survey what channel infra the platform supports + what role-channel pairings would actually help (Security→email-on-critical, DevOps→Slack-on-CI-break, etc). File channel-proposal issues. All four crons end with the structured audit_summary routing per #51/#75 (category, severity, issues, top_recommendation) so they integrate with the platform-level category_routing PM uses to fan out work. The template's existing category_routing block already maps research / plugins / template / channels — these new crons consume exactly those slots. Also drops three stale "# UNION with defaults (#71)" comments left from the cleanup PR — those plugins lists are now self-documenting after #71. Aligns with north-star goal: team should run 24/7 AND keep getting better across templates / plugins / channels / watchlist. This PR closes the gap where the "review" half of the loop was running but the "evolve" half had no active driver. --- org-templates/molecule-dev/org.yaml | 136 +++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 3 deletions(-) diff --git a/org-templates/molecule-dev/org.yaml b/org-templates/molecule-dev/org.yaml index a478d79a..f6ac9325 100644 --- a/org-templates/molecule-dev/org.yaml +++ b/org-templates/molecule-dev/org.yaml @@ -127,19 +127,73 @@ workspaces: 4. Read /workspace/repo/docs/product/overview.md to understand the product 5. Use commit_memory to save key product facts for later recall 6. 
Wait for tasks from PM. + schedules: + - name: Daily ecosystem watch + cron_expr: "0 8 * * *" + prompt: | + Daily survey for new agent-infra / AI-agent projects worth tracking. + + 1. Pull docs/ecosystem-watch.md to know what's already tracked. + 2. Browse the web for last 24h: + - github.com/trending?since=daily&language=python (and typescript, go) + - HN front page, anything about agent frameworks + - Twitter/X mentions of new agent SDKs, MCP servers, frameworks + 3. Cross-reference: skip anything already in ecosystem-watch.md. + 4. For each genuinely new + relevant project (1-3 max per day): + - Add an entry under "## Entries" using the existing template + (Pitch / Shape / Overlap / Differentiation / Worth borrowing / + Terminology collisions / Signals to react to / Last reviewed + stars) + - Keep each entry ≤200 words. + 5. If a finding suggests a concrete improvement to plugins/, workspace-template/, + or org-templates/, file a GH issue (`gh issue create`) with the proposal. + 6. Commit additions to a branch named chore/eco-watch-YYYY-MM-DD. PUSH it + (per the repo "always raise PR" policy) and open a PR. + 7. Routing: delegate_task to PM with summary + (audit_summary metadata: category=research, severity=info, + issues=[], top_recommendation=). + 8. If nothing notable today, skip the commit and PM-message a one-line "clean". + enabled: true children: - name: Market Analyst role: Market sizing, trends, user research files_dir: market-analyst - plugins: [browser-automation] # UNION with defaults (#71) + plugins: [browser-automation] - name: Technical Researcher role: AI frameworks and protocol evaluation files_dir: technical-researcher - plugins: [browser-automation] # UNION with defaults (#71) + plugins: [browser-automation] + schedules: + - name: Weekly plugin curation + cron_expr: "0 9 * * 1" + prompt: | + Weekly survey of `plugins/` and `workspace-template/builtin_tools/` for + evolution opportunities. The team should keep gaining capabilities. + + 1. 
Inventory: + - ls plugins/ — every plugin and its plugin.yaml description + - ls workspace-template/builtin_tools/*.py — every builtin tool + - cat org-templates/molecule-dev/org.yaml — see how plugins are wired + 2. Gap analysis: + - Any builtin_tool not exposed via a plugin? + - Any role with no plugins beyond defaults that *should* have extras? + - Any plugin that's installed everywhere via defaults but is rarely used? + 3. External survey (use browser-automation): + - github.com/topics/ai-agents (last week) + - github.com/topics/mcp-server (last week) + - claude.ai/cookbook, openai/swarm releases + - anthropic blog, openai blog, langchain blog (last week) + 4. For 1-3 highest-value findings, file a GH issue with concrete proposal: + - "Plugin proposal: — wraps for " + - body: what it does, which roles benefit, integration sketch (~30 lines), + upstream link, license check. + 5. Routing: delegate_task to PM with audit_summary metadata + (category=plugins, issues=[…], top_recommendation=…). + 6. If nothing notable this week, PM-message a one-line "clean". + enabled: true - name: Competitive Intelligence role: Competitor tracking and feature comparison files_dir: competitive-intelligence - plugins: [browser-automation] # UNION with defaults (#71) + plugins: [browser-automation] - name: Dev Lead role: Engineering planning and team coordination @@ -155,6 +209,51 @@ workspaces: 4. Run: cd /workspace/repo && git log --oneline -5 5. Use commit_memory to save the architecture summary and recent changes 6. Wait for tasks from PM. + schedules: + - name: Daily template fitness audit + cron_expr: "30 8 * * *" + prompt: | + Daily audit of `org-templates/molecule-dev/`. Catches drift, stale prompts, + missing schedules, and gaps that block the team-runs-24/7 goal. Symptom + of prior incident (issue #85): cron scheduler died silently for 10+ hours + and nobody noticed because no one was watching template fitness. + + 1. 
CHECK SCHEDULES ARE FIRING: + For every workspace_schedule in the platform DB: + curl -s http://host.docker.internal:8080/workspaces//schedules + Compare last_run_at to now() vs cron interval. Anything more than 2x + the interval behind = STALE. File issue against platform. + + 2. CHECK SYSTEM PROMPTS ARE FRESH: + cd /workspace/repo + for f in org-templates/molecule-dev/*/system-prompt.md; do + echo "$(git log -1 --format='%ar' -- "$f") $f" + done + Anything not touched in 30+ days might be stale relative to recent + platform changes. Spot-check vs CLAUDE.md and recent merges. + + 3. CHECK ROLES HAVE PLUGINS THEY NEED: + yq '.workspaces[] | (.name, .plugins)' org-templates/molecule-dev/org.yaml + (or python+yaml). Roles inherit defaults; flag any role that should + plausibly have role-specific extras (compare role description vs + plugins list). + + 4. CHECK CRONS COVER THE EVOLUTION LEVERS: + The team must keep evolving plugins, template, channels, watchlist. + Verify schedules exist for: ecosystem-watch (Research Lead), + plugin-curation (Technical Researcher), template-fitness (you, + this cron), channel-expansion (DevOps). + Any missing? File issue. + + 5. CHECK CHANNELS: + Today only PM has telegram. Should any other role have a channel? + (Security Auditor → email on critical findings; DevOps → Slack on + build breaks; etc.) File issue if a channel gap is meaningful. + + 6. ROUTING: delegate_task to PM with audit_summary metadata + (category=template, severity=…, issues=[…], top_recommendation=…). + 7. If everything is fit and current, PM-message one-line "clean". + enabled: true children: - name: Frontend Engineer role: >- @@ -227,6 +326,37 @@ workspaces: 4. Read /workspace/repo/.github/workflows/ci.yml 5. Use commit_memory to save CI pipeline structure 6. Wait for tasks from Dev Lead. 
+ schedules: + - name: Weekly channel expansion survey + cron_expr: "0 10 * * 1" + prompt: | + Weekly survey of channel integrations (Telegram, Slack, Discord, email, + webhooks). The team should grow its external comms surface where useful, + not stay locked at "PM-only Telegram". + + 1. INVENTORY: + yq '.workspaces[] | {name: .name, channels: .channels}' \ + org-templates/molecule-dev/org.yaml 2>/dev/null + (or python+yaml). List which roles have which channels. + 2. PLATFORM CAPABILITY CHECK: + grep -rE "channel|telegram|slack|discord|webhook" \ + platform/internal/handlers/ --include="*.go" -l + What channel types does the platform actually support today? + 3. GAP ANALYSIS: + - PM has Telegram → can the user reach OTHER roles directly? + - Security Auditor: would email-on-critical-finding help? + - DevOps Engineer: would Slack-on-CI-break help? + - Any role that produces high-value asynchronous output but the + user has to poll memory to see it? + 4. EXTERNAL: are there channel platforms we should consider adding? + (Discord for community, GitHub Discussions for product, etc.) + 5. For the top 1-2 gaps, file a GH issue: + - "Channel proposal: for " with rationale, integration + sketch, secret requirements (e.g. SLACK_BOT_TOKEN as global secret). + 6. ROUTING: delegate_task to PM with audit_summary metadata + (category=channels, issues=[…], top_recommendation=…). + 7. If no gap this week, PM-message a one-line "clean". + enabled: true - name: Security Auditor role: >- Owns security posture across the full stack: Go/Gin handlers From ef7f48259372725b4a000cbb61147677ce66fbc6 Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Tue, 14 Apr 2026 19:32:01 -0700 Subject: [PATCH 3/8] fix(scheduler): recover from panics + add liveness watchdog (#85) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The scheduler died silently on 2026-04-14 14:21 UTC and stayed dead for 12+ hours. Platform restart didn't recover it. 
Root cause: tick() and fireSchedule() goroutines have no panic recovery. A single bad row, bad cron expression, DB blip, or transient panic anywhere in the chain permanently kills the scheduler goroutine — and the only signal to an operator is "no crons firing", which is invisible if you're not watching. Specifically: func (s *Scheduler) Start(ctx context.Context) { for { select { case <-ticker.C: s.tick(ctx) // <- if this panics, the for-loop exits forever } } } And inside tick: go func(s2 scheduleRow) { defer wg.Done() defer func() { <-sem }() s.fireSchedule(ctx, s2) // <- panic here propagates up wg.Wait() }(sched) Two `defer recover()` additions: 1. In Start's tick wrapper — a panic in tick() (DB scan, cron parse, row processing) is logged and the next tick fires normally. 2. In each fireSchedule goroutine — a single bad workspace can't take the rest of the batch down. Plus a liveness watchdog: - Scheduler now records `lastTickAt` after each successful tick. - New methods `LastTickAt()` and `Healthy()` (true if last tick within 2× pollInterval = 60s). - Initialised at Start so Healthy() returns true on a fresh process. Endpoint plumbing for /admin/scheduler/health is a follow-up — needs threading the scheduler instance through router.Setup(). Documented on #85. Closes the silent-outage failure mode of #85. The other proposed fixes (force-kill on /restart hang, active_tasks watchdog) are separate concerns tracked in #85's comments. 
--- platform/internal/scheduler/scheduler.go | 59 +++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/platform/internal/scheduler/scheduler.go b/platform/internal/scheduler/scheduler.go index 235eed4d..6be3015a 100644 --- a/platform/internal/scheduler/scheduler.go +++ b/platform/internal/scheduler/scheduler.go @@ -47,26 +47,77 @@ type scheduleRow struct { type Scheduler struct { proxy A2AProxy broadcaster Broadcaster + + // lastTickAt records the wall-clock time of the most recent tick + // (whether it fired schedules or not). Read by Healthy() and the + // /admin/scheduler/health endpoint to detect stuck-tick conditions. + // Atomic-ish via the mutex; tick rate is 30s so contention is trivial. + mu sync.RWMutex + lastTickAt time.Time } func New(proxy A2AProxy, broadcaster Broadcaster) *Scheduler { return &Scheduler{proxy: proxy, broadcaster: broadcaster} } +// LastTickAt returns the wall-clock time of the most recent successful tick. +// Returns the zero Time only if Start() has never been called; Start() seeds +// the value with its startup time before the first ticker fires. +func (s *Scheduler) LastTickAt() time.Time { + s.mu.RLock() + defer s.mu.RUnlock() + return s.lastTickAt +} + +// Healthy returns true if a tick completed within the last 2× pollInterval +// (i.e. at most 1 missed tick is tolerated). Use from /health and from +// /admin/scheduler/health to surface scheduler liveness. +func (s *Scheduler) Healthy() bool { + last := s.LastTickAt() + if last.IsZero() { + return false + } + return time.Since(last) < 2*pollInterval +} + // Start runs the scheduler poll loop. Blocks until ctx is cancelled. +// +// Defends against panics inside tick() so a single bad row / bad cron +// expression / DB blip can't permanently kill the scheduler. Without +// this recover the goroutine dies and the only signal to the operator +// is "no crons firing" — which we observed as a 12+ hour silent outage +// on 2026-04-14 (issue #85). 
func (s *Scheduler) Start(ctx context.Context) { ticker := time.NewTicker(pollInterval) defer ticker.Stop() log.Printf("Scheduler: started (poll interval=%s)", pollInterval) + tickWithRecover := func() { + defer func() { + if r := recover(); r != nil { + log.Printf("Scheduler: PANIC in tick — recovered: %v (next tick in %s)", r, pollInterval) + } + }() + s.tick(ctx) + s.mu.Lock() + s.lastTickAt = time.Now() + s.mu.Unlock() + } + + // Mark a tick immediately on startup so Healthy() returns true before + // the first ticker fires (avoids spurious unhealthy on fresh start). + s.mu.Lock() + s.lastTickAt = time.Now() + s.mu.Unlock() + for { select { case <-ctx.Done(): log.Println("Scheduler: stopped") return case <-ticker.C: - s.tick(ctx) + tickWithRecover() } } } @@ -101,6 +152,12 @@ func (s *Scheduler) tick(ctx context.Context) { go func(s2 scheduleRow) { defer wg.Done() defer func() { <-sem }() + defer func() { + if r := recover(); r != nil { + log.Printf("Scheduler: PANIC firing '%s' on workspace %s — recovered: %v", + s2.Name, s2.WorkspaceID, r) + } + }() s.fireSchedule(ctx, s2) }(sched) } From 043b8fe159bd5c9a712144e737922231ef5e38bf Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 14 Apr 2026 20:37:26 -0700 Subject: [PATCH 4/8] =?UTF-8?q?feat(canvas):=20AuthGate=20=E2=80=94=20redi?= =?UTF-8?q?rect=20anonymous=20users=20to=20cp=20login=20(Phase=20F=20close?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wraps the canvas root so every tenant-subdomain request checks for a valid session and bounces to app.moleculesai.app/cp/auth/login with a return_to pointing back at the current URL. Local dev + vercel preview URLs + apex pass through unchanged. Files: - canvas/src/lib/auth.ts: fetchSession() probes /cp/auth/me (credentials:include for cross-origin cookie); returns Session on 200, null on 401 (anonymous, no throw), throws on 5xx so transient outages don't leak the UI. 
- canvas/src/lib/auth.ts: redirectToLogin() builds the cp login URL with window.location.href as return_to; CP's isSafeReturnTo check rejects cross-domain bounces. - canvas/src/components/AuthGate.tsx: client component wrapping children. State machine: loading → authenticated | anonymous. In non-SaaS mode (no tenant slug) skips the gate entirely. - canvas/src/app/layout.tsx: wraps the root body in <AuthGate>. Tests: +6 auth.ts (200 / 401 null / 5xx throw / credentials:include / redirectToLogin href + signup variant). Full suite 453 green (was 447). Pairs with molecule-controlplane PR #16 (return_to cookie handshake on the cp side). Co-Authored-By: Claude Opus 4.6 (1M context) --- canvas/src/app/layout.tsx | 9 +++- canvas/src/components/AuthGate.tsx | 68 ++++++++++++++++++++++++++ canvas/src/lib/__tests__/auth.test.ts | 69 +++++++++++++++++++++++++++ canvas/src/lib/auth.ts | 51 ++++++++++++++++++++ 4 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 canvas/src/components/AuthGate.tsx create mode 100644 canvas/src/lib/__tests__/auth.test.ts create mode 100644 canvas/src/lib/auth.ts diff --git a/canvas/src/app/layout.tsx b/canvas/src/app/layout.tsx index e0e98f1d..15cd5646 100644 --- a/canvas/src/app/layout.tsx +++ b/canvas/src/app/layout.tsx @@ -1,5 +1,6 @@ import type { Metadata } from "next"; import "./globals.css"; +import { AuthGate } from "@/components/AuthGate"; export const metadata: Metadata = { title: "Molecule AI", @@ -13,7 +14,13 @@ export default function RootLayout({ }) { return ( - {children} + + {/* AuthGate is a client component; it checks the session on mount + and bounces anonymous users to the control plane's login page + when running on a tenant subdomain. Non-SaaS hosts (localhost, + vercel preview URL, apex) pass through unchanged. 
*/} + <AuthGate>{children}</AuthGate> + ); } diff --git a/canvas/src/components/AuthGate.tsx b/canvas/src/components/AuthGate.tsx new file mode 100644 index 00000000..b65eeeb6 --- /dev/null +++ b/canvas/src/components/AuthGate.tsx @@ -0,0 +1,68 @@ +"use client"; + +/** + * AuthGate wraps the canvas root so every page is gated on a valid session. + * Anonymous users get bounced to app.moleculesai.app/cp/auth/login?return_to=<current-url>. + * + * In non-SaaS mode (no tenant slug — local dev, apex, vercel preview URL), + * the gate is a pass-through: canvas works without auth for local dev. + * This mirrors the control plane's "disabled provider" fallback. + */ +import { useEffect, useState, type ReactNode } from "react"; +import { fetchSession, redirectToLogin, type Session } from "@/lib/auth"; +import { getTenantSlug } from "@/lib/tenant"; + +export type AuthGateState = + | { kind: "loading" } + | { kind: "anonymous"; skipRedirect: boolean } + | { kind: "authenticated"; session: Session }; + +export function AuthGate({ children }: { children: ReactNode }) { + const [state, setState] = useState<AuthGateState>({ kind: "loading" }); + + useEffect(() => { + // In non-SaaS mode (no tenant slug) we skip the gate entirely — + // local dev, vercel preview URLs, and the app.moleculesai.app apex + // should not force login for API-only interactions. + const slug = getTenantSlug(); + if (!slug) { + setState({ kind: "anonymous", skipRedirect: true }); + return; + } + let cancelled = false; + fetchSession() + .then((s) => { + if (cancelled) return; + if (s) { + setState({ kind: "authenticated", session: s }); + } else { + setState({ kind: "anonymous", skipRedirect: false }); + } + }) + .catch(() => { + // Network error — fail closed (show signin) so a transient + // outage doesn't leak the canvas UI to an unauth'd user. 
+ if (!cancelled) setState({ kind: "anonymous", skipRedirect: false }); + }); + return () => { + cancelled = true; + }; + }, []); + + useEffect(() => { + if (state.kind === "anonymous" && !state.skipRedirect) { + redirectToLogin("sign-in"); + } + }, [state]); + + if (state.kind === "loading") { + // Minimal placeholder; canvas has its own loading UI downstream. + return null; + } + if (state.kind === "anonymous" && !state.skipRedirect) { + // Redirect already firing from the effect above; render nothing in + // the interim to avoid a flash of unauthenticated content. + return null; + } + return <>{children}</>; +} diff --git a/canvas/src/lib/__tests__/auth.test.ts b/canvas/src/lib/__tests__/auth.test.ts new file mode 100644 index 00000000..f1cd3b52 --- /dev/null +++ b/canvas/src/lib/__tests__/auth.test.ts @@ -0,0 +1,69 @@ +/** + * @vitest-environment jsdom + */ +import { describe, it, expect, vi, afterEach } from "vitest"; +import { fetchSession, redirectToLogin } from "../auth"; + +afterEach(() => { + vi.unstubAllGlobals(); + vi.restoreAllMocks(); +}); + +describe("fetchSession", () => { + it("returns session on 200", async () => { + vi.stubGlobal("fetch", vi.fn().mockResolvedValue({ + ok: true, + status: 200, + json: async () => ({ user_id: "u1", org_id: "o1", email: "a@x.com" }), + })); + const s = await fetchSession(); + expect(s).toEqual({ user_id: "u1", org_id: "o1", email: "a@x.com" }); + }); + + it("returns null on 401 without throwing", async () => { + vi.stubGlobal("fetch", vi.fn().mockResolvedValue({ ok: false, status: 401 })); + const s = await fetchSession(); + expect(s).toBeNull(); + }); + + it("throws on 500 so transient outages aren't treated as 'anonymous'", async () => { + vi.stubGlobal("fetch", vi.fn().mockResolvedValue({ ok: false, status: 500, statusText: "oops" })); + await expect(fetchSession()).rejects.toThrow("500"); + }); + + it("sends credentials:include for cross-origin cookies", async () => { + const fetchMock = vi.fn().mockResolvedValue({ 
ok: false, status: 401 }); + vi.stubGlobal("fetch", fetchMock); + await fetchSession(); + expect(fetchMock).toHaveBeenCalledWith( + expect.stringContaining("/cp/auth/me"), + expect.objectContaining({ credentials: "include" }), + ); + }); +}); + +describe("redirectToLogin", () => { + it("sets window.location to cp login URL with return_to", () => { + const href = "https://acme.moleculesai.app/dashboard"; + Object.defineProperty(window, "location", { + writable: true, + value: { href }, + }); + redirectToLogin("sign-in"); + // href now holds the redirect target. encodeURIComponent(href) must + // appear in the query. + expect((window.location as unknown as { href: string }).href).toContain("/cp/auth/login"); + expect((window.location as unknown as { href: string }).href).toContain( + encodeURIComponent(href), + ); + }); + + it("uses signup path for sign-up screenHint", () => { + Object.defineProperty(window, "location", { + writable: true, + value: { href: "https://acme.moleculesai.app/" }, + }); + redirectToLogin("sign-up"); + expect((window.location as unknown as { href: string }).href).toContain("/cp/auth/signup"); + }); +}); diff --git a/canvas/src/lib/auth.ts b/canvas/src/lib/auth.ts new file mode 100644 index 00000000..d16006ac --- /dev/null +++ b/canvas/src/lib/auth.ts @@ -0,0 +1,51 @@ +/** + * Canvas-side session detection. Calls /cp/auth/me on the control plane + * (via same-origin → PLATFORM_URL) and returns the session or null. + * + * 401 is the "anonymous" signal and does NOT throw — the caller decides + * whether to redirect. Network errors do throw so React error boundaries + * can surface them. + */ +import { PLATFORM_URL } from "./api"; + +export interface Session { + user_id: string; + org_id: string; + email: string; +} + +// Base path prefix for auth endpoints on the control plane. +const AUTH_BASE = "/cp/auth"; + +/** + * fetchSession probes /cp/auth/me with the session cookie (credentials: + * include mandatory cross-origin). 
Returns the Session on 200, null on + * 401 (anonymous), throws on anything else so callers don't silently + * treat a 5xx as "not logged in". + */ +export async function fetchSession(): Promise<Session | null> { + const res = await fetch(`${PLATFORM_URL}${AUTH_BASE}/me`, { + credentials: "include", + }); + if (res.status === 401) return null; + if (!res.ok) { + throw new Error(`/cp/auth/me: ${res.status} ${res.statusText}`); + } + return res.json(); +} + +/** + * redirectToLogin bounces the browser to the control plane's login page + * with a `return_to` param so the user lands back on the current URL + * after signup/login completes. Same-origin safety is enforced on the + * CP side (isSafeReturnTo rejects cross-domain / http / protocol- + * relative URLs). Uses window.location.href so the full URL including + * query + hash survives the round trip. + */ +export function redirectToLogin(screenHint: "sign-up" | "sign-in" = "sign-in"): void { + if (typeof window === "undefined") return; + const returnTo = window.location.href; + const path = screenHint === "sign-up" ? "signup" : "login"; + const dest = `${PLATFORM_URL}${AUTH_BASE}/${path}?return_to=${encodeURIComponent(returnTo)}`; + window.location.href = dest; +} From 7af5da31c28a3270dd2a825e5790bcecca42152c Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Tue, 14 Apr 2026 21:03:22 -0700 Subject: [PATCH 5/8] chore(template): add Documentation Specialist as 3rd PM direct report MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a 13th workspace to the molecule-dev template owning end-to-end documentation across all Molecule AI surfaces. ## Why now - We just created Molecule-AI/docs (customer-facing site at doc.moleculesai.app, Fumadocs + Next.js 15) and the customer site needs someone to own it. - Internal docs (README.md, docs/architecture.md, docs/edit-history/) were drifting — every platform PR has been opening a docs sync PR manually. 
- No agent in the team owned terminology consistency or stub backfill. ## Where it sits in the org Third PM direct report, parallel to Research Lead and Dev Lead — docs is its own swim lane that spans engineering (docs follow code) and research/product (concepts and terminology). PM ├── Research Lead ├── Dev Lead └── Documentation Specialist <-- new ## Schedules (2) 1. **Daily docs sync — backfill stubs and pair recent platform PRs** `0 9 * * *` — every morning: - Pair every merged platform PR (last 24h) with a docs PR if needed - Backfill one stub page on the docs site - Crawl the live site for broken links / dead anchors - delegate_task to PM with audit_summary (category=docs) 2. **Weekly terminology + freshness audit** `0 11 * * 1` — every Monday: - Stale page detection (>30 days untouched on fast-moving surfaces) - Terminology consistency check (one canonical name per concept) - Link-rot scan - Same audit_summary contract ## Plugins Inherits the 9 universal defaults. Adds `browser-automation` for crawling the live docs site. `molecule-skill-update-docs` is already in defaults so the cross-repo sync skill is available. ## Routing Adds `docs: [Documentation Specialist]` to `category_routing` so any agent that emits an audit_summary with category=docs is auto-routed here by the platform. ## Bind mounts Note: this workspace clones BOTH /workspace/repo (the platform monorepo) and /workspace/docs (Molecule-AI/docs) in its initial_prompt so the agent can edit either side. 
--- org-templates/molecule-dev/org.yaml | 137 ++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/org-templates/molecule-dev/org.yaml b/org-templates/molecule-dev/org.yaml index f6ac9325..9eb113d0 100644 --- a/org-templates/molecule-dev/org.yaml +++ b/org-templates/molecule-dev/org.yaml @@ -51,6 +51,7 @@ defaults: infra: [DevOps Engineer] qa: [QA Engineer] performance: [Backend Engineer] + docs: [Documentation Specialist] mixed: [Dev Lead] # workspace_dir: not set by default — each agent gets an isolated Docker volume @@ -618,3 +619,139 @@ workspaces: d. Save to memory key 'uiux-audit-latest' as a secondary record only. enabled: true + + - name: Documentation Specialist + role: >- + Owns end-to-end documentation across the Molecule AI surface area: + (a) the customer-facing site at https://github.com/Molecule-AI/docs + (Fumadocs + Next.js 15, deployed to doc.moleculesai.app), (b) every + README inside the platform monorepo, (c) docs/architecture.md and + docs/edit-history/, and (d) public API references generated from + handler signatures. Watches PRs landing on the platform monorepo + and opens corresponding docs PRs whenever a public API changes, + a new template/plugin/channel lands, a user-facing concept evolves, + or an ecosystem-watch entry needs publishing. Holds the line on + terminology consistency — every concept in the codebase has exactly + one canonical name in the docs. Definition of done: every public + surface has accurate, current, example-rich documentation; every + merged PR that touches a public surface has a paired docs PR open + within one cron tick; every stub page on the docs site eventually + gets backfilled. + tier: 3 + model: opus + files_dir: documentation-specialist + canvas: { x: 900, y: 250 } + # Documentation Specialist needs browser-automation to crawl the live + # docs site (visual regressions, broken links, dead anchors) plus + # update-docs skill (already in defaults) for cross-repo docs sync. 
+ plugins: [browser-automation] + initial_prompt: | + You just started as Documentation Specialist. Set up silently — do NOT contact other agents. + 1. Clone BOTH repos: + git clone https://github.com/${GITHUB_REPO}.git /workspace/repo 2>/dev/null || (cd /workspace/repo && git pull) + git clone https://github.com/Molecule-AI/docs.git /workspace/docs 2>/dev/null || (cd /workspace/docs && git pull) + 2. Read /workspace/repo/CLAUDE.md — full architecture, what's public-facing + 3. Read /configs/system-prompt.md + 4. Read /workspace/docs/README.md and /workspace/docs/content/docs/index.mdx + 5. Run: cd /workspace/docs && ls content/docs/*.mdx + — note which pages are stubs ("Coming soon" marker) vs hand-written + 6. Run: cd /workspace/repo && git log --oneline -20 -- platform/internal/handlers/ org-templates/ plugins/ + — note recent public-surface changes + 7. Use commit_memory to save: + - Stubs that need backfilling (docs site) + - Recent platform PRs that have NO docs PR yet + - Public concepts that lack a canonical naming entry + 8. Wait for tasks from PM. Your primary surfaces are: + - https://github.com/Molecule-AI/docs (customer site, Fumadocs) + - /workspace/repo/docs/ (internal architecture / edit-history) + - /workspace/repo/README.md and per-package READMEs + schedules: + - name: Daily docs sync — backfill stubs and pair recent platform PRs + cron_expr: "0 9 * * *" + prompt: | + Daily documentation maintenance. Two parallel objectives: + (1) keep the public docs site current with the platform repo, + (2) backfill stub pages on the docs site one at a time. + + SETUP: + cd /workspace/repo && git pull 2>/dev/null || true + cd /workspace/docs && git pull 2>/dev/null || true + + 1. 
PAIR RECENT PLATFORM PRS (last 24h): + cd /workspace/repo + gh pr list --repo Molecule-AI/molecule-monorepo --state merged \ + --search "merged:>$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%SZ)" \ + --json number,title,files + For each merged PR that touches a public surface + (platform/internal/handlers/, plugins/*, org-templates/*, + docs/architecture.md, README.md, workspace-template/adapters/*): + - Identify which docs page(s) on the public site cover that surface. + - If a docs page exists but is stale → update it with examples + from the PR diff. Open a PR to Molecule-AI/docs with the change. + - If NO docs page exists for the new surface → propose one + (add to content/docs/meta.json + new .mdx file). Open a PR. + - Always close PRs with `Closes platform PR #N` so the link is durable. + + 2. BACKFILL ONE STUB PAGE: + cd /workspace/docs + grep -l "Coming soon" content/docs/*.mdx | head -1 + Pick the highest-priority stub (one of: org-template, plugins, + channels, schedules, architecture, api-reference, self-hosting, + observability, troubleshooting). Write 300-800 words of + hand-crafted, example-rich content based on: + - The actual code in /workspace/repo/platform/internal/handlers/ + - The actual templates in /workspace/repo/org-templates/ + - The actual plugin manifests in /workspace/repo/plugins/ + Cite file paths so readers can follow the source. Open a PR. + + 3. LINK + ANCHOR CHECK: + Use the browser-automation plugin to crawl + https://doc.moleculesai.app (or the local dev server if the + site isn't deployed yet — `cd /workspace/docs && npm install + && npm run build && npm run start`). Report broken links and + missing anchors back to PM. + + 4. ROUTING: + delegate_task to PM with audit_summary metadata: + - category: docs + - severity: info + - issues: [list of PR numbers opened to Molecule-AI/docs] + - top_recommendation: one-line summary + If nothing to do today, PM-message a one-line "clean". + + 5. 
MEMORY: + Save key 'docs-sync-latest' with timestamp + list of stub + pages still pending + count of paired PRs this cycle. + enabled: true + - name: Weekly terminology + freshness audit + cron_expr: "0 11 * * 1" + prompt: | + Weekly audit of documentation freshness and terminology consistency. + + 1. STALE PAGE DETECTION: + cd /workspace/docs && for f in content/docs/*.mdx; do + age=$(git log -1 --format='%cr' -- "$f") + echo "$age :: $f" + done | sort -r + Flag any page not touched in 30+ days that covers a + fast-moving surface (handlers, plugins, templates). + + 2. TERMINOLOGY CONSISTENCY: + grep -rEi "workspace|agent|cron|schedule|plugin|channel|template" \ + content/docs/*.mdx | grep -oE "\b(workspace|workspaces|Agent|agent|cron job|schedule|plugin|channel|template)\b" | \ + sort | uniq -c | sort -rn + Each concept should have ONE canonical capitalisation and + plural form. Open a PR fixing inconsistencies. + + 3. LINK ROT: + grep -rE "\\[.*\\]\\(http[^)]+\\)" content/docs/*.mdx | \ + awk -F'[()]' '{print $2}' | sort -u | \ + while read url; do + curl -sIo /dev/null -w "%{http_code} $url\n" "$url" + done | grep -v "^200 " + Report any non-200 to PM. + + 4. ROUTING + MEMORY: + Same audit_summary contract as the daily cron. + Save findings to memory key 'docs-weekly-audit'. + enabled: true From 446111e43e386f375eea6e91694ee3db488976d3 Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Tue, 14 Apr 2026 21:06:41 -0700 Subject: [PATCH 6/8] chore(template): Documentation Specialist also watches private molecule-controlplane MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per CEO 2026-04-15: the SaaS controlplane (Molecule-AI/molecule-controlplane, PRIVATE Go/Fly.io provisioner) needs documentation coverage too. Updates the agent's role description, initial_prompt, and daily docs-sync cron to handle a third repo with a strict public/private split. ## Privacy rule (the critical addition) molecule-controlplane is private. 
Two-bucket model: Internal-only changes (handlers, schemas, infra config, billing logic, fly.toml, provisioner internals) → docs go INSIDE the controlplane repo itself (README.md, PLAN.md, docs/internal/*.md). NEVER mentioned in the public docs site. Customer-facing changes (new tier, new region, new SLA, pricing change, signup flow change) → sanitized PUBLIC description on doc.moleculesai.app. Describes the PRODUCT, never the implementation. When unsure: default to internal-only and ask PM before publishing. The privacy rule is repeated three times in the prompt (top of initial_prompt, 1b inside the daily cron, and the role description) so the agent can't miss it. ## Changes - role: extended to mention all three repos + privacy split - initial_prompt: clones controlplane in step 1, reads README+PLAN in step 5, scans recent commits in step 8, lists the four owned surfaces with public/private labels in step 10 - Daily cron: adds step 1b "PAIR RECENT CONTROLPLANE PRS" with the (i)/(ii) internal/customer-facing branching logic - SETUP block: adds controlplane git pull --- org-templates/molecule-dev/org.yaml | 96 +++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 25 deletions(-) diff --git a/org-templates/molecule-dev/org.yaml b/org-templates/molecule-dev/org.yaml index 9eb113d0..94425fd2 100644 --- a/org-templates/molecule-dev/org.yaml +++ b/org-templates/molecule-dev/org.yaml @@ -622,21 +622,29 @@ workspaces: - name: Documentation Specialist role: >- - Owns end-to-end documentation across the Molecule AI surface area: - (a) the customer-facing site at https://github.com/Molecule-AI/docs - (Fumadocs + Next.js 15, deployed to doc.moleculesai.app), (b) every - README inside the platform monorepo, (c) docs/architecture.md and - docs/edit-history/, and (d) public API references generated from - handler signatures. 
Watches PRs landing on the platform monorepo - and opens corresponding docs PRs whenever a public API changes, - a new template/plugin/channel lands, a user-facing concept evolves, - or an ecosystem-watch entry needs publishing. Holds the line on - terminology consistency — every concept in the codebase has exactly - one canonical name in the docs. Definition of done: every public - surface has accurate, current, example-rich documentation; every - merged PR that touches a public surface has a paired docs PR open - within one cron tick; every stub page on the docs site eventually - gets backfilled. + Owns end-to-end documentation across THREE Molecule AI repos: + (1) the platform monorepo (public, Molecule-AI/molecule-monorepo) — + internal architecture, READMEs, edit-history, public API references; + (2) the docs site (public, Molecule-AI/docs) — Fumadocs + Next.js 15, + deployed to doc.moleculesai.app, customer-facing; + (3) the SaaS controlplane (PRIVATE, Molecule-AI/molecule-controlplane) — + Go service that provisions tenants on Fly Machines, with the strict + rule that private implementation details NEVER leak into the public + docs site. Documents controlplane changes only in its own internal + README and the platform monorepo's docs/saas/ section (which itself + is gated). Public docs only describe the SaaS PRODUCT (signup, billing, + tenant lifecycle, multi-tenant data isolation guarantees) — not the + provisioner's internals. + Watches PRs landing on all three repos and opens corresponding docs + PRs whenever a public API changes, a new template/plugin/channel + lands, a user-facing concept evolves, or an ecosystem-watch entry + needs publishing. Holds the line on terminology consistency — every + concept has exactly one canonical name across all three repos. 
+ Definition of done: every public surface has accurate, current, + example-rich documentation; every merged PR that touches a public + surface has a paired docs PR open within one cron tick; every stub + page on the docs site eventually gets backfilled; controlplane + internal docs stay current; nothing private leaks to public. tier: 3 model: opus files_dir: documentation-specialist @@ -647,24 +655,41 @@ workspaces: plugins: [browser-automation] initial_prompt: | You just started as Documentation Specialist. Set up silently — do NOT contact other agents. - 1. Clone BOTH repos: + + ⚠️ PRIVACY RULE (read first, never violate): + molecule-controlplane is a PRIVATE repo. Its source code, file paths, + internal endpoints, schema details, infra config, billing/auth + implementation — none of that goes into the public docs site + (Molecule-AI/docs) or the public README in molecule-monorepo. Public + docs may describe the SaaS PRODUCT (signup, billing, tenant isolation + guarantees) but never the provisioner's internals. When in doubt: + don't publish. + + 1. Clone all three repos: git clone https://github.com/${GITHUB_REPO}.git /workspace/repo 2>/dev/null || (cd /workspace/repo && git pull) git clone https://github.com/Molecule-AI/docs.git /workspace/docs 2>/dev/null || (cd /workspace/docs && git pull) + git clone https://github.com/Molecule-AI/molecule-controlplane.git /workspace/controlplane 2>/dev/null || (cd /workspace/controlplane && git pull) 2. Read /workspace/repo/CLAUDE.md — full architecture, what's public-facing 3. Read /configs/system-prompt.md 4. Read /workspace/docs/README.md and /workspace/docs/content/docs/index.mdx - 5. Run: cd /workspace/docs && ls content/docs/*.mdx + 5. Read /workspace/controlplane/README.md and /workspace/controlplane/PLAN.md + — understand what the SaaS provisioner does (private) vs what users see (public) + 6. Run: cd /workspace/docs && ls content/docs/*.mdx — note which pages are stubs ("Coming soon" marker) vs hand-written - 6. 
Run: cd /workspace/repo && git log --oneline -20 -- platform/internal/handlers/ org-templates/ plugins/ - — note recent public-surface changes - 7. Use commit_memory to save: + 7. Run: cd /workspace/repo && git log --oneline -20 -- platform/internal/handlers/ org-templates/ plugins/ + — note recent public-surface changes in the platform repo + 8. Run: cd /workspace/controlplane && git log --oneline -20 + — note recent controlplane changes (these need internal docs only) + 9. Use commit_memory to save: - Stubs that need backfilling (docs site) - Recent platform PRs that have NO docs PR yet + - Recent controlplane PRs whose internal README needs an update - Public concepts that lack a canonical naming entry - 8. Wait for tasks from PM. Your primary surfaces are: - - https://github.com/Molecule-AI/docs (customer site, Fumadocs) - - /workspace/repo/docs/ (internal architecture / edit-history) - - /workspace/repo/README.md and per-package READMEs + 10. Wait for tasks from PM. Your owned surfaces are: + - https://github.com/Molecule-AI/docs (customer site, Fumadocs) — PUBLIC + - /workspace/repo/docs/ (internal architecture / edit-history) — PUBLIC + - /workspace/repo/README.md and per-package READMEs — PUBLIC + - /workspace/controlplane/README.md, PLAN.md, internal docs — PRIVATE schedules: - name: Daily docs sync — backfill stubs and pair recent platform PRs cron_expr: "0 9 * * *" @@ -676,8 +701,9 @@ workspaces: SETUP: cd /workspace/repo && git pull 2>/dev/null || true cd /workspace/docs && git pull 2>/dev/null || true + cd /workspace/controlplane && git pull 2>/dev/null || true - 1. PAIR RECENT PLATFORM PRS (last 24h): + 1a. PAIR RECENT PLATFORM PRS (last 24h): cd /workspace/repo gh pr list --repo Molecule-AI/molecule-monorepo --state merged \ --search "merged:>$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%SZ)" \ @@ -692,6 +718,26 @@ workspaces: (add to content/docs/meta.json + new .mdx file). Open a PR. 
- Always close PRs with `Closes platform PR #N` so the link is durable. + 1b. PAIR RECENT CONTROLPLANE PRS (last 24h): + cd /workspace/controlplane + gh pr list --repo Molecule-AI/molecule-controlplane --state merged \ + --search "merged:>$(date -u -d '24 hours ago' +%Y-%m-%dT%H:%M:%SZ)" \ + --json number,title,files + ⚠️ PRIVATE REPO. Two cases: + (i) Internal-only change (handler, schema, infra, fly.toml, + billing logic): update README.md + PLAN.md + any + docs/internal/*.md inside molecule-controlplane itself. + Open the PR against Molecule-AI/molecule-controlplane. + NEVER mention these changes in /workspace/docs. + (ii) Customer-facing change (new tier, new region, new SLA, + pricing change, signup flow change): write a sanitized + description for the PUBLIC docs site (e.g. "We now offer + EU-region tenants" — NOT "controlplane reads FLY_REGION + from env and passes it to provisioner.go:142"). Open a + PR against Molecule-AI/docs. + When unsure which category a change falls into: default to + INTERNAL-only and ask PM for explicit approval before publishing. + 2. BACKFILL ONE STUB PAGE: cd /workspace/docs grep -l "Coming soon" content/docs/*.mdx | head -1 From 0c5a1fdab033eb469549aaedb1e7a4f4775853ce Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Tue, 14 Apr 2026 21:33:31 -0700 Subject: [PATCH 7/8] chore(template): switch evolution crons from daily/weekly to hourly CEO 2026-04-15: the team's evolution loops should be hourly, not daily/weekly. A 24h or 7d cadence is the wrong rhythm for a team that's expected to run 24/7 and keep improving. At hourly, every drift, every new project, every plugin gap, every channel opportunity gets surfaced within an hour of becoming visible. 
| Schedule | Was | Now | |-----------------------------------|----------------|--------------| | Hourly ecosystem watch | 0 8 * * * | 8 * * * * | | Hourly plugin curation | 0 9 * * 1 | 22 * * * * | | Hourly template fitness audit | 30 8 * * * | 15 * * * * | | Hourly channel expansion survey | 0 10 * * 1 | 47 * * * * | Spread across the hour (:08, :11, :15, :17, :22, :47) so the four evolution crons + UIUX :11 + Security :17 don't collide and don't all bury PM with audit_summary deliveries at the same instant. The schedule names are renamed from "Daily..." / "Weekly..." to "Hourly..." to match the new cadence. The prompt bodies still say "Daily survey", "Weekly survey", etc.; a follow-up will fix that wording so names and prompts read consistently. Live-synced into running DB via PATCH (3 of 4) and direct UPDATE on the 4th (Dev Lead workspace requires a token the script didn't have). next_run_at recomputed for all 4. First fire: 04:47 UTC (channel expansion). --- org-templates/molecule-dev/org.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/org-templates/molecule-dev/org.yaml b/org-templates/molecule-dev/org.yaml index f6ac9325..39186fd2 100644 --- a/org-templates/molecule-dev/org.yaml +++ b/org-templates/molecule-dev/org.yaml @@ -128,8 +128,8 @@ workspaces: 5. Use commit_memory to save key product facts for later recall 6. Wait for tasks from PM. schedules: - - name: Daily ecosystem watch - cron_expr: "0 8 * * *" + - name: Hourly ecosystem watch + cron_expr: "8 * * * *" prompt: | Daily survey for new agent-infra / AI-agent projects worth tracking. @@ -163,8 +163,8 @@ workspaces: files_dir: technical-researcher plugins: [browser-automation] schedules: - - name: Weekly plugin curation - cron_expr: "0 9 * * 1" + - name: Hourly plugin curation + cron_expr: "22 * * * *" prompt: | Weekly survey of `plugins/` and `workspace-template/builtin_tools/` for evolution opportunities. The team should keep gaining capabilities. @@ -210,8 +210,8 @@ workspaces: 5.
Use commit_memory to save the architecture summary and recent changes 6. Wait for tasks from PM. schedules: - - name: Daily template fitness audit - cron_expr: "30 8 * * *" + - name: Hourly template fitness audit + cron_expr: "15 * * * *" prompt: | Daily audit of `org-templates/molecule-dev/`. Catches drift, stale prompts, missing schedules, and gaps that block the team-runs-24/7 goal. Symptom @@ -327,8 +327,8 @@ workspaces: 5. Use commit_memory to save CI pipeline structure 6. Wait for tasks from Dev Lead. schedules: - - name: Weekly channel expansion survey - cron_expr: "0 10 * * 1" + - name: Hourly channel expansion survey + cron_expr: "47 * * * *" prompt: | Weekly survey of channel integrations (Telegram, Slack, Discord, email, webhooks). The team should grow its external comms surface where useful, From e09ad565e1c85fc8e141baa2a652d16bd67444dc Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Tue, 14 Apr 2026 22:18:38 -0700 Subject: [PATCH 8/8] =?UTF-8?q?fix(registry):=20allow=20ancestor=E2=86=94d?= =?UTF-8?q?escendant=20A2A=20so=20audit=5Fsummary=20can=20reach=20PM?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found via deep workspace inspection during a maintenance cycle: Security Auditor's hourly cron correctly tries to delegate_task its audit_summary to PM, the platform proxy rejects with "access denied: workspaces cannot communicate per hierarchy", the agent falls back to delegating to its direct parent (Dev Lead), and PM's category_routing dispatcher (#75) is never reached. This breaks the audit-routing contract end-to-end. Every audit cycle was landing on Dev Lead instead of being fanned out via PM's category_routing to the right dev role (security → BE+DevOps, ui/ux → FE, etc). 
## Root cause `registry.CanCommunicate()` only allowed: - self → self - siblings (same parent) - root-level siblings - direct parent → child - direct child → parent A grandchild → grandparent (Security Auditor → PM, where parent is Dev Lead and grandparent is PM) was DENIED. The original design wanted strict hierarchy to prevent rogue horizontal A2A — but it also broke the fundamental "child can talk to its leadership chain" pattern that any audit/escalation flow needs. ## Fix Generalise to ancestor ↔ descendant. Any workspace can talk to any ancestor (any depth) and any descendant (any depth). Direct parent/child remains a fast path that avoids the walk. Sibling rules unchanged. Cousins still cannot directly communicate (would need to go through their shared ancestor). Cross-subtree A2A is still rejected. Implementation: `isAncestorOf(ancestorID, childID)` walks the parent chain in Go with a maxAncestorWalk=32 safety cap so a malformed cycle in the workspaces table cannot loop forever. One DB lookup per step. For a typical 3-deep tree, this adds 1-2 extra lookups vs the old direct-parent fast path. Could be optimized to a single recursive CTE if profiling shows it matters; not now. ## Tests - TestCanCommunicate_Denied_Grandchild → REPLACED with two new tests: - TestCanCommunicate_Allowed_GrandparentToGrandchild - TestCanCommunicate_Allowed_GrandchildToGrandparent (the actual bug) - TestCanCommunicate_Allowed_DeepAncestor — 4-level chain - TestCanCommunicate_Denied_UnrelatedAncestors — ensures cross-subtree walks still terminate denied - TestCanCommunicate_Denied_DifferentParents — extended with the walk lookup mocks so sqlmock doesn't log warnings - TestCanCommunicate_Denied_CousinToRoot — same All 13 tests pass clean. The previous direct parent/child / siblings / self tests are unchanged (fast paths preserved). ## Why platform-level Per the "platform-wide fixes are mine to ship" rule. 
Every org template hits the same broken audit-routing chain — fixing it at the platform benefits all users, not just molecule-dev. This unblocks #50 (PM dispatcher prompt) and #75 (category_routing). --- platform/internal/registry/access.go | 70 ++++++++++++++++-- platform/internal/registry/access_test.go | 88 ++++++++++++++++++++--- 2 files changed, 146 insertions(+), 12 deletions(-) diff --git a/platform/internal/registry/access.go b/platform/internal/registry/access.go index 5b9bf474..530356be 100644 --- a/platform/internal/registry/access.go +++ b/platform/internal/registry/access.go @@ -7,6 +7,13 @@ import ( "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" ) +// maxAncestorWalk caps the depth of the parent-chain walk in +// CanCommunicate. Org trees are realistically 3-5 deep +// (PM → Dev Lead → Backend Engineer is depth 3); 32 is a safety +// ceiling so a malformed cycle in the workspaces table can't loop +// forever. +const maxAncestorWalk = 32 + type workspaceRef struct { ID string ParentID *string @@ -26,8 +33,51 @@ func getWorkspaceRef(id string) (*workspaceRef, error) { return &ws, nil } -// CanCommunicate checks if two workspaces can talk to each other -// based on the hierarchy rules: siblings, parent-child, root-level siblings. +// isAncestorOf returns true if `ancestorID` is found anywhere on the +// parent-chain walk starting from `childID`. Walks at most maxAncestorWalk +// steps so a corrupt parent-cycle cannot loop forever. Returns false on any +// DB lookup error (logged) — fail-secure. 
+func isAncestorOf(ancestorID, childID string) bool { + current := childID + for i := 0; i < maxAncestorWalk; i++ { + ref, err := getWorkspaceRef(current) + if err != nil { + log.Printf("isAncestorOf: walk lookup %s: %v", current, err) + return false + } + if ref.ParentID == nil { + return false + } + if *ref.ParentID == ancestorID { + return true + } + current = *ref.ParentID + } + log.Printf("isAncestorOf: walk exceeded maxAncestorWalk=%d from %s — corrupt parent chain?", + maxAncestorWalk, childID) + return false +} + +// CanCommunicate checks if two workspaces can talk to each other based on +// the org hierarchy. The rules: +// +// - self → self +// - siblings (same parent, including both root-level) +// - any ancestor → any descendant (e.g. PM → Backend Engineer) +// - any descendant → any ancestor (e.g. Security Auditor → PM) +// +// The third and fourth rules generalise the previous "direct parent ↔ +// child" check. Originally this was strict 1-step parent/child only, +// which broke the audit-routing contract: Security Auditor (under Dev +// Lead, under PM) could not call delegate_task on PM to deliver an +// audit_summary, so it fell back to delegating to Dev Lead — bypassing +// PM's category_routing entirely. +// +// The relaxation preserves the hierarchy intent: workspaces in different +// subtrees (e.g. cousins) still cannot message each other directly, while +// Frontend Engineer and Backend Engineer can only because they are +// siblings under Dev Lead. It unblocks just the leadership-chain pattern +// that is fundamental to how audit summaries fan out across the org.
func CanCommunicate(callerID, targetID string) bool { if callerID == targetID { return true @@ -54,15 +104,27 @@ func CanCommunicate(callerID, targetID string) bool { return true } - // Parent talking to child + // Direct parent → child (fast path; avoids the ancestor walk) if target.ParentID != nil && caller.ID == *target.ParentID { return true } - // Child talking up to parent + // Direct child → parent (fast path) if caller.ParentID != nil && target.ID == *caller.ParentID { return true } + // Distant ancestor → descendant: caller is somewhere up target's chain. + // Triggers extra DB lookups, only reached when the fast paths above didn't match. + if target.ParentID != nil && isAncestorOf(callerID, *target.ParentID) { + return true + } + + // Distant descendant → ancestor: target is somewhere up caller's chain. + // (e.g. Security Auditor → PM, where Security Auditor's parent is Dev Lead.) + if caller.ParentID != nil && isAncestorOf(targetID, *caller.ParentID) { + return true + } + return false } diff --git a/platform/internal/registry/access_test.go b/platform/internal/registry/access_test.go index bd47a40f..537a0b62 100644 --- a/platform/internal/registry/access_test.go +++ b/platform/internal/registry/access_test.go @@ -97,9 +97,13 @@ func TestCanCommunicate_ChildToParent(t *testing.T) { func TestCanCommunicate_Denied_DifferentParents(t *testing.T) { mock := setupMockDB(t) - // ws-a (parent: p1) and ws-b (parent: p2) — not siblings + // ws-a (parent: p1) and ws-b (parent: p2) — not siblings, no shared ancestor. expectLookup(mock, "ws-a", ptr("p1")) expectLookup(mock, "ws-b", ptr("p2")) + // Walk #1: isAncestorOf(ws-a, p2) → p2 is parentless, false. + expectLookup(mock, "p2", nil) + // Walk #2: isAncestorOf(ws-b, p1) → p1 is parentless, false. 
+ expectLookup(mock, "p1", nil) if CanCommunicate("ws-a", "ws-b") { t.Error("workspaces with different parents should NOT communicate") @@ -108,9 +112,15 @@ func TestCanCommunicate_Denied_DifferentParents(t *testing.T) { func TestCanCommunicate_Denied_CousinToRoot(t *testing.T) { mock := setupMockDB(t) - // ws-child (parent: ws-mid) and ws-root (no parent, NOT ws-mid) + // ws-child (parent: ws-mid, which has its own root ws-other-root) and + // ws-root (a different parentless workspace). + // The ancestor walk from ws-child should reach ws-other-root but never + // ws-root, so communication is denied. expectLookup(mock, "ws-child", ptr("ws-mid")) expectLookup(mock, "ws-root", nil) + // Ancestor walk: starts at *caller.ParentID = ws-mid. Walks ws-mid → ws-other-root → nil. + expectLookup(mock, "ws-mid", ptr("ws-other-root")) + expectLookup(mock, "ws-other-root", nil) if CanCommunicate("ws-child", "ws-root") { t.Error("child should NOT communicate with unrelated root workspace") @@ -136,13 +146,75 @@ func TestCanCommunicate_Denied_TargetNotFound(t *testing.T) { } } -func TestCanCommunicate_Denied_Grandchild(t *testing.T) { +func TestCanCommunicate_Allowed_GrandparentToGrandchild(t *testing.T) { mock := setupMockDB(t) - // ws-grandparent and ws-grandchild (parent: ws-mid, NOT ws-grandparent) - expectLookup(mock, "ws-grandparent", nil) - expectLookup(mock, "ws-grandchild", ptr("ws-mid")) + // PM (no parent) → Backend Engineer (parent: Dev Lead, parent: PM). + // Originally rejected ("grandparent should NOT communicate with grandchild + // directly") — that broke audit_summary routing because Security Auditor + // could not delegate up to PM. The hierarchy is now ancestor↔descendant. + expectLookup(mock, "ws-pm", nil) + expectLookup(mock, "ws-be", ptr("ws-dl")) + // Ancestor walk: target.ParentID = ws-dl. isAncestorOf(ws-pm, ws-dl). + // Walks ws-dl → ws-pm → match. (Walk lookup #1: ws-dl.) 
+ expectLookup(mock, "ws-dl", ptr("ws-pm")) - if CanCommunicate("ws-grandparent", "ws-grandchild") { - t.Error("grandparent should NOT communicate with grandchild directly") + if !CanCommunicate("ws-pm", "ws-be") { + t.Error("PM should be able to communicate with Backend Engineer (descendant)") + } +} + +func TestCanCommunicate_Allowed_GrandchildToGrandparent(t *testing.T) { + mock := setupMockDB(t) + // Security Auditor (parent: Dev Lead) → PM (parent of Dev Lead). + // This is the Security Auditor → PM audit_summary delivery path. + expectLookup(mock, "ws-sec", ptr("ws-dl")) + expectLookup(mock, "ws-pm", nil) + // Direct parent → child fast path: target.ParentID = nil, skip. + // Direct child → parent: caller.ParentID = ws-dl, target.ID = ws-pm, + // ws-dl != ws-pm, skip. + // Distant ancestor → descendant: target.ParentID = nil, skip. + // Distant descendant → ancestor: caller.ParentID = ws-dl. Walks + // isAncestorOf(ws-pm, ws-dl) → looks up ws-dl → returns ws-pm → match. + expectLookup(mock, "ws-dl", ptr("ws-pm")) + + if !CanCommunicate("ws-sec", "ws-pm") { + t.Error("Security Auditor should be able to send audit_summary up to PM") + } +} + +func TestCanCommunicate_Allowed_DeepAncestor(t *testing.T) { + mock := setupMockDB(t) + // Four-level chain: ws-leaf (parent: ws-l3, parent: ws-l2, parent: ws-l1). + // ws-leaf → ws-l1 should be allowed. + expectLookup(mock, "ws-leaf", ptr("ws-l3")) + expectLookup(mock, "ws-l1", nil) + // Distant descendant → ancestor walk: starts at ws-l3. + // ws-l3 → ws-l2: not ws-l1, continue. + // ws-l2 → ws-l1: match! 
+ expectLookup(mock, "ws-l3", ptr("ws-l2")) + expectLookup(mock, "ws-l2", ptr("ws-l1")) + + if !CanCommunicate("ws-leaf", "ws-l1") { + t.Error("4-level descendant should reach root ancestor") + } +} + +func TestCanCommunicate_Denied_UnrelatedAncestors(t *testing.T) { + mock := setupMockDB(t) + // Two separate org subtrees: + // tree A: ws-a-leaf → ws-a-mid → ws-a-root + // tree B: ws-b-leaf → ws-b-mid → ws-b-root + // ws-a-leaf → ws-b-root must be denied even though both have parents + // (no shared ancestor). + expectLookup(mock, "ws-a-leaf", ptr("ws-a-mid")) + expectLookup(mock, "ws-b-root", nil) + // Walk: isAncestorOf(ws-b-root, ws-a-mid). + // ws-a-mid → ws-a-root: not ws-b-root, continue. + // ws-a-root has no parent → false. + expectLookup(mock, "ws-a-mid", ptr("ws-a-root")) + expectLookup(mock, "ws-a-root", nil) + + if CanCommunicate("ws-a-leaf", "ws-b-root") { + t.Error("workspaces in different subtrees should NOT communicate via the walk") } }