From e906f49ec08e04e60456348f6787205b46af8e13 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sat, 18 Apr 2026 00:10:56 -0700 Subject: [PATCH] =?UTF-8?q?chore:=20open-source=20preparation=20=E2=80=94?= =?UTF-8?q?=20scrub=20secrets,=20add=20community=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Security: - Replace hardcoded Cloudflare account/zone/KV IDs in wrangler.toml with placeholders; add wrangler.toml to .gitignore, ship .example - Replace real EC2 IPs in docs with placeholders - Redact partial CF API token prefix in retrospective - Parameterize Langfuse dev credentials in docker-compose.infra.yml - Replace Neon project ID in runbook with a placeholder Community: - Add CONTRIBUTING.md (build, test, branch conventions, CI info) - Add CODE_OF_CONDUCT.md (Contributor Covenant 2.1) Cleanup: - Replace personal runner username/machine name in CI + PLAN.md - Replace personal tenant URL in MCP setup guide - Replace personal author field in bundle-system doc - Replace personal login in webhook test fixture - Rewrite cryptominer incident reference as generic security remediation - Remove private repo commit hashes from PLAN.md Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --- .github/workflows/ci.yml | 4 +- .gitignore | 3 + CODE_OF_CONDUCT.md | 41 +++++ CONTRIBUTING.md | 158 ++++++++++++++++++ PLAN.md | 10 +- docker-compose.infra.yml | 10 +- docs/agent-runtime/bundle-system.md | 2 +- docs/architecture/tenant-image-upgrades.md | 2 +- docs/architecture/wildcard-dns-proxy.md | 2 +- docs/guides/mcp-server-setup.md | 2 +- .../2026-04-17-saas-buildout.md | 8 +- .../2026-04-18-tunnel-migration.md | 6 +- docs/runbooks/saas-secrets.md | 50 +++--- infra/cloudflare-worker/wrangler.toml | 7 +- infra/cloudflare-worker/wrangler.toml.example | 20 +++ .../handlers/webhooks_workflow_test.go | 2 +- 16 files changed, 275 insertions(+), 52 deletions(-) create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 
infra/cloudflare-worker/wrangler.toml.example diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 73e5a7a1..846991c5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -152,7 +152,7 @@ jobs: Once it completes (~3–5 min), apply on the host machine with: ```bash - cd /g/personal_programs/molecule-monorepo + cd /path/to/molecule-monorepo git pull origin main docker compose pull canvas && docker compose up -d canvas ``` @@ -182,7 +182,7 @@ jobs: steps: - uses: actions/checkout@v4 # setup-python@v5 cannot write to /Users/runner (GitHub-hosted path) on - # the self-hosted macOS arm64 runner (user: hongming-claw) and also hits + # the self-hosted macOS arm64 runner (user: <runner-user>) and also hits # EACCES on /usr/local/bin due to macOS SIP. Skip it — Homebrew installs # Python 3.11 at /opt/homebrew/opt/python@3.11 which is already on PATH. - name: Verify Python 3.11 (Homebrew) diff --git a/.gitignore b/.gitignore index f665de99..8789c606 100644 --- a/.gitignore +++ b/.gitignore @@ -122,6 +122,9 @@ org-templates/**/.auth-token .secrets/ *.pem +# Cloudflare Worker config with real account/zone/KV IDs — use wrangler.toml.example instead +infra/cloudflare-worker/wrangler.toml + # Cloned-via-manifest dirs — populated locally by scripts/clone-manifest.sh, # tracked in their own standalone repos. Never commit to core. 
# Ignore all cloned org-template content except the molecule-dev reference diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..2e33a79e --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,41 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity and +orientation. + +## Our Standards + +Examples of behavior that contributes to a positive environment: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior: + +- The use of sexualized language or imagery and unwelcome sexual attention +- Trolling, insulting/derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information without explicit permission +- Other conduct which could reasonably be considered inappropriate + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the project maintainers at **hongmingwangrabbit@gmail.com**. + +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. + +## Attribution + +This Code of Conduct is adapted from the +[Contributor Covenant](https://www.contributor-covenant.org), version 2.1. 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..9dd0b9d5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,158 @@ +# Contributing to Molecule AI + +Thanks for your interest in contributing to Molecule AI! This guide covers the +development workflow, conventions, and how to get your changes merged. + +## Getting Started + +### Prerequisites + +- **Go 1.25+** — platform backend +- **Node.js 20+** — canvas frontend +- **Python 3.11+** — workspace runtime +- **Docker** — infrastructure services (Postgres, Redis) +- **Git** — with hooks path set to `.githooks` + +### Setup + +```bash +# Clone the repo +git clone https://github.com/Molecule-AI/molecule-monorepo.git +cd molecule-monorepo + +# Install git hooks +git config core.hooksPath .githooks + +# Start infrastructure (Postgres, Redis, Langfuse, Temporal) +./infra/scripts/setup.sh + +# Build and run the platform +cd platform +go run ./cmd/server + +# In a separate terminal, run the canvas +cd canvas +npm install +npm run dev +``` + +### Environment Variables + +Copy `.env.example` to `.env` and fill in your values: +```bash +cp .env.example .env +``` + +See `CLAUDE.md` for a full list of environment variables and their purposes. + +## Development Workflow + +### Branch Naming + +Use prefixed branches: +- `feat/` — new features +- `fix/` — bug fixes +- `chore/` — maintenance, deps, CI +- `docs/` — documentation only + +**Never push directly to `main`.** All changes go through pull requests. + +### Commits + +Write concise commit messages that focus on the "why": +``` +fix(canvas): prevent infinite re-render on WebSocket reconnect + +The useEffect dependency array included the entire nodes object, +causing a render loop when any node position changed. 
+``` + +### Pull Requests + +- Keep PRs focused — one concern per PR +- Include a test plan in the PR description +- PRs are merged with **merge commits** (not squash or rebase) + +### Running Tests + +```bash +# Go (platform) +cd platform && go test -race ./... + +# Canvas (Next.js) +cd canvas && npm test + +# Workspace runtime (Python) +cd workspace-template && python -m pytest -v + +# E2E API tests (requires running platform) +bash tests/e2e/test_api.sh +``` + +### Pre-commit Hooks + +The `.githooks/pre-commit` hook enforces: +- `'use client'` directive on React hook files +- Dark theme only (no white/light CSS classes) +- No SQL injection patterns (`fmt.Sprintf` with SQL) +- No leaked secrets (`sk-ant-`, `ghp_`, `AKIA`) + +Fix violations before committing — the hook will reject the commit. + +### CI Pipeline + +CI runs on GitHub Actions with a self-hosted runner. External contributors: +PRs from forks will not trigger CI automatically. A maintainer will review +and run CI manually. + +| Job | What it checks | +|-----|---------------| +| platform-build | Go build + vet + `go test -race` | +| canvas-build | npm build + vitest | +| python-lint | pytest with coverage | +| e2e-api | Full API test suite (62 tests) | +| shellcheck | Shell script linting | + +## Code Style + +### Go (Platform) +- Standard `gofmt` formatting +- `go vet` must pass +- No `fmt.Sprintf` in SQL queries (use parameterized queries) +- Prefer function injection over import cycles + +### TypeScript (Canvas) +- Strict mode enabled +- No `any` types (use `unknown` or proper types) +- Use `ConfirmDialog` component, never native `confirm/alert/prompt` +- Dark theme only — no white/light CSS classes + +### Python (Workspace Runtime) +- Type hints on public functions +- pytest for all tests + +## Architecture Overview + +See `CLAUDE.md` for detailed architecture documentation, including: +- Component diagram (Platform, Canvas, Workspace Runtime) +- Key architectural patterns +- Database schema and 
migrations +- API route reference + +## Reporting Issues + +Use GitHub Issues with a clear title and reproduction steps. Include: +- What you expected +- What actually happened +- Platform/OS version +- Relevant logs or screenshots + +## Security + +If you discover a security vulnerability, please report it privately via +GitHub Security Advisories rather than opening a public issue. + +## License + +By contributing, you agree that your contributions will be licensed under the +same [Business Source License 1.1](LICENSE) that covers this project. diff --git a/PLAN.md b/PLAN.md index 3fb5c9aa..87123a32 100644 --- a/PLAN.md +++ b/PLAN.md @@ -270,10 +270,10 @@ point for "what else is out there." - `#236` log-injection in the #209 security-event log line — attacker-controlled source_id echoed via `%s` allowed fake log entries; switched to `%q`. **CI / infra.** -- `#186` + controlplane `#28` — every CI job migrated from `ubuntu-latest` to `[self-hosted, macos, arm64]` (Mac mini `hongming-m1-mini`). Non-trivial: `services:` replaced with inline `docker run` containers (ports 15432/16379), `actions/setup-python` bypassed via Homebrew python3.11 on `$GITHUB_PATH`, `docker/setup-qemu-action` added for cross-arch builds. Workaround for GH Actions billing cap on private repos. +- `#186` + controlplane `#28` — every CI job migrated from `ubuntu-latest` to `[self-hosted, macos, arm64]` (Mac mini `self-hosted-runner`). Non-trivial: `services:` replaced with inline `docker run` containers (ports 15432/16379), `actions/setup-python` bypassed via Homebrew python3.11 on `$GITHUB_PATH`, `docker/setup-qemu-action` added for cross-arch builds. Workaround for GH Actions billing cap on private repos. 
- `#149` independent heartbeat pulse goroutine so long cron fires don't look stale on `/admin/liveness` (#140) - `#211` migration runner regression (see #212 above — PR #212 is the fix) -- **Fly registry `FLY_API_TOKEN`** rotated to a deploy token scoped to `molecule-tenant` (previously personal token, invalidated by `flyctl auth login` during the malware cleanup) +- **Fly registry `FLY_API_TOKEN`** rotated to a deploy token scoped to `molecule-tenant` (previously personal token, was rotated during the security incident remediation) **Platform / Scheduler reliability.** - `#95` panic-recover in scheduler `tick()` + per-fire goroutines (closes #85) @@ -308,10 +308,10 @@ point for "what else is out there." **Outstanding (user action):** `#126` Slack adapter (Phase-H product decision), `#160` Claude Max OAuth quota (wait for 2026-04-17 23:00Z reset OR upgrade OR switch to ANTHROPIC_API_KEY), `#191` runner persistent-state docs (P3), `#199` Fly registry token (**resolved** this session but publish-platform-image re-run pending runner), Stripe Atlas application (launch blocker, 2-week lead). ### Recently launched (2026-04-15 tick-9) -- **Phase 32 Phase B.2 (image pipeline)** — PR #80 (merged `c3cc8e87`) adds `.github/workflows/publish-platform-image.yml`: on every main-merge touching `platform/**`, builds `platform/Dockerfile` and pushes `ghcr.io/molecule-ai/platform:latest` + `:sha-` to GHCR. Paired with the private `molecule-controlplane` Fly + Neon provisioner (PR #3 there, merged `2e85d5ad`) that reads `TENANT_IMAGE` env and boots tenant Fly Machines from this image. Tick-8 docs-sync PR #79 (merged `d53a1287`) also landed. +- **Phase 32 Phase B.2 (image pipeline)** — PR #80 adds `.github/workflows/publish-platform-image.yml`: on every main-merge touching `platform/**`, builds `platform/Dockerfile` and pushes `ghcr.io/molecule-ai/platform:latest` + `:sha-` to GHCR. 
Paired with the private `molecule-controlplane` Fly + Neon provisioner (PR #3 there) that reads `TENANT_IMAGE` env and boots tenant Fly Machines from this image. Tick-8 docs-sync PR #79 also landed. ### Recently launched (2026-04-14 tick-8) -- **Phase 32 PR #1** — `TenantGuard` middleware (PR #78, merged `57a05686`). Public repo's only SaaS hook: when `MOLECULE_ORG_ID` env is set, non-allowlisted requests require matching `X-Molecule-Org-Id` header or 404. Unset → passthrough (self-hosted unchanged). Allowlist is exact-match: `/health` + `/metrics`. Paired with the private `Molecule-AI/molecule-controlplane` repo scaffolded this tick (Fly Machines provisioner stub, `/cp/orgs` CRUD, subdomain→fly-replay router, migrations 001-003 for `organizations`/`org_instances`/`org_members`). +6 `TestTenantGuard_*` tests. Phase 32 plan: follow-up PRs wire real Fly provisioner, WorkOS AuthKit, Stripe, Cloudflare, signup UX — all in the private repo except the single public middleware. +- **Phase 32 PR #1** — `TenantGuard` middleware (PR #78). Public repo's only SaaS hook: when `MOLECULE_ORG_ID` env is set, non-allowlisted requests require matching `X-Molecule-Org-Id` header or 404. Unset → passthrough (self-hosted unchanged). Allowlist is exact-match: `/health` + `/metrics`. Paired with the private `Molecule-AI/molecule-controlplane` repo scaffolded this tick (Fly Machines provisioner stub, `/cp/orgs` CRUD, subdomain→fly-replay router, migrations 001-003 for `organizations`/`org_instances`/`org_members`). +6 `TestTenantGuard_*` tests. Phase 32 plan: follow-up PRs wire real Fly provisioner, WorkOS AuthKit, Stripe, Cloudflare, signup UX — all in the private repo except the single public middleware. 
### Recently launched (2026-04-14 tick-7) - **GitHub issue #24** — Runtime-added workspace_schedules drift on org re-import → **DONE** via PR #76 (new `source` column on `workspace_schedules` via migration `022`; org/import now upserts with `ON CONFLICT (workspace_id, name) DO UPDATE ... WHERE source='template'`, so runtime-added rows survive re-imports; legacy rows backfilled to `'template'`; +3 tests). @@ -443,7 +443,7 @@ self-hosted per-customer). Ordered by dependency + ROI. - Migration runner safety fix landed (#212) — `*.down.sql` filter; was wiping `workspace_auth_tokens` on every restart - Workspace auth tokens now revoked on workspace delete (#110) - All known unauth admin routes gated; #138 canvas regression resolved via field-level authz + `CanvasOrBearer` middleware -- Self-hosted Mac mini CI runner replaced GH-hosted Linux to bypass private-repo Actions billing cap; `FLY_API_TOKEN` rotated to a deploy token scoped to `molecule-tenant` after the personal token was invalidated by `flyctl auth login` during the 2025-12-06 cryptominer cleanup +- Self-hosted Mac mini CI runner replaced GH-hosted Linux to bypass private-repo Actions billing cap; `FLY_API_TOKEN` rotated to a deploy token scoped to `molecule-tenant` after the token was rotated during the security incident remediation - `/legal/{terms,privacy,dpa,acceptable}` live at `https://app.moleculesai.app/legal/*` **Known open issues on the live system:** diff --git a/docker-compose.infra.yml b/docker-compose.infra.yml index 9237ebf2..d6ce7392 100644 --- a/docker-compose.infra.yml +++ b/docker-compose.infra.yml @@ -56,7 +56,7 @@ services: environment: CLICKHOUSE_DB: langfuse CLICKHOUSE_USER: langfuse - CLICKHOUSE_PASSWORD: langfuse + CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-langfuse-dev} volumes: - clickhousedata:/var/lib/clickhouse healthcheck: @@ -106,13 +106,13 @@ services: condition: service_completed_successfully environment: DATABASE_URL: 
postgres://${POSTGRES_USER:-dev}:${POSTGRES_PASSWORD:-dev}@postgres:5432/langfuse - CLICKHOUSE_URL: clickhouse://langfuse:langfuse@clickhouse:9000/langfuse + CLICKHOUSE_URL: clickhouse://langfuse:${CLICKHOUSE_PASSWORD:-langfuse-dev}@clickhouse:9000/langfuse CLICKHOUSE_USER: langfuse - CLICKHOUSE_PASSWORD: langfuse + CLICKHOUSE_PASSWORD: ${CLICKHOUSE_PASSWORD:-langfuse-dev} LANGFUSE_AUTO_CLICKHOUSE_MIGRATION_DISABLED: "true" - NEXTAUTH_SECRET: langfuse-dev-secret + NEXTAUTH_SECRET: ${LANGFUSE_SECRET:-changeme-langfuse-secret} NEXTAUTH_URL: http://localhost:3001 - SALT: langfuse-dev-salt + SALT: ${LANGFUSE_SALT:-changeme-langfuse-salt} ports: - "3001:3000" diff --git a/docs/agent-runtime/bundle-system.md b/docs/agent-runtime/bundle-system.md index 448e6d69..3c74450a 100644 --- a/docs/agent-runtime/bundle-system.md +++ b/docs/agent-runtime/bundle-system.md @@ -35,7 +35,7 @@ A workspace bundle is the portable unit of the platform. It is a single `.bundle }, "sub_workspaces": [], "agent_card": { "...": "A2A card snapshot" }, - "author": "hongming", + "author": "your-name", "version": "1.2.0" } ``` diff --git a/docs/architecture/tenant-image-upgrades.md b/docs/architecture/tenant-image-upgrades.md index ad6f6778..40ae03cc 100644 --- a/docs/architecture/tenant-image-upgrades.md +++ b/docs/architecture/tenant-image-upgrades.md @@ -109,7 +109,7 @@ Upgrade flow: 6. 
Next upgrade: blue becomes the new slot Worker routing: - KV key: "hongming2" → {"ip": "3.144.193.40", "port": 8081} + KV key: "example-org" → {"ip": "<EC2_IP>", "port": 8081} (port defaults to 8080 when not in KV) ``` diff --git a/docs/architecture/wildcard-dns-proxy.md b/docs/architecture/wildcard-dns-proxy.md index b29646e7..e9a43fe8 100644 --- a/docs/architecture/wildcard-dns-proxy.md +++ b/docs/architecture/wildcard-dns-proxy.md @@ -125,7 +125,7 @@ New public endpoint (no auth — needed by the Worker which has no session): { "slug": "acme", "status": "running", - "ip": "18.220.182.88", + "ip": "<EC2_IP>", "region": "us-east-2" } diff --git a/docs/guides/mcp-server-setup.md b/docs/guides/mcp-server-setup.md index c235b6ab..28a218b8 100644 --- a/docs/guides/mcp-server-setup.md +++ b/docs/guides/mcp-server-setup.md @@ -33,7 +33,7 @@ Add to your project's `.mcp.json`: For production/SaaS deployments, set `MOLECULE_URL` to your tenant URL: ```json -"MOLECULE_URL": "https://hongming-wang.moleculesai.app" +"MOLECULE_URL": "https://your-org.moleculesai.app" ``` ### 3. Verify diff --git a/docs/retrospectives/2026-04-17-saas-buildout.md b/docs/retrospectives/2026-04-17-saas-buildout.md index c851367a..318cfc08 100644 --- a/docs/retrospectives/2026-04-17-saas-buildout.md +++ b/docs/retrospectives/2026-04-17-saas-buildout.md @@ -78,11 +78,11 @@ ### 1. Wildcard DNS record changed 4 times in one session The wildcard A record for `*.moleculesai.app` was pointed at: -1. `18.220.182.88` (real EC2 IP) — initial +1. `<EC2_IP_1>` (real EC2 IP) — initial 2. `198.51.100.1` (RFC 5737 TEST-NET) — Cloudflare blocked it (1003) -3. `3.16.109.132` (terminated EC2) — caused 1003 for all subdomains -4. `3.143.250.95` (another terminated EC2) — same issue -5. `3.131.96.216` (final live EC2) — current +3. `<EC2_IP_2>` (terminated EC2) — caused 1003 for all subdomains +4. `<EC2_IP_3>` (another terminated EC2) — same issue +5. 
`<EC2_IP_4>` (final live EC2) — current **Impact:** Every subdomain queried during configs 2-4 got permanently cached as 1003 at Cloudflare's edge. Cache purge didn't help (different diff --git a/docs/retrospectives/2026-04-18-tunnel-migration.md b/docs/retrospectives/2026-04-18-tunnel-migration.md index 997bd94a..1e71a239 100644 --- a/docs/retrospectives/2026-04-18-tunnel-migration.md +++ b/docs/retrospectives/2026-04-18-tunnel-migration.md @@ -34,7 +34,7 @@ Also closed issue #920 (Slack OAuth) and commented on #889 (VULN-004 dead letter ### 2. Cloudflare API Token — Tunnel Permission -**Problem:** The existing CF API token (`cfut_loLR...`) had DNS:Edit but NOT Cloudflare Tunnel:Edit permission. Tunnel create/list/delete calls returned `code 10000: Authentication error`. +**Problem:** The existing CF API token (`cfut_****...`) had DNS:Edit but NOT Cloudflare Tunnel:Edit permission. Tunnel create/list/delete calls returned `code 10000: Authentication error`. **Fix:** CEO added Account → Cloudflare Tunnel → Edit permission in Cloudflare Dashboard → API Tokens. @@ -142,7 +142,7 @@ User → slug.moleculesai.app (CNAME → tunnel-id.cfargotunnel.com, proxied) ## Known Issues & Risks ### 1. Worker Must Stay Until All Tenants Migrate -The Worker route `*.moleculesai.app/*` still serves existing tenants (e.g., `hongmingwang.moleculesai.app`). Cannot delete until every tenant has a tunnel + CNAME. The Worker passthrough for reserved/multi-level slugs is the bridge. +The Worker route `*.moleculesai.app/*` still serves existing tenants (e.g., `<tenant-slug>.moleculesai.app`). Cannot delete until every tenant has a tunnel + CNAME. The Worker passthrough for reserved/multi-level slugs is the bridge. ### 2. Worker Source Not in Version Control The Worker code lives in `/tmp/molecule-tenant-proxy/` — not tracked in any repo. Needs to be committed somewhere before the session ends. 
Two changes were deployed: @@ -176,7 +176,7 @@ If `cloudflared` crashes on the EC2 but the instance stays running, the tunnel g ### Short-term (this week) -- [ ] **Migrate `hongmingwang` tenant to tunnel** — create tunnel, add CNAME, update EC2 to run cloudflared, add slug to Worker RESERVED, verify, then remove old A record +- [ ] **Migrate existing tenant to tunnel** — create tunnel, add CNAME, update EC2 to run cloudflared, add slug to Worker RESERVED, verify, then remove old A record - [ ] **Staging image pipeline** — publish `:staging` tag on main merge, `:latest` only on manual promote - [ ] **Move tunnel token to SSM Parameter Store** — EC2 user-data is not secret-safe; retrieve token at boot via instance role diff --git a/docs/runbooks/saas-secrets.md b/docs/runbooks/saas-secrets.md index ce3f7cd0..5d503079 100644 --- a/docs/runbooks/saas-secrets.md +++ b/docs/runbooks/saas-secrets.md @@ -7,15 +7,15 @@ update doesn't silently break production. | Secret | Location(s) | Purpose | |---|---|---| -| `FLY_API_TOKEN` | **(a)** `molecule-monorepo` GitHub Actions secret (push image to `registry.fly.io/molecule-tenant`) + **(b)** `fly secrets` on `molecule-cp` app (control plane creates + deletes tenant Fly Machines) | Any Fly Machines API call | -| `NEON_API_KEY` | `fly secrets` on `molecule-cp` | Create + delete tenant Neon branches | -| `DATABASE_URL` | `fly secrets` on `molecule-cp` | Control-plane Postgres connection (Neon `cool-sea-89357706`) | -| `TENANT_REDIS_URL` | `fly secrets` on `molecule-cp` | Injected into every tenant container as `REDIS_URL` | -| `SECRETS_ENCRYPTION_KEY` | `fly secrets` on `molecule-cp` | AES-256 key wrapping tenant DB/Redis URLs in `org_instances` (provisioner + tenant use this) | -| `RESEND_API_KEY` | `fly secrets` on `molecule-cp` | Resend REST API token used by `internal/email.ResendProvider` — GDPR erasure confirmation today; welcome + plan-change emails later. 
Empty → `DisabledProvider` silently no-ops all sends | -| `RESEND_FROM_EMAIL` | `fly secrets` on `molecule-cp` | RFC-5322 From line, typically `"Molecule AI "`. Must resolve to a Resend-verified domain or sends fail with `403 domain not verified` | -| `STRIPE_API_KEY` | `fly secrets` on `molecule-cp` | `sk_live_…` secret key used by `internal/billing.StripeProvider` for customer/subscription/checkout mutations + GDPR Art. 17 cascade | -| `STRIPE_WEBHOOK_SECRET` | `fly secrets` on `molecule-cp` | `whsec_…` used by `internal/billing.verifySignature` to reject forged webhook calls. Rotated independently from the API key — Stripe treats them as separate secrets | +| `FLY_API_TOKEN` | **(a)** `molecule-monorepo` GitHub Actions secret (push image to `registry.fly.io/molecule-tenant`) + **(b)** `fly secrets` on `<control-plane-app>` app (control plane creates + deletes tenant Fly Machines) | Any Fly Machines API call | +| `NEON_API_KEY` | `fly secrets` on `<control-plane-app>` | Create + delete tenant Neon branches | +| `DATABASE_URL` | `fly secrets` on `<control-plane-app>` | Control-plane Postgres connection (Neon `<neon-project-id>`) | +| `TENANT_REDIS_URL` | `fly secrets` on `<control-plane-app>` | Injected into every tenant container as `REDIS_URL` | +| `SECRETS_ENCRYPTION_KEY` | `fly secrets` on `<control-plane-app>` | AES-256 key wrapping tenant DB/Redis URLs in `org_instances` (provisioner + tenant use this) | +| `RESEND_API_KEY` | `fly secrets` on `<control-plane-app>` | Resend REST API token used by `internal/email.ResendProvider` — GDPR erasure confirmation today; welcome + plan-change emails later. Empty → `DisabledProvider` silently no-ops all sends | +| `RESEND_FROM_EMAIL` | `fly secrets` on `<control-plane-app>` | RFC-5322 From line, typically `"Molecule AI "`. Must resolve to a Resend-verified domain or sends fail with `403 domain not verified` | +| `STRIPE_API_KEY` | `fly secrets` on `<control-plane-app>` | `sk_live_…` secret key used by `internal/billing.StripeProvider` for customer/subscription/checkout mutations + GDPR Art. 
17 cascade | +| `STRIPE_WEBHOOK_SECRET` | `fly secrets` on `<control-plane-app>` | `whsec_…` used by `internal/billing.verifySignature` to reject forged webhook calls. Rotated independently from the API key — Stripe treats them as separate secrets | | `GITHUB_TOKEN` | Built-in GitHub Actions token | GHCR push; rotated automatically | | `ANTHROPIC_API_KEY` | **Global secret** via `PUT /settings/secrets` on each tenant platform instance | Default LLM provider (`MODEL_PROVIDER=anthropic`). Must be set as a **global** secret so it propagates to all workspace containers — workspace-level-only is not sufficient for SDK-direct workspaces (e.g. molecule-hitl). See [rotation procedure below](#anthropic_api_key). | @@ -31,12 +31,12 @@ one** will cause **silent** breakage: 1. Generate new token: ``` - flyctl tokens create deploy --name molecule-cp-rotation-$(date +%Y%m%d) + flyctl tokens create deploy --name <control-plane-app>-rotation-$(date +%Y%m%d) ``` 2. Update **both** locations (order matters — Fly secrets first, then GHA): ``` # (b) Fly secrets — triggers zero-downtime redeploy - flyctl secrets set --app molecule-cp FLY_API_TOKEN='FlyV1 fm2_...' + flyctl secrets set --app <control-plane-app> FLY_API_TOKEN='FlyV1 fm2_...' # (a) GitHub Actions secret — next workflow run uses new token echo 'FlyV1 fm2_...' | gh secret set FLY_API_TOKEN --repo Molecule-AI/molecule-monorepo @@ -44,7 +44,7 @@ one** will cause **silent** breakage: 3. Verify: ``` # Control plane can reach Fly API: - curl https://molecule-cp.fly.dev/health + curl https://<control-plane-app>.fly.dev/health # Trigger image publish (dispatches workflow, pushes to both registries): gh workflow run publish-platform-image.yml --repo Molecule-AI/molecule-monorepo gh run list --repo Molecule-AI/molecule-monorepo --workflow publish-platform-image --limit 1 @@ -60,15 +60,15 @@ one** will cause **silent** breakage: 1. Create replacement key in Neon console → Account Settings → API Keys. 2. Update Fly secrets: ``` - flyctl secrets set --app molecule-cp NEON_API_KEY='napi_...' 
+ flyctl secrets set --app <control-plane-app> NEON_API_KEY='napi_...' ``` 3. Trigger a test provision (dry run — create + delete): ``` - curl -X POST https://molecule-cp.fly.dev/cp/orgs \ + curl -X POST https://<control-plane-app>.fly.dev/cp/orgs \ -H 'Content-Type: application/json' \ -d '{"slug":"keytest-'$(date +%s)'","name":"Rotation test"}' # Wait 60s, inspect logs: - flyctl logs --app molecule-cp --no-tail | tail -30 + flyctl logs --app <control-plane-app> --no-tail | tail -30 # Clean up the test org via DELETE once live ``` 4. Revoke old key in Neon console. @@ -83,7 +83,7 @@ Mitigation: we intentionally defer real KMS + key-rotation to Phase H. Until then, **do not rotate this key unless compromised.** If compromise, procedure is: 1. Generate new key: `openssl rand -hex 32` -2. Set new key on `molecule-cp`. +2. Set new key on `<control-plane-app>`. 3. For every row in `org_instances`: re-provision the tenant (creates fresh Neon branch + Fly machine). The old encrypted URLs are un-decryptable but irrelevant — we mint fresh ones. @@ -92,7 +92,7 @@ then, **do not rotate this key unless compromised.** If compromise, procedure is ## Rotation procedure — DATABASE_URL (control plane) -The Neon `molecule-cp` project has a stable primary endpoint. Rotate only if: +The Neon `<control-plane-app>` project has a stable primary endpoint. Rotate only if: - Neon forces a migration - The connection-URI password is leaked @@ -106,11 +106,11 @@ path and sends fail loudly (the cascade logs `purge confirmation email failed`) without breaking user-facing flows. 1. In Resend dashboard → API Keys → create a new key scoped to - "molecule-cp production", e.g. name - `molecule-cp-rotation-$(date +%Y%m%d)`. + "<control-plane-app> production", e.g. name + `<control-plane-app>-rotation-$(date +%Y%m%d)`. 2. Stage the replacement on Fly (not immediately live): ``` - flyctl secrets set --app molecule-cp \ + flyctl secrets set --app <control-plane-app> \ --stage RESEND_API_KEY='re_...' ``` `--stage` holds the secret for the next deploy instead of restarting @@ -149,11 +149,11 @@ the other — they can be rotated on separate schedules. 
secret key. Stripe gives you a new `sk_live_…`. 2. Stage on Fly: ``` - flyctl secrets set --app molecule-cp \ + flyctl secrets set --app <control-plane-app> \ --stage STRIPE_API_KEY='sk_live_...' ``` 3. Redeploy, then verify: hit - `https://molecule-cp.fly.dev/cp/billing/checkout` from an authenticated + `https://<control-plane-app>.fly.dev/cp/billing/checkout` from an authenticated test session and confirm the returned checkout URL redirects to a valid Stripe-hosted page. 4. Stripe auto-revokes the old key after rolling — no manual revoke @@ -161,7 +161,7 @@ the other — they can be rotated on separate schedules. For `STRIPE_WEBHOOK_SECRET`: -1. Stripe dashboard → Developers → Webhooks → the molecule-cp endpoint → +1. Stripe dashboard → Developers → Webhooks → the <control-plane-app> endpoint → **Roll secret**. 2. Stripe shows you BOTH old and new secret for a 24-hour overlap window. Copy the new `whsec_…`. @@ -169,7 +169,7 @@ For `STRIPE_WEBHOOK_SECRET`: 4. Inside the overlap window, send a Stripe CLI test event: ``` stripe trigger customer.subscription.updated \ - --forward-to https://molecule-cp.fly.dev/webhooks/stripe + --forward-to https://<control-plane-app>.fly.dev/webhooks/stripe ``` If the signature-verification layer accepts it (no `400 invalid signature` in Fly logs), the new secret is live. @@ -197,7 +197,7 @@ shadow the global value — the per-workspace value takes precedence. -d '{"key":"ANTHROPIC_API_KEY","value":"sk-ant-api03-..."}' # SaaS control plane — set on the tenant platform via control-plane API - # (details TBD when molecule-cp exposes a /cp/orgs/:id/secrets endpoint) + # (details TBD when <control-plane-app> exposes a /cp/orgs/:id/secrets endpoint) ``` The platform auto-restarts every non-paused workspace on set. 
diff --git a/infra/cloudflare-worker/wrangler.toml b/infra/cloudflare-worker/wrangler.toml index 1a0a4272..23f56093 100644 --- a/infra/cloudflare-worker/wrangler.toml +++ b/infra/cloudflare-worker/wrangler.toml @@ -2,18 +2,19 @@ name = "molecule-tenant-proxy" main = "src/index.ts" compatibility_date = "2024-09-23" -account_id = "bfa4e604e168a938e565600b27e2828c" +# Set via env var or fill in manually — do not commit real value +account_id = "your-cloudflare-account-id" # KV namespace for caching org→IP mappings (L2 cache, 5 min TTL) [[kv_namespaces]] binding = "TENANT_CACHE" -id = "752aaa0783514143a1eda9f44a412d7d" +id = "your-kv-namespace-id" # Route: all tenant subdomains (wildcard). Explicit records (api, app, www) # take priority in Cloudflare DNS — the Worker only fires for tenant slugs. [[routes]] pattern = "*.moleculesai.app/*" -zone_id = "a034108eda16d131ef7f766b923ef464" +zone_id = "your-cloudflare-zone-id" [vars] CP_API_URL = "https://api.moleculesai.app" diff --git a/infra/cloudflare-worker/wrangler.toml.example b/infra/cloudflare-worker/wrangler.toml.example new file mode 100644 index 00000000..23f56093 --- /dev/null +++ b/infra/cloudflare-worker/wrangler.toml.example @@ -0,0 +1,20 @@ +name = "molecule-tenant-proxy" +main = "src/index.ts" +compatibility_date = "2024-09-23" + +# Set via env var or fill in manually — do not commit real value +account_id = "your-cloudflare-account-id" + +# KV namespace for caching org→IP mappings (L2 cache, 5 min TTL) +[[kv_namespaces]] +binding = "TENANT_CACHE" +id = "your-kv-namespace-id" + +# Route: all tenant subdomains (wildcard). Explicit records (api, app, www) +# take priority in Cloudflare DNS — the Worker only fires for tenant slugs. 
+[[routes]] +pattern = "*.moleculesai.app/*" +zone_id = "your-cloudflare-zone-id" + +[vars] +CP_API_URL = "https://api.moleculesai.app" diff --git a/platform/internal/handlers/webhooks_workflow_test.go b/platform/internal/handlers/webhooks_workflow_test.go index 5c3419b0..3236cb59 100644 --- a/platform/internal/handlers/webhooks_workflow_test.go +++ b/platform/internal/handlers/webhooks_workflow_test.go @@ -13,7 +13,7 @@ func TestBuildGitHubA2APayload_WorkflowRunFailure(t *testing.T) { "workspace_id": "ws-devops", "action": "completed", "repository": {"full_name": "Molecule-AI/molecule-monorepo"}, - "sender": {"login": "hongming"}, + "sender": {"login": "test-user"}, "workflow_run": { "id": 123456, "name": "CI",