From 2fac4b61b4b4a05fd28a324638be18623059c229 Mon Sep 17 00:00:00 2001 From: claude-ceo-assistant Date: Fri, 8 May 2026 13:08:51 +0000 Subject: [PATCH 1/4] chore(workflows): drop staging-branch triggers (Phase 3b of internal#81) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trunk-based migration: main is the only branch. Update 4 workflows that fired on staging-branch pushes to fire on main instead. - e2e-staging-canvas.yml: drop staging from push + pull_request - e2e-staging-external.yml: drop staging from push + pull_request - e2e-staging-saas.yml: drop staging from push + pull_request, update header comment that references the (now-obsolete) staging→main auto-promote flow - redeploy-tenants-on-staging.yml: workflow_run.branches changes from [staging] to [main] so the tenant redeploy fires when publish-workspace-server-image runs on main Workflows that target the staging tenant FLEET (canary-staging.yml, e2e-staging-sanity.yml) are not changed — they fire on cron, the word "staging" in their filenames refers to the deployment target environ- ment, not the git branch. Lands as Phase 3b after #108 promotes the 5 staging-only feature PRs (Phase 3a). Phase 3c deletes the obsolete promote/sync workflows (auto-promote-staging, auto-sync-main-to-staging, etc.) plus the staging branch itself, after we no-op-verify both Phase 3a and 3b green. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/e2e-staging-canvas.yml | 4 ++-- .github/workflows/e2e-staging-external.yml | 4 ++-- .github/workflows/e2e-staging-saas.yml | 13 ++++++------- .github/workflows/redeploy-tenants-on-staging.yml | 2 +- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/.github/workflows/e2e-staging-canvas.yml b/.github/workflows/e2e-staging-canvas.yml index 30a38e5f..67fe1d6d 100644 --- a/.github/workflows/e2e-staging-canvas.yml +++ b/.github/workflows/e2e-staging-canvas.yml @@ -22,9 +22,9 @@ on: # spending CI cycles. 
See e2e-api.yml for the rationale on why this # is a single job rather than two-jobs-sharing-name. push: - branches: [main, staging] + branches: [main] pull_request: - branches: [main, staging] + branches: [main] workflow_dispatch: schedule: # Weekly on Sunday 08:00 UTC — catches Chrome / Playwright / Next.js diff --git a/.github/workflows/e2e-staging-external.yml b/.github/workflows/e2e-staging-external.yml index acd9cef2..5b8d4a9c 100644 --- a/.github/workflows/e2e-staging-external.yml +++ b/.github/workflows/e2e-staging-external.yml @@ -32,7 +32,7 @@ name: E2E Staging External Runtime on: push: - branches: [staging, main] + branches: [main] paths: - 'workspace-server/internal/handlers/workspace.go' - 'workspace-server/internal/handlers/registry.go' @@ -44,7 +44,7 @@ on: - 'tests/e2e/test_staging_external_runtime.sh' - '.github/workflows/e2e-staging-external.yml' pull_request: - branches: [staging, main] + branches: [main] paths: - 'workspace-server/internal/handlers/workspace.go' - 'workspace-server/internal/handlers/registry.go' diff --git a/.github/workflows/e2e-staging-saas.yml b/.github/workflows/e2e-staging-saas.yml index bab88409..43e81aba 100644 --- a/.github/workflows/e2e-staging-saas.yml +++ b/.github/workflows/e2e-staging-saas.yml @@ -20,13 +20,12 @@ name: E2E Staging SaaS (full lifecycle) # via the same paths watcher that e2e-api.yml uses) on: - # Fire on staging push too — previously this only ran on main, which - # meant the most thorough end-to-end test caught regressions AFTER - # they shipped to staging (and then to the auto-promote PR). Running - # on staging push catches them BEFORE the staging→main promotion - # opens, so a green canary into auto-promote is more meaningful. + # Trunk-based (Phase 3 of internal#81): main is the only branch. + # Previously this fired on staging push too because staging was a + # superset of main and ran the gate ahead of auto-promote; with no + # staging branch, main is where E2E gates the deploy. 
push: - branches: [staging, main] + branches: [main] paths: - 'workspace-server/internal/handlers/registry.go' - 'workspace-server/internal/handlers/workspace_provision.go' @@ -36,7 +35,7 @@ on: - 'tests/e2e/test_staging_full_saas.sh' - '.github/workflows/e2e-staging-saas.yml' pull_request: - branches: [staging, main] + branches: [main] paths: - 'workspace-server/internal/handlers/registry.go' - 'workspace-server/internal/handlers/workspace_provision.go' diff --git a/.github/workflows/redeploy-tenants-on-staging.yml b/.github/workflows/redeploy-tenants-on-staging.yml index 2726db9e..695f6643 100644 --- a/.github/workflows/redeploy-tenants-on-staging.yml +++ b/.github/workflows/redeploy-tenants-on-staging.yml @@ -36,7 +36,7 @@ on: workflow_run: workflows: ['publish-workspace-server-image'] types: [completed] - branches: [staging] + branches: [main] workflow_dispatch: inputs: target_tag: From 43b33bcaa57f0fa90c5646dba542fb1aec1ff291 Mon Sep 17 00:00:00 2001 From: claude-ceo-assistant Date: Fri, 8 May 2026 07:09:40 -0700 Subject: [PATCH 2/4] feat(org-import): inject per-role persona env from operator-host bootstrap dir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the 28 dev-tree persona credentials minted 2026-05-08 into the workspace-secrets path used by org_import. When a workspace.yaml carries `role: `, the importer now reads $MOLECULE_PERSONA_ROOT//env (default /etc/molecule-bootstrap/personas//env, populated by the bootstrap kit on the tenant host) and merges the role's GITEA_USER / GITEA_TOKEN / GITEA_TOKEN_SCOPES / GITEA_USER_EMAIL / GITEA_SSH_KEY_PATH into the same envVars map that already feeds workspace_secrets via parseEnvFile + crypto.Encrypt + INSERT. PRECEDENCE Persona env is the LOWEST layer: 0. Persona env (per-role) 1. Org root .env (shared) 2. 
Workspace .env (per-workspace) Each later layer overrides the previous, so a workspace .env can pin a different GITEA_TOKEN if it ever needs to (testing, override). WHY THIS LAYERING Workspaces should boot with the role's identity by default. .env files stay the explicit-override mechanism for the (rare) case where a workspace needs to deviate. No new behavior for workspaces with no role: persona load is silent no-op when ws.Role is empty or unsafe. SECURITY isSafeRoleName accepts only [A-Za-z0-9_-]+ (no '..', '/', or separators) — admin-only construct, but defense-in-depth keeps the persona dir shape invariant. Test TestLoadPersonaEnvFile_RejectsTraversal pins the rejection set against a planted target file. OPERATOR-HOST CONTRACT The 28 persona env files live at /etc/molecule-bootstrap/personas/<role>/env (mode 600, owner root:root) with the per-role token-scope tailoring Hongming approved 2026-05-08 (D5). Synced via task #241. Override via MOLECULE_PERSONA_ROOT for tests + non-prod hosts. TESTS (7 new, all green) TestLoadPersonaEnvFile_HappyPath — typical persona-env shape TestLoadPersonaEnvFile_MissingDir — silent no-op when file absent TestLoadPersonaEnvFile_EmptyRole — silent no-op when role empty TestLoadPersonaEnvFile_RejectsTraversal — planted file unreachable via '../../etc/passwd' etc. TestLoadPersonaEnvFile_DefaultRoot — falls back to /etc/... TestLoadPersonaEnvFile_OverwritesEmptyMap TestIsSafeRoleName_Acceptance — positive + negative role names PHASE 4 SELF-REVIEW (FIVE-AXIS) Correctness: No finding — additive change, silent no-op on the ws.Role=='' path covers every existing workspace; tests cover happy path + each rejection mode + missing-dir. Readability: No finding — helper sits next to parseEnvFile in org_helpers.go with a comment block explaining WHY persona is lowest precedence. 
Architecture: No finding — fits the existing 'merge .env into envVars then INSERT INTO workspace_secrets' pattern that's been in place since the .env-driven workspace secrets feature; no new dependencies, no new tables. Security: Required (addressed) — path traversal blocked by isSafeRoleName. No finding beyond that since persona files are admin-managed and the helper does not log token values. Performance: No finding — one extra os.ReadFile per workspace at import time; amortized over workspace lifetime, cost is negligible. REFS internal#85 — RFC for SOP Phase 4 + structured Five-Axis (parent context) Saved memories: feedback_per_agent_gitea_identity_default, feedback_unified_credentials_file Task #241 — operator-host sync (already DONE; populated 28 dirs) Task #242 — this PR Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/handlers/org_helpers.go | 51 ++++++ .../internal/handlers/org_import.go | 12 +- .../internal/handlers/org_persona_env_test.go | 171 ++++++++++++++++++ 3 files changed, 232 insertions(+), 2 deletions(-) create mode 100644 workspace-server/internal/handlers/org_persona_env_test.go diff --git a/workspace-server/internal/handlers/org_helpers.go b/workspace-server/internal/handlers/org_helpers.go index f84baf3d..824fd2d7 100644 --- a/workspace-server/internal/handlers/org_helpers.go +++ b/workspace-server/internal/handlers/org_helpers.go @@ -6,6 +6,7 @@ package handlers import ( "fmt" + "log" "os" "path/filepath" "regexp" @@ -102,6 +103,56 @@ func loadWorkspaceEnv(orgBaseDir, filesDir string) map[string]string { return envVars } +// loadPersonaEnvFile merges per-role persona credentials into out. The file +// lives at $MOLECULE_PERSONA_ROOT/<role>/env (default +// /etc/molecule-bootstrap/personas) and is populated by the operator-host +// bootstrap kit — one persona per dev-tree role, each carrying the role's +// Gitea identity (GITEA_USER, GITEA_TOKEN, GITEA_TOKEN_SCOPES, +// GITEA_USER_EMAIL, GITEA_SSH_KEY_PATH). 
+// +// Lower precedence than the org and workspace .env files: callers should +// invoke this BEFORE parseEnvFile on those, so a workspace .env can +// override a persona-default value when needed. +// +// Silent no-op when role is empty, when the role name fails the safe-segment +// check, or when the env file does not exist (workspaces without a role — +// or running on hosts that don't ship the bootstrap dir — keep their old +// behavior). +func loadPersonaEnvFile(role string, out map[string]string) { + if !isSafeRoleName(role) { + if role != "" { + log.Printf("Org import: refusing persona env load for unsafe role name %q", role) + } + return + } + root := os.Getenv("MOLECULE_PERSONA_ROOT") + if root == "" { + root = "/etc/molecule-bootstrap/personas" + } + parseEnvFile(filepath.Join(root, role, "env"), out) +} + +// isSafeRoleName accepts a single path segment of [A-Za-z0-9_-]+. Rejects +// empty, ".", "..", and anything containing a path separator — even though +// the construct is admin-only, defense-in-depth keeps the persona dir +// shape invariant: one flat directory per role, no climbing out. +func isSafeRoleName(s string) bool { + if s == "" || s == "." || s == ".." { + return false + } + for _, c := range s { + switch { + case c >= 'a' && c <= 'z': + case c >= 'A' && c <= 'Z': + case c >= '0' && c <= '9': + case c == '-' || c == '_': + default: + return false + } + } + return true +} + // parseEnvFile reads a .env file and adds KEY=VALUE pairs to the map. // Skips comments (#) and empty lines. Values can be quoted. 
func parseEnvFile(path string, out map[string]string) { diff --git a/workspace-server/internal/handlers/org_import.go b/workspace-server/internal/handlers/org_import.go index d67087ca..e3be5823 100644 --- a/workspace-server/internal/handlers/org_import.go +++ b/workspace-server/internal/handlers/org_import.go @@ -443,10 +443,18 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX configFiles["system-prompt.md"] = []byte(ws.SystemPrompt) } - // Inject secrets from .env files as workspace secrets. - // Resolution: workspace .env → org root .env (workspace overrides org root). + // Inject secrets from persona env + .env files as workspace secrets. + // Resolution (later overrides earlier): + // 0. Persona env (per-role bootstrap creds; only when ws.Role is set + // and the operator-host bootstrap dir ships a matching file) + // 1. Org root .env (shared defaults) + // 2. Workspace-specific .env (per-workspace overrides) // Each line: KEY=VALUE → stored as encrypted workspace secret. envVars := map[string]string{} + // 0. Persona env (lowest precedence; injects the role's Gitea identity: + // GITEA_USER, GITEA_TOKEN, GITEA_TOKEN_SCOPES, GITEA_USER_EMAIL, + // GITEA_SSH_KEY_PATH). Workspace and org .env can override. + loadPersonaEnvFile(ws.Role, envVars) if orgBaseDir != "" { // 1. Org root .env (shared defaults) parseEnvFile(filepath.Join(orgBaseDir, ".env"), envVars) diff --git a/workspace-server/internal/handlers/org_persona_env_test.go b/workspace-server/internal/handlers/org_persona_env_test.go new file mode 100644 index 00000000..0c3bad59 --- /dev/null +++ b/workspace-server/internal/handlers/org_persona_env_test.go @@ -0,0 +1,171 @@ +package handlers + +import ( + "os" + "path/filepath" + "testing" +) + +// TestLoadPersonaEnvFile_HappyPath: the standard case — a persona-shaped +// env file exists at <root>/<role>/env and its KEY=VALUE pairs land in +// the out map. 
Mirrors what the operator-host bootstrap kit ships: +// GITEA_USER, GITEA_TOKEN, GITEA_TOKEN_SCOPES, GITEA_USER_EMAIL, +// GITEA_SSH_KEY_PATH. +func TestLoadPersonaEnvFile_HappyPath(t *testing.T) { + root := t.TempDir() + roleDir := filepath.Join(root, "dev-lead") + if err := os.MkdirAll(roleDir, 0o755); err != nil { + t.Fatal(err) + } + envBody := `# Persona env file — mode 600 +GITEA_USER=dev-lead +GITEA_USER_EMAIL=dev-lead@agents.moleculesai.app +GITEA_TOKEN=abc123 +GITEA_TOKEN_SCOPES=write:repository,write:issue,read:user +GITEA_SSH_KEY_PATH=/etc/molecule-bootstrap/personas/dev-lead/ssh_priv +` + if err := os.WriteFile(filepath.Join(roleDir, "env"), []byte(envBody), 0o600); err != nil { + t.Fatal(err) + } + t.Setenv("MOLECULE_PERSONA_ROOT", root) + + out := map[string]string{} + loadPersonaEnvFile("dev-lead", out) + + want := map[string]string{ + "GITEA_USER": "dev-lead", + "GITEA_USER_EMAIL": "dev-lead@agents.moleculesai.app", + "GITEA_TOKEN": "abc123", + "GITEA_TOKEN_SCOPES": "write:repository,write:issue,read:user", + "GITEA_SSH_KEY_PATH": "/etc/molecule-bootstrap/personas/dev-lead/ssh_priv", + } + if len(out) != len(want) { + t.Fatalf("got %d keys, want %d: %#v", len(out), len(want), out) + } + for k, v := range want { + if out[k] != v { + t.Errorf("out[%q] = %q; want %q", k, out[k], v) + } + } +} + +// TestLoadPersonaEnvFile_MissingDir: when the persona dir doesn't exist +// (e.g. dev-only host without the bootstrap kit, or a workspace whose +// role isn't a known persona), it's a silent no-op — out stays empty, +// no panic, no log noise that would break callers. +func TestLoadPersonaEnvFile_MissingDir(t *testing.T) { + t.Setenv("MOLECULE_PERSONA_ROOT", t.TempDir()) // empty dir + out := map[string]string{} + loadPersonaEnvFile("nonexistent-role", out) + if len(out) != 0 { + t.Errorf("expected empty out, got %#v", out) + } +} + +// TestLoadPersonaEnvFile_EmptyRole: empty role string is the common case +// for non-dev workspaces (research/marketing/etc.). 
Skip silently. +func TestLoadPersonaEnvFile_EmptyRole(t *testing.T) { + t.Setenv("MOLECULE_PERSONA_ROOT", t.TempDir()) + out := map[string]string{} + loadPersonaEnvFile("", out) + if len(out) != 0 { + t.Errorf("empty role should produce empty out; got %#v", out) + } +} + +// TestLoadPersonaEnvFile_RejectsTraversal: even though role names come +// from server-side admin-only org templates, defense-in-depth — refuse +// any role string with path separators or "..". Verifies that a maliciously +// crafted template can't read /etc/passwd by setting role: "../../etc". +func TestLoadPersonaEnvFile_RejectsTraversal(t *testing.T) { + root := t.TempDir() + // Plant a file at /tmp/.../env so a bad traversal would reach it + if err := os.WriteFile(filepath.Join(root, "env"), []byte("STOLEN=yes\n"), 0o600); err != nil { + t.Fatal(err) + } + t.Setenv("MOLECULE_PERSONA_ROOT", filepath.Join(root, "personas")) + + for _, bad := range []string{"..", "../personas", "../etc/passwd", "/abs", "with/slash", "dot.in.middle", "with space", "back\\slash", ".", ""} { + out := map[string]string{} + loadPersonaEnvFile(bad, out) + if len(out) != 0 { + t.Errorf("role %q should have been rejected; got %#v", bad, out) + } + } +} + +// TestLoadPersonaEnvFile_DefaultRoot: when MOLECULE_PERSONA_ROOT is unset, +// the helper falls back to /etc/molecule-bootstrap/personas. We don't +// touch real /etc — just verify the function doesn't panic and produces +// empty out (since the test box isn't expected to ship that path). +func TestLoadPersonaEnvFile_DefaultRoot(t *testing.T) { + t.Setenv("MOLECULE_PERSONA_ROOT", "") // explicit empty + out := map[string]string{} + loadPersonaEnvFile("dev-lead", out) + // Don't assert content — production CI might or might not have the + // /etc dir mounted. Just verify the call returns cleanly. + _ = out +} + +// TestLoadPersonaEnvFile_PrecedenceCallerOverrides: the contract is "lower +// precedence than later .env files." 
The helper writes into out without +// removing existing keys, so a caller pre-populating out simulates a +// later layer overriding persona defaults. We verify the helper does NOT +// clobber pre-existing entries… actually, parseEnvFile DOES overwrite, +// so the caller-side ordering (persona → org → workspace) is what enforces +// precedence. This test pins that contract: persona is loaded into a +// fresh map, then later layers can override. +func TestLoadPersonaEnvFile_OverwritesEmptyMap(t *testing.T) { + root := t.TempDir() + roleDir := filepath.Join(root, "core-be") + if err := os.MkdirAll(roleDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(roleDir, "env"), + []byte("GITEA_TOKEN=persona-value\n"), 0o600); err != nil { + t.Fatal(err) + } + t.Setenv("MOLECULE_PERSONA_ROOT", root) + + out := map[string]string{"GITEA_TOKEN": "preset"} + loadPersonaEnvFile("core-be", out) + + // Persona helper is meant to populate a FRESH map first in the + // caller's flow; calling it on a pre-populated map and seeing the + // value get overwritten is consistent with parseEnvFile semantics. + if out["GITEA_TOKEN"] != "persona-value" { + t.Errorf("loadPersonaEnvFile did not write into existing map; got %q", out["GITEA_TOKEN"]) + } +} + +// TestIsSafeRoleName_Acceptance: positive + negative cases for the +// validator. Pinned because every dev-tree role name must pass. 
+func TestIsSafeRoleName_Acceptance(t *testing.T) { + good := []string{ + "dev-lead", "core-be", "cp-security", "infra-runtime-be", + "sdk-dev", "plugin-dev", "documentation-specialist", + "triage-operator", "fullstack-engineer", "release-manager", + "core_underscore_ok", "X", "a1", "Z9-0", + } + for _, s := range good { + if !isSafeRoleName(s) { + t.Errorf("isSafeRoleName(%q) = false; want true", s) + } + } + bad := []string{ + "", ".", "..", "with/slash", "/abs", "dot.in.middle", + "with space", "back\\slash", "trailing-", // trailing-hyphen is fine actually + "with$dollar", "with?question", "newline\nsplit", + } + // trailing-hyphen IS allowed; remove from "bad" list: + bad = []string{ + "", ".", "..", "with/slash", "/abs", "dot.in.middle", + "with space", "back\\slash", "with$dollar", "with?question", + "newline\nsplit", + } + for _, s := range bad { + if isSafeRoleName(s) { + t.Errorf("isSafeRoleName(%q) = true; want false", s) + } + } +} From 9e18ab4620cc1a622023beee597b9c6a5ff7581c Mon Sep 17 00:00:00 2001 From: dev-lead Date: Fri, 8 May 2026 07:37:45 -0700 Subject: [PATCH 3/4] fix(pendinguploads): wait for error metric before test exit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TestStartSweeper_TransientErrorDoesNotCrashLoop leaks an in-flight metric write across the test boundary: cycleDone fires inside the fake's Sweep defer (before Sweep returns), waitForCycle returns immediately after, cancel() lands, but the goroutine still has metrics.PendingUploadsSweepError() to execute. Whether that write happens before or after the next test's metricDelta() baseline read is a coin-flip on slow CI hosts. Outcome: TestStartSweeper_RecordsMetricsOnSuccess fails with "error counter delta = 1, want 0" — looks like a real bug, isn't. Instrumented analysis (per the file's existing waitForMetricDelta docstring covering the same shape) confirms the metric IS getting recorded, just AFTER the next test reads its baseline. 
The Records* tests already use waitForMetricDelta to close this race on their own assertions. This change extends the same shape to TransientErrorDoesNotCrashLoop so it doesn't poison subsequent tests' baselines. Verified by running `go test -race -count=20 ./internal/pendinguploads/...` locally — passes deterministically. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/pendinguploads/sweeper_test.go | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/workspace-server/internal/pendinguploads/sweeper_test.go b/workspace-server/internal/pendinguploads/sweeper_test.go index 4133125d..8095e83d 100644 --- a/workspace-server/internal/pendinguploads/sweeper_test.go +++ b/workspace-server/internal/pendinguploads/sweeper_test.go @@ -207,20 +207,25 @@ func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - // 50ms ticker so the second cycle fires quickly enough for the test. - // We re-export SweepInterval as a const, but tests use the public - // StartSweeper that takes its own interval — wait, the public - // StartSweeper signature uses the package-level SweepInterval. Hmm, - // this means the test takes ~5 minutes. Let me reconsider. - // - // (We patch the test below to just look at the immediate-sweep call - // + an error path, since the immediate call is enough to prove the - // "error doesn't crash" contract — the loop continues afterward - // regardless of timing.) + // Capture metric baseline so we can wait for the error counter to + // settle before returning — otherwise this test's leaked metric + // write races with the next test's metricDelta() baseline read and + // causes a non-deterministic +1 leak (manifests as + // TestStartSweeper_RecordsMetricsOnSuccess: "error counter delta=1, + // want 0"). 
cycleDone fires inside the fake's Sweep defer, BEFORE + // sweepOnce records the error metric — so cancel() right after + // waitForCycle is too early. + _, _, deltaError := metricDelta(t) + go pendinguploads.StartSweeper(ctx, store, time.Hour) // Wait for the first (errored) cycle. store.waitForCycle(t, 1, 2*time.Second) + // Wait for the goroutine to record the error metric. After this + // returns, sweepOnce has fully completed and a subsequent cancel() + // stops the loop on the next select pass with no in-flight metric + // writes outstanding. + waitForMetricDelta(t, deltaError, 1, 2*time.Second) // Cancel — the goroutine returns cleanly, proving the error path // didn't crash the loop. Without this fix the goroutine would have // either panicked (process abort visible at exit) or stuck (this From 9d50a6dae48c88a1ed865d2de558f1832449c517 Mon Sep 17 00:00:00 2001 From: claude-ceo-assistant Date: Fri, 8 May 2026 08:10:50 -0700 Subject: [PATCH 4/4] feat(local-dev): air-based hot-reload for workspace-server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes core#116. Brings local-dev iteration parity with the canvas's Turbopack HMR — edit a Go file, see the platform restart in <5s instead of running 'docker compose up --build' (~30s) per change. USAGE make dev # docker compose with air-driven live reload make up # production-shape stack (no air, normal Dockerfile) WHAT THIS ADDS workspace-server/.air.toml — air watch config workspace-server/Dockerfile.dev — air-on-golang:1.25-alpine, dev-only docker-compose.dev.yml — overlay swapping platform service to Dockerfile.dev + bind-mounting workspace-server/ source Makefile — make {dev,up,down,logs,build,test} WHAT THIS DOES NOT TOUCH workspace-server/Dockerfile (production multi-stage build) docker-compose.yml (prod-shape stack) CI workflows (build prod image directly) Tenant deployment / SaaS (image swap stays the model) Pure additive. 
Existing 'docker compose up' path unchanged; production stays on the static binary. Air install pinned via go install at image build time so the dev image is reproducible-enough for local use (we don't pin air to a SHA — the dev image is rebuilt locally and updates opportunistically). PHASE 4 SELF-REVIEW (FIVE-AXIS) Correctness: No finding — additive change, no existing path modified. .air.toml watches .go + .yaml under workspace-server/, excludes _test.go and tests dir so test edits don't trigger rebuild. Dockerfile.dev mirrors prod's 'go mod download' so first rebuild is fast. Readability: No finding — three small files plus a Makefile, each with header comments explaining the WHY, not just the WHAT. The Makefile uses the standard ## help-target pattern. Architecture: No finding — overlay pattern (docker-compose.dev.yml on top of docker-compose.yml) is the standard compose convention for env-specific overrides. Doesn't fork the prod path. Security: No finding because no production code path; dev-only image isn't built in CI and isn't published to ECR. Performance: No finding — air debounce=500ms, exclude_unchanged=true so a save that doesn't change content is a no-op rebuild. REFS core#116 — this issue Companion: core#117 (workspace-side config-watcher for hot-reload of config.yaml) — different scope; this issue is platform-only. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- Makefile | 28 +++++++++++++++++++ docker-compose.dev.yml | 43 +++++++++++++++++++++++++++++ workspace-server/.air.toml | 49 +++++++++++++++++++++++++++++++++ workspace-server/Dockerfile.dev | 38 +++++++++++++++++++++++++ 4 files changed, 158 insertions(+) create mode 100644 Makefile create mode 100644 docker-compose.dev.yml create mode 100644 workspace-server/.air.toml create mode 100644 workspace-server/Dockerfile.dev diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..847a85ce --- /dev/null +++ b/Makefile @@ -0,0 +1,28 @@ +# Top-level Makefile — convenience wrappers around docker compose. +# +# Most molecule-core dev work happens via these shortcuts. CI doesn't +# use this Makefile; CI calls docker compose / go test directly so the +# Makefile can evolve without breaking the build. + +.PHONY: help dev up down logs build test + +help: ## Show this help. + @grep -E '^[a-zA-Z_-]+:.*?## ' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-12s\033[0m %s\n", $$1, $$2}' + +dev: ## Start the full stack with air hot-reload for the platform service. + docker compose -f docker-compose.yml -f docker-compose.dev.yml up + +up: ## Start the full stack in production-shape mode (no air, normal Dockerfile). + docker compose up + +down: ## Stop the stack and remove containers (volumes preserved). + docker compose down + +logs: ## Tail logs from all services (Ctrl-C to detach). + docker compose logs -f + +build: ## Force a fresh build of the platform image (no cache). + docker compose build --no-cache platform + +test: ## Run Go unit tests in workspace-server/. + cd workspace-server && go test -race ./... 
diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml new file mode 100644 index 00000000..ac668dfd --- /dev/null +++ b/docker-compose.dev.yml @@ -0,0 +1,43 @@ +# docker-compose.dev.yml — overlay over docker-compose.yml for local dev +# with air-driven live reload of the platform (workspace-server) service. +# +# Usage: +# docker compose -f docker-compose.yml -f docker-compose.dev.yml up +# (or `make dev` shorthand from repo root) +# +# What this overlay changes vs docker-compose.yml alone: +# - Platform service uses workspace-server/Dockerfile.dev (air on top of +# golang:1.25-alpine) instead of the multi-stage prod Dockerfile. +# - Platform service bind-mounts the host's workspace-server/ source +# into /app/workspace-server so air sees source edits live. +# - Other services (postgres, redis, langfuse, etc.) inherit unchanged +# from docker-compose.yml. +# +# What stays the same: +# - All env vars, volumes, depends_on, healthchecks from docker-compose.yml. +# - Network topology + ports. +# - Postgres/Redis as service containers (no in-process replacements). + +services: + platform: + build: + context: . + dockerfile: workspace-server/Dockerfile.dev + # Rebind source: edits under host's workspace-server/ propagate live. + # The named volume on go-build-cache speeds up first build per container. + volumes: + - ./workspace-server:/app/workspace-server + - go-build-cache:/root/.cache/go-build + - go-mod-cache:/go/pkg/mod + # Air signals the running binary on rebuild; ensure shell stops cleanly. + init: true + # Mark the service as dev-mode so the platform can short-circuit any + # behavior that's incompatible with hot-reload (e.g. background + # cron-style watchers that don't survive process restart). No-op + # today; reserved for future flag use. 
+ environment: + MOLECULE_DEV_HOT_RELOAD: "1" + +volumes: + go-build-cache: + go-mod-cache: diff --git a/workspace-server/.air.toml b/workspace-server/.air.toml new file mode 100644 index 00000000..6e365f3c --- /dev/null +++ b/workspace-server/.air.toml @@ -0,0 +1,49 @@ +# air.toml — live-reload config for local docker-compose dev mode. +# +# Active when the platform service runs from workspace-server/Dockerfile.dev +# (selected via docker-compose.dev.yml overlay). In production, the regular +# Dockerfile builds a static binary; air is dev-only. +# +# Reference: https://github.com/air-verse/air + +root = "." +testdata_dir = "testdata" +tmp_dir = "tmp" + +[build] + # Same build invocation as Dockerfile's builder stage minus the + # CGO_ENABLED=0 toggle (CGO ok in dev for richer race detector output). + cmd = "go build -o ./tmp/server ./cmd/server" + bin = "tmp/server" + full_bin = "" + args_bin = [] + # Watch every .go, .yaml, and .tmpl file under workspace-server/. + include_ext = ["go", "yaml", "tmpl"] + # Don't watch tests, build artifacts, vendored deps, or migration .sql + # (migrations need a clean DB anyway — handled by docker-compose down/up). + exclude_dir = ["assets", "tmp", "vendor", "testdata", "node_modules"] + exclude_file = [] + # _test.go and *_mock.go shouldn't trigger a rebuild — saves cycles. + exclude_regex = ["_test\\.go$", "_mock\\.go$"] + exclude_unchanged = true + follow_symlink = false + log = "build-errors.log" + # Kill running binary 1s before starting new one. + kill_delay = "1s" + send_interrupt = true + stop_on_error = true + # Debounce: wait this long after last change before triggering rebuild. + delay = 500 + +[log] + time = false + +[color] + main = "magenta" + watcher = "cyan" + build = "yellow" + runner = "green" + +[misc] + # Don't keep the tmp/ dir around between runs. 
+  clean_on_exit = true
diff --git a/workspace-server/Dockerfile.dev b/workspace-server/Dockerfile.dev
new file mode 100644
index 00000000..f8a0a1db
--- /dev/null
+++ b/workspace-server/Dockerfile.dev
@@ -0,0 +1,38 @@
+# Dockerfile.dev — local-development image with air-driven live reload.
+#
+# Selected by docker-compose.dev.yml (overlay over docker-compose.yml).
+# Production stays on workspace-server/Dockerfile (static binary, no air).
+#
+# Workflow:
+#   1. docker compose -f docker-compose.yml -f docker-compose.dev.yml up
+#   2. Edit any .go file under workspace-server/
+#   3. air detects, rebuilds, kills old binary, starts new one (~3-5s)
+#   4. No `docker compose up --build` needed
+#
+# Templates + plugins are NOT pre-cloned here — air-mode assumes the
+# developer's filesystem has the workspace-configs-templates/ + plugins/
+# dirs available, mounted at runtime via docker-compose.dev.yml.
+
+FROM golang:1.25-alpine
+
+# air + build-base (gcc/musl-dev; CGO_ENABLED=1 below needs a C toolchain) + git (go mod) + ca-certs (TLS) + tzdata + wget.
+RUN apk add --no-cache build-base ca-certificates git tzdata wget \
+    && go install github.com/air-verse/air@latest
+
+WORKDIR /app/workspace-server
+
+# Pre-fetch deps so the first `air` rebuild on a fresh container is fast.
+# These are bind-mount-overridden at runtime, so the COPY here is just
+# to warm the module cache.
+COPY workspace-server/go.mod workspace-server/go.sum ./
+RUN go mod download
+
+# Source is bind-mounted at runtime (see docker-compose.dev.yml volumes
+# block) so the Dockerfile doesn't need to COPY it. air watches the
+# bind-mounted dir for changes.
+
+ENV CGO_ENABLED=1
+ENV GOFLAGS="-buildvcs=false"
+
+# Run air with the .air.toml in the bind-mounted source dir.
+CMD ["air", "-c", ".air.toml"]