diff --git a/.github/workflows/e2e-staging-canvas.yml b/.github/workflows/e2e-staging-canvas.yml index 30a38e5f..67fe1d6d 100644 --- a/.github/workflows/e2e-staging-canvas.yml +++ b/.github/workflows/e2e-staging-canvas.yml @@ -22,9 +22,9 @@ on: # spending CI cycles. See e2e-api.yml for the rationale on why this # is a single job rather than two-jobs-sharing-name. push: - branches: [main, staging] + branches: [main] pull_request: - branches: [main, staging] + branches: [main] workflow_dispatch: schedule: # Weekly on Sunday 08:00 UTC — catches Chrome / Playwright / Next.js diff --git a/.github/workflows/e2e-staging-external.yml b/.github/workflows/e2e-staging-external.yml index acd9cef2..5b8d4a9c 100644 --- a/.github/workflows/e2e-staging-external.yml +++ b/.github/workflows/e2e-staging-external.yml @@ -32,7 +32,7 @@ name: E2E Staging External Runtime on: push: - branches: [staging, main] + branches: [main] paths: - 'workspace-server/internal/handlers/workspace.go' - 'workspace-server/internal/handlers/registry.go' @@ -44,7 +44,7 @@ on: - 'tests/e2e/test_staging_external_runtime.sh' - '.github/workflows/e2e-staging-external.yml' pull_request: - branches: [staging, main] + branches: [main] paths: - 'workspace-server/internal/handlers/workspace.go' - 'workspace-server/internal/handlers/registry.go' diff --git a/.github/workflows/e2e-staging-saas.yml b/.github/workflows/e2e-staging-saas.yml index bab88409..43e81aba 100644 --- a/.github/workflows/e2e-staging-saas.yml +++ b/.github/workflows/e2e-staging-saas.yml @@ -20,13 +20,12 @@ name: E2E Staging SaaS (full lifecycle) # via the same paths watcher that e2e-api.yml uses) on: - # Fire on staging push too — previously this only ran on main, which - # meant the most thorough end-to-end test caught regressions AFTER - # they shipped to staging (and then to the auto-promote PR). Running - # on staging push catches them BEFORE the staging→main promotion - # opens, so a green canary into auto-promote is more meaningful. + # Trunk-based (Phase 3 of internal#81): main is the only branch. + # Previously this fired on staging push too because staging was a + # superset of main and ran the gate ahead of auto-promote; with no + # staging branch, main is where E2E gates the deploy. push: - branches: [staging, main] + branches: [main] paths: - 'workspace-server/internal/handlers/registry.go' - 'workspace-server/internal/handlers/workspace_provision.go' @@ -36,7 +35,7 @@ on: - 'tests/e2e/test_staging_full_saas.sh' - '.github/workflows/e2e-staging-saas.yml' pull_request: - branches: [staging, main] + branches: [main] paths: - 'workspace-server/internal/handlers/registry.go' - 'workspace-server/internal/handlers/workspace_provision.go' diff --git a/.github/workflows/redeploy-tenants-on-staging.yml b/.github/workflows/redeploy-tenants-on-staging.yml index 2726db9e..695f6643 100644 --- a/.github/workflows/redeploy-tenants-on-staging.yml +++ b/.github/workflows/redeploy-tenants-on-staging.yml @@ -36,7 +36,7 @@ on: workflow_run: workflows: ['publish-workspace-server-image'] types: [completed] - branches: [staging] + branches: [main] workflow_dispatch: inputs: target_tag: diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..847a85ce --- /dev/null +++ b/Makefile @@ -0,0 +1,28 @@ +# Top-level Makefile — convenience wrappers around docker compose. +# +# Most molecule-core dev work happens via these shortcuts. CI doesn't +# use this Makefile; CI calls docker compose / go test directly so the +# Makefile can evolve without breaking the build. + +.PHONY: help dev up down logs build test + +help: ## Show this help. + @grep -E '^[a-zA-Z_-]+:.*?## ' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-12s\033[0m %s\n", $$1, $$2}' + +dev: ## Start the full stack with air hot-reload for the platform service. + docker compose -f docker-compose.yml -f docker-compose.dev.yml up + +up: ## Start the full stack in production-shape mode (no air, normal Dockerfile). + docker compose up + +down: ## Stop the stack and remove containers (volumes preserved). + docker compose down + +logs: ## Tail logs from all services (Ctrl-C to detach). + docker compose logs -f + +build: ## Force a fresh build of the platform image (no cache). + docker compose build --no-cache platform + +test: ## Run Go unit tests in workspace-server/. + cd workspace-server && go test -race ./... diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml new file mode 100644 index 00000000..ac668dfd --- /dev/null +++ b/docker-compose.dev.yml @@ -0,0 +1,43 @@ +# docker-compose.dev.yml — overlay over docker-compose.yml for local dev +# with air-driven live reload of the platform (workspace-server) service. +# +# Usage: +# docker compose -f docker-compose.yml -f docker-compose.dev.yml up +# (or `make dev` shorthand from repo root) +# +# What this overlay changes vs docker-compose.yml alone: +# - Platform service uses workspace-server/Dockerfile.dev (air on top of +# golang:1.25-alpine) instead of the multi-stage prod Dockerfile. +# - Platform service bind-mounts the host's workspace-server/ source +# into /app/workspace-server so air sees source edits live. +# - Other services (postgres, redis, langfuse, etc.) inherit unchanged +# from docker-compose.yml. +# +# What stays the same: +# - All env vars, volumes, depends_on, healthchecks from docker-compose.yml. +# - Network topology + ports. +# - Postgres/Redis as service containers (no in-process replacements). + +services: + platform: + build: + context: . + dockerfile: workspace-server/Dockerfile.dev + # Rebind source: edits under host's workspace-server/ propagate live. + # The named volume on go-build-cache speeds up first build per container. + volumes: + - ./workspace-server:/app/workspace-server + - go-build-cache:/root/.cache/go-build + - go-mod-cache:/go/pkg/mod + # Air signals the running binary on rebuild; ensure shell stops cleanly. + init: true + # Mark the service as dev-mode so the platform can short-circuit any + # behavior that's incompatible with hot-reload (e.g. background + # cron-style watchers that don't survive process restart). No-op + # today; reserved for future flag use. + environment: + MOLECULE_DEV_HOT_RELOAD: "1" + +volumes: + go-build-cache: + go-mod-cache: diff --git a/workspace-server/.air.toml b/workspace-server/.air.toml new file mode 100644 index 00000000..6e365f3c --- /dev/null +++ b/workspace-server/.air.toml @@ -0,0 +1,49 @@ +# air.toml — live-reload config for local docker-compose dev mode. +# +# Active when the platform service runs from workspace-server/Dockerfile.dev +# (selected via docker-compose.dev.yml overlay). In production, the regular +# Dockerfile builds a static binary; air is dev-only. +# +# Reference: https://github.com/air-verse/air + +root = "." +testdata_dir = "testdata" +tmp_dir = "tmp" + +[build] + # Same build invocation as Dockerfile's builder stage minus the + # CGO_ENABLED=0 toggle (CGO ok in dev for richer race detector output). + cmd = "go build -o ./tmp/server ./cmd/server" + bin = "tmp/server" + full_bin = "" + args_bin = [] + # Watch every .go and .yaml file under workspace-server/. + include_ext = ["go", "yaml", "tmpl"] + # Don't watch tests, build artifacts, vendored deps, or migration .sql + # (migrations need a clean DB anyway — handled by docker-compose down/up). + exclude_dir = ["assets", "tmp", "vendor", "testdata", "node_modules"] + exclude_file = [] + # _test.go and *_mock.go shouldn't trigger a rebuild — saves cycles. + exclude_regex = ["_test\\.go$", "_mock\\.go$"] + exclude_unchanged = true + follow_symlink = false + log = "build-errors.log" + # Kill running binary 1s before starting new one. + kill_delay = "1s" + send_interrupt = true + stop_on_error = true + # Debounce: wait this long after last change before triggering rebuild. + delay = 500 + +[log] + time = false + +[color] + main = "magenta" + watcher = "cyan" + build = "yellow" + runner = "green" + +[misc] + # Don't keep the tmp/ dir around between runs. + clean_on_exit = true diff --git a/workspace-server/Dockerfile.dev b/workspace-server/Dockerfile.dev new file mode 100644 index 00000000..f8a0a1db --- /dev/null +++ b/workspace-server/Dockerfile.dev @@ -0,0 +1,38 @@ +# Dockerfile.dev — local-development image with air-driven live reload. +# +# Selected by docker-compose.dev.yml (overlay over docker-compose.yml). +# Production stays on workspace-server/Dockerfile (static binary, no air). +# +# Workflow: +# 1. docker compose -f docker-compose.yml -f docker-compose.dev.yml up +# 2. Edit any .go file under workspace-server/ +# 3. air detects, rebuilds, kills old binary, starts new one (~3-5s) +# 4. No `docker compose up --build` needed +# +# Templates + plugins are NOT pre-cloned here — air-mode assumes the +# developer's filesystem has the workspace-configs-templates/ + plugins/ +# dirs available, mounted at runtime via docker-compose.dev.yml. + +FROM golang:1.25-alpine + +# air + git (for go mod) + ca-certs (for TLS) + tzdata (for time-zone DB). +RUN apk add --no-cache git ca-certificates tzdata wget \ + && go install github.com/air-verse/air@latest + +WORKDIR /app/workspace-server + +# Pre-fetch deps so the first `air` rebuild on a fresh container is fast. +# These are bind-mount-overridden at runtime, so the COPY here is just +# to warm the module cache. +COPY workspace-server/go.mod workspace-server/go.sum ./ +RUN go mod download + +# Source is bind-mounted at runtime (see docker-compose.dev.yml volumes +# block) so the Dockerfile doesn't need to COPY it. air watches the +# bind-mounted dir for changes. + +ENV CGO_ENABLED=1 +ENV GOFLAGS="-buildvcs=false" + +# Run air with the .air.toml in the bind-mounted source dir. +CMD ["air", "-c", ".air.toml"] diff --git a/workspace-server/internal/handlers/org_helpers.go b/workspace-server/internal/handlers/org_helpers.go index f84baf3d..824fd2d7 100644 --- a/workspace-server/internal/handlers/org_helpers.go +++ b/workspace-server/internal/handlers/org_helpers.go @@ -6,6 +6,7 @@ package handlers import ( "fmt" + "log" "os" "path/filepath" "regexp" @@ -102,6 +103,56 @@ func loadWorkspaceEnv(orgBaseDir, filesDir string) map[string]string { return envVars } +// loadPersonaEnvFile merges per-role persona credentials into out. The file +// lives at $MOLECULE_PERSONA_ROOT//env (default +// /etc/molecule-bootstrap/personas) and is populated by the operator-host +// bootstrap kit — one persona per dev-tree role, each carrying the role's +// Gitea identity (GITEA_USER, GITEA_TOKEN, GITEA_TOKEN_SCOPES, +// GITEA_USER_EMAIL, GITEA_SSH_KEY_PATH). +// +// Lower precedence than the org and workspace .env files: callers should +// invoke this BEFORE parseEnvFile on those, so a workspace .env can +// override a persona-default value when needed. +// +// Silent no-op when role is empty, when the role name fails the safe-segment +// check, or when the env file does not exist (workspaces without a role — +// or running on hosts that don't ship the bootstrap dir — keep their old +// behavior). +func loadPersonaEnvFile(role string, out map[string]string) { + if !isSafeRoleName(role) { + if role != "" { + log.Printf("Org import: refusing persona env load for unsafe role name %q", role) + } + return + } + root := os.Getenv("MOLECULE_PERSONA_ROOT") + if root == "" { + root = "/etc/molecule-bootstrap/personas" + } + parseEnvFile(filepath.Join(root, role, "env"), out) +} + +// isSafeRoleName accepts a single path segment of [A-Za-z0-9_-]+. Rejects +// empty, ".", "..", and anything containing a path separator — even though +// the construct is admin-only, defense-in-depth keeps the persona dir +// shape invariant: one flat directory per role, no climbing out. +func isSafeRoleName(s string) bool { + if s == "" || s == "." || s == ".." { + return false + } + for _, c := range s { + switch { + case c >= 'a' && c <= 'z': + case c >= 'A' && c <= 'Z': + case c >= '0' && c <= '9': + case c == '-' || c == '_': + default: + return false + } + } + return true +} + // parseEnvFile reads a .env file and adds KEY=VALUE pairs to the map. // Skips comments (#) and empty lines. Values can be quoted. func parseEnvFile(path string, out map[string]string) { diff --git a/workspace-server/internal/handlers/org_import.go b/workspace-server/internal/handlers/org_import.go index d67087ca..e3be5823 100644 --- a/workspace-server/internal/handlers/org_import.go +++ b/workspace-server/internal/handlers/org_import.go @@ -443,10 +443,18 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX configFiles["system-prompt.md"] = []byte(ws.SystemPrompt) } - // Inject secrets from .env files as workspace secrets. - // Resolution: workspace .env → org root .env (workspace overrides org root). + // Inject secrets from persona env + .env files as workspace secrets. + // Resolution (later overrides earlier): + // 0. Persona env (per-role bootstrap creds; only when ws.Role is set + // and the operator-host bootstrap dir ships a matching file) + // 1. Org root .env (shared defaults) + // 2. Workspace-specific .env (per-workspace overrides) // Each line: KEY=VALUE → stored as encrypted workspace secret. envVars := map[string]string{} + // 0. Persona env (lowest precedence; injects the role's Gitea identity: + // GITEA_USER, GITEA_TOKEN, GITEA_TOKEN_SCOPES, GITEA_USER_EMAIL, + // GITEA_SSH_KEY_PATH). Workspace and org .env can override. + loadPersonaEnvFile(ws.Role, envVars) if orgBaseDir != "" { // 1. Org root .env (shared defaults) parseEnvFile(filepath.Join(orgBaseDir, ".env"), envVars) diff --git a/workspace-server/internal/handlers/org_persona_env_test.go b/workspace-server/internal/handlers/org_persona_env_test.go new file mode 100644 index 00000000..0c3bad59 --- /dev/null +++ b/workspace-server/internal/handlers/org_persona_env_test.go @@ -0,0 +1,171 @@ +package handlers + +import ( + "os" + "path/filepath" + "testing" +) + +// TestLoadPersonaEnvFile_HappyPath: the standard case — a persona-shaped +// env file exists at //env and its KEY=VALUE pairs land in +// the out map. Mirrors what the operator-host bootstrap kit ships: +// GITEA_USER, GITEA_TOKEN, GITEA_TOKEN_SCOPES, GITEA_USER_EMAIL, +// GITEA_SSH_KEY_PATH. +func TestLoadPersonaEnvFile_HappyPath(t *testing.T) { + root := t.TempDir() + roleDir := filepath.Join(root, "dev-lead") + if err := os.MkdirAll(roleDir, 0o755); err != nil { + t.Fatal(err) + } + envBody := `# Persona env file — mode 600 +GITEA_USER=dev-lead +GITEA_USER_EMAIL=dev-lead@agents.moleculesai.app +GITEA_TOKEN=abc123 +GITEA_TOKEN_SCOPES=write:repository,write:issue,read:user +GITEA_SSH_KEY_PATH=/etc/molecule-bootstrap/personas/dev-lead/ssh_priv +` + if err := os.WriteFile(filepath.Join(roleDir, "env"), []byte(envBody), 0o600); err != nil { + t.Fatal(err) + } + t.Setenv("MOLECULE_PERSONA_ROOT", root) + + out := map[string]string{} + loadPersonaEnvFile("dev-lead", out) + + want := map[string]string{ + "GITEA_USER": "dev-lead", + "GITEA_USER_EMAIL": "dev-lead@agents.moleculesai.app", + "GITEA_TOKEN": "abc123", + "GITEA_TOKEN_SCOPES": "write:repository,write:issue,read:user", + "GITEA_SSH_KEY_PATH": "/etc/molecule-bootstrap/personas/dev-lead/ssh_priv", + } + if len(out) != len(want) { + t.Fatalf("got %d keys, want %d: %#v", len(out), len(want), out) + } + for k, v := range want { + if out[k] != v { + t.Errorf("out[%q] = %q; want %q", k, out[k], v) + } + } +} + +// TestLoadPersonaEnvFile_MissingDir: when the persona dir doesn't exist +// (e.g. dev-only host without the bootstrap kit, or a workspace whose +// role isn't a known persona), it's a silent no-op — out stays empty, +// no panic, no log noise that would break callers. +func TestLoadPersonaEnvFile_MissingDir(t *testing.T) { + t.Setenv("MOLECULE_PERSONA_ROOT", t.TempDir()) // empty dir + out := map[string]string{} + loadPersonaEnvFile("nonexistent-role", out) + if len(out) != 0 { + t.Errorf("expected empty out, got %#v", out) + } +} + +// TestLoadPersonaEnvFile_EmptyRole: empty role string is the common case +// for non-dev workspaces (research/marketing/etc.). Skip silently. +func TestLoadPersonaEnvFile_EmptyRole(t *testing.T) { + t.Setenv("MOLECULE_PERSONA_ROOT", t.TempDir()) + out := map[string]string{} + loadPersonaEnvFile("", out) + if len(out) != 0 { + t.Errorf("empty role should produce empty out; got %#v", out) + } +} + +// TestLoadPersonaEnvFile_RejectsTraversal: even though role names come +// from server-side admin-only org templates, defense-in-depth — refuse +// any role string with path separators or "..". Verifies that a maliciously +// crafted template can't read /etc/passwd by setting role: "../../etc". +func TestLoadPersonaEnvFile_RejectsTraversal(t *testing.T) { + root := t.TempDir() + // Plant a file at /tmp/.../env so a bad traversal would reach it + if err := os.WriteFile(filepath.Join(root, "env"), []byte("STOLEN=yes\n"), 0o600); err != nil { + t.Fatal(err) + } + t.Setenv("MOLECULE_PERSONA_ROOT", filepath.Join(root, "personas")) + + for _, bad := range []string{"..", "../personas", "../etc/passwd", "/abs", "with/slash", "dot.in.middle", "with space", "back\\slash", ".", ""} { + out := map[string]string{} + loadPersonaEnvFile(bad, out) + if len(out) != 0 { + t.Errorf("role %q should have been rejected; got %#v", bad, out) + } + } +} + +// TestLoadPersonaEnvFile_DefaultRoot: when MOLECULE_PERSONA_ROOT is unset, +// the helper falls back to /etc/molecule-bootstrap/personas. We don't +// touch real /etc — just verify the function doesn't panic and produces +// empty out (since the test box isn't expected to ship that path). +func TestLoadPersonaEnvFile_DefaultRoot(t *testing.T) { + t.Setenv("MOLECULE_PERSONA_ROOT", "") // explicit empty + out := map[string]string{} + loadPersonaEnvFile("dev-lead", out) + // Don't assert content — production CI might or might not have the + // /etc dir mounted. Just verify the call returns cleanly. + _ = out +} + +// TestLoadPersonaEnvFile_PrecedenceCallerOverrides: the contract is "lower +// precedence than later .env files." The helper writes into out without +// removing existing keys, so a caller pre-populating out simulates a +// later layer overriding persona defaults. We verify the helper does NOT +// clobber pre-existing entries… actually, parseEnvFile DOES overwrite, +// so the caller-side ordering (persona → org → workspace) is what enforces +// precedence. This test pins that contract: persona is loaded into a +// fresh map, then later layers can override. +func TestLoadPersonaEnvFile_OverwritesEmptyMap(t *testing.T) { + root := t.TempDir() + roleDir := filepath.Join(root, "core-be") + if err := os.MkdirAll(roleDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(roleDir, "env"), + []byte("GITEA_TOKEN=persona-value\n"), 0o600); err != nil { + t.Fatal(err) + } + t.Setenv("MOLECULE_PERSONA_ROOT", root) + + out := map[string]string{"GITEA_TOKEN": "preset"} + loadPersonaEnvFile("core-be", out) + + // Persona helper is meant to populate a FRESH map first in the + // caller's flow; calling it on a pre-populated map and seeing the + // value get overwritten is consistent with parseEnvFile semantics. + if out["GITEA_TOKEN"] != "persona-value" { + t.Errorf("loadPersonaEnvFile did not write into existing map; got %q", out["GITEA_TOKEN"]) + } +} + +// TestIsSafeRoleName_Acceptance: positive + negative cases for the +// validator. Pinned because every dev-tree role name must pass. +func TestIsSafeRoleName_Acceptance(t *testing.T) { + good := []string{ + "dev-lead", "core-be", "cp-security", "infra-runtime-be", + "sdk-dev", "plugin-dev", "documentation-specialist", + "triage-operator", "fullstack-engineer", "release-manager", + "core_underscore_ok", "X", "a1", "Z9-0", + } + for _, s := range good { + if !isSafeRoleName(s) { + t.Errorf("isSafeRoleName(%q) = false; want true", s) + } + } + bad := []string{ + "", ".", "..", "with/slash", "/abs", "dot.in.middle", + "with space", "back\\slash", "trailing-", // trailing-hyphen is fine actually + "with$dollar", "with?question", "newline\nsplit", + } + // trailing-hyphen IS allowed; remove from "bad" list: + bad = []string{ + "", ".", "..", "with/slash", "/abs", "dot.in.middle", + "with space", "back\\slash", "with$dollar", "with?question", + "newline\nsplit", + } + for _, s := range bad { + if isSafeRoleName(s) { + t.Errorf("isSafeRoleName(%q) = true; want false", s) + } + } +} diff --git a/workspace-server/internal/pendinguploads/sweeper_test.go b/workspace-server/internal/pendinguploads/sweeper_test.go index 4133125d..8095e83d 100644 --- a/workspace-server/internal/pendinguploads/sweeper_test.go +++ b/workspace-server/internal/pendinguploads/sweeper_test.go @@ -207,20 +207,25 @@ func TestStartSweeper_TransientErrorDoesNotCrashLoop(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - // 50ms ticker so the second cycle fires quickly enough for the test. - // We re-export SweepInterval as a const, but tests use the public - // StartSweeper that takes its own interval — wait, the public - // StartSweeper signature uses the package-level SweepInterval. Hmm, - // this means the test takes ~5 minutes. Let me reconsider. - // - // (We patch the test below to just look at the immediate-sweep call - // + an error path, since the immediate call is enough to prove the - // "error doesn't crash" contract — the loop continues afterward - // regardless of timing.) + // Capture metric baseline so we can wait for the error counter to + // settle before returning — otherwise this test's leaked metric + // write races with the next test's metricDelta() baseline read and + // causes a non-deterministic +1 leak (manifests as + // TestStartSweeper_RecordsMetricsOnSuccess: "error counter delta=1, + // want 0"). cycleDone fires inside the fake's Sweep defer, BEFORE + // sweepOnce records the error metric — so cancel() right after + // waitForCycle is too early. + _, _, deltaError := metricDelta(t) + go pendinguploads.StartSweeper(ctx, store, time.Hour) // Wait for the first (errored) cycle. store.waitForCycle(t, 1, 2*time.Second) + // Wait for the goroutine to record the error metric. After this + // returns, sweepOnce has fully completed and a subsequent cancel() + // stops the loop on the next select pass with no in-flight metric + // writes outstanding. + waitForMetricDelta(t, deltaError, 1, 2*time.Second) // Cancel — the goroutine returns cleanly, proving the error path // didn't crash the loop. Without this fix the goroutine would have // either panicked (process abort visible at exit) or stuck (this