diff --git a/platform/internal/handlers/workspace.go b/platform/internal/handlers/workspace.go index fafca3f8..8fea5c01 100644 --- a/platform/internal/handlers/workspace.go +++ b/platform/internal/handlers/workspace.go @@ -15,6 +15,7 @@ import ( "github.com/Molecule-AI/molecule-monorepo/platform/internal/models" "github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner" "github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth" + "github.com/Molecule-AI/molecule-monorepo/platform/pkg/provisionhook" "github.com/gin-gonic/gin" "github.com/lib/pq" "github.com/google/uuid" @@ -25,6 +26,11 @@ type WorkspaceHandler struct { provisioner *provisioner.Provisioner platformURL string configsDir string // path to workspace-configs-templates/ (for reading templates) + // envMutators runs registered EnvMutator plugins right before + // container Start, after built-in secret loads. Nil = no plugins + // registered; Registry.Run handles a nil receiver as a no-op so the + // hot path stays a single nil-pointer compare. + envMutators *provisionhook.Registry } func NewWorkspaceHandler(b *events.Broadcaster, p *provisioner.Provisioner, platformURL, configsDir string) *WorkspaceHandler { @@ -36,6 +42,15 @@ func NewWorkspaceHandler(b *events.Broadcaster, p *provisioner.Provisioner, plat } } +// SetEnvMutators wires a provisionhook.Registry into the handler. Plugins +// living in separate repos register on the same Registry instance during +// boot (see cmd/server/main.go) and main.go calls this setter once before +// router.Setup. Re-callable for tests but not safe under concurrent +// provisions — only invoke during single-threaded init. +func (h *WorkspaceHandler) SetEnvMutators(r *provisionhook.Registry) { + h.envMutators = r +} + // Create handles POST /workspaces func (h *WorkspaceHandler) Create(c *gin.Context) { var payload models.CreateWorkspacePayload diff --git a/platform/internal/handlers/workspace_provision.go b/platform/internal/handlers/workspace_provision.go index 3583c5c4..73fc3fe5 100644 --- a/platform/internal/handlers/workspace_provision.go +++ b/platform/internal/handlers/workspace_provision.go @@ -95,6 +95,25 @@ func (h *WorkspaceHandler) provisionWorkspaceOpts(workspaceID, templatePath stri // workspace_secret named GIT_AUTHOR_NAME if they want custom identity. applyAgentGitIdentity(envVars, payload.Name) + // Plugin extension point: run any registered EnvMutators (e.g. + // github-app-auth, vault-secrets) AFTER built-in identity injection so + // plugins can override or augment GIT_AUTHOR_*, GITHUB_TOKEN, etc. + // A failure here aborts provisioning — a missing GitHub App token + // would manifest later as opaque "git push 401" loops, and the agent + // never recovers. Failing fast here surfaces the cause to the operator. + if err := h.envMutators.Run(ctx, workspaceID, envVars); err != nil { + log.Printf("Provisioner: env mutator chain failed for %s: %v", workspaceID, err) + h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISION_FAILED", workspaceID, map[string]interface{}{ + "error": err.Error(), + }) + if _, dbErr := db.DB.ExecContext(ctx, + `UPDATE workspaces SET status = 'failed', last_sample_error = $2, updated_at = now() WHERE id = $1`, + workspaceID, err.Error()); dbErr != nil { + log.Printf("Provisioner: failed to mark workspace %s as failed after mutator error: %v", workspaceID, dbErr) + } + return + } + cfg := h.buildProvisionerConfig(workspaceID, templatePath, configFiles, payload, envVars, pluginsPath, awarenessNamespace) cfg.ResetClaudeSession = resetClaudeSession // #12 diff --git a/platform/pkg/provisionhook/mutator.go b/platform/pkg/provisionhook/mutator.go new file mode 100644 index 00000000..16b8a439 --- /dev/null +++ b/platform/pkg/provisionhook/mutator.go @@ -0,0 +1,137 @@ +// Package provisionhook is the public extension point that lets external +// plugins mutate the env map a workspace container will boot with, just +// before the provisioner calls Start(cfg). +// +// The package lives under pkg/ (not internal/) because plugins import it +// from outside this Go module. Anything outside pkg/ is core-only. +// +// # Why this exists +// +// Auth providers (GitHub App tokens, GitLab tokens, Bitbucket app +// passwords, internal PAT vaults), secret managers (Vault, AWS Secrets +// Manager, GCP Secret Manager), per-tenant config injectors, and +// observability sidecars all want to write env vars into the workspace +// container before it starts. Each is an OPTIONAL concern that only some +// deployments need. Hardcoding any of them in the platform binary +// violates the "core stays small, capabilities are plugins" principle +// (CEO 2026-04-16, after the monorepo → 44 sub-repos split). +// +// # Plugin shape +// +// A plugin implements EnvMutator and registers an instance with a +// Registry at platform startup. The provisioner calls Run(...) on the +// registry before each workspace container starts. +// +// Plugins live in their own Go modules + repos (e.g. +// github.com/Molecule-AI/molecule-ai-plugin-github-app-auth). Each +// plugin ships its own cmd/server/main.go that imports core's startup +// function + registers the plugin's mutator. Operators deploy the +// plugin binary instead of core's vanilla cmd/server when they want +// the plugin's behaviour. +// +// # Failure handling +// +// MutateEnv returning a non-nil error aborts the provision (workspace +// is marked 'failed', container never starts). Plugins should fail open +// on transient external-service errors (log + return nil) so a flaky +// upstream doesn't block agent provisioning. Reserve errors for hard +// config bugs that the operator must fix. +// +// # Concurrency +// +// Registry is safe for concurrent registration + execution. MutateEnv +// implementations should be safe to call from goroutines (the +// provisioner runs each workspace's provision in its own goroutine). +package provisionhook + +import ( + "context" + "fmt" + "sync" +) + +// EnvMutator is implemented by plugins that want to inject env vars +// into a workspace container at provision time. +// +// - Name returns a stable identifier for logging / metrics. Should +// match the plugin's repo / module name (e.g. "github-app-auth"). +// - MutateEnv receives the workspace ID, the create payload, and a +// mutable env map. It can read existing values, add new ones, or +// overwrite as needed. Mutations are visible to subsequent +// mutators in the chain (registration order). +type EnvMutator interface { + Name() string + MutateEnv(ctx context.Context, workspaceID string, env map[string]string) error +} + +// Registry holds the ordered list of EnvMutator instances the +// provisioner runs before each workspace boot. Safe for concurrent +// registration + execution. +type Registry struct { + mu sync.RWMutex + mutators []EnvMutator +} + +// NewRegistry returns an empty registry. The platform creates one at +// startup; plugins call Register on it. +func NewRegistry() *Registry { + return &Registry{} +} + +// Register adds a mutator to the chain. Mutators run in registration +// order. Registering the same instance twice is allowed (it'll run +// twice) — the registry doesn't dedupe; that's the caller's +// responsibility if dedup matters. +func (r *Registry) Register(m EnvMutator) { + if m == nil { + return + } + r.mu.Lock() + defer r.mu.Unlock() + r.mutators = append(r.mutators, m) +} + +// Len reports how many mutators are registered. Used by the platform's +// boot log so operators can see which extension hooks are wired. +func (r *Registry) Len() int { + r.mu.RLock() + defer r.mu.RUnlock() + return len(r.mutators) +} + +// Names returns the names of registered mutators in registration order. +// Used by the boot log so operators can grep for which plugins are +// active. +func (r *Registry) Names() []string { + r.mu.RLock() + defer r.mu.RUnlock() + names := make([]string, len(r.mutators)) + for i, m := range r.mutators { + names[i] = m.Name() + } + return names +} + +// Run calls every registered mutator in order. The first one to return +// a non-nil error aborts the chain — subsequent mutators do NOT run, +// and the error is returned to the caller (which marks the workspace +// failed). +// +// A nil registry is a no-op (returns nil) so the provisioner doesn't +// have to nil-check before calling. +func (r *Registry) Run(ctx context.Context, workspaceID string, env map[string]string) error { + if r == nil { + return nil + } + r.mu.RLock() + mutators := make([]EnvMutator, len(r.mutators)) + copy(mutators, r.mutators) + r.mu.RUnlock() + + for _, m := range mutators { + if err := m.MutateEnv(ctx, workspaceID, env); err != nil { + return fmt.Errorf("provisionhook %q: %w", m.Name(), err) + } + } + return nil +} diff --git a/platform/pkg/provisionhook/mutator_test.go b/platform/pkg/provisionhook/mutator_test.go new file mode 100644 index 00000000..f81daf11 --- /dev/null +++ b/platform/pkg/provisionhook/mutator_test.go @@ -0,0 +1,175 @@ +package provisionhook + +import ( + "context" + "errors" + "sync" + "testing" +) + +// fakeMutator is a test stand-in. Records what MutateEnv received + +// optionally returns a configured error or modifies the env map. +type fakeMutator struct { + name string + mu sync.Mutex + calls int + lastEnv map[string]string + lastWS string + returnErr error + envToInject map[string]string +} + +func (f *fakeMutator) Name() string { return f.name } + +func (f *fakeMutator) MutateEnv(ctx context.Context, workspaceID string, env map[string]string) error { + f.mu.Lock() + defer f.mu.Unlock() + f.calls++ + f.lastWS = workspaceID + f.lastEnv = env + for k, v := range f.envToInject { + env[k] = v + } + return f.returnErr +} + +func TestRegistry_RunsMutatorsInOrder(t *testing.T) { + r := NewRegistry() + a := &fakeMutator{name: "a", envToInject: map[string]string{"A": "1"}} + b := &fakeMutator{name: "b", envToInject: map[string]string{"B": "2"}} + r.Register(a) + r.Register(b) + + env := map[string]string{} + if err := r.Run(context.Background(), "ws-1", env); err != nil { + t.Fatal(err) + } + if env["A"] != "1" || env["B"] != "2" { + t.Errorf("env mutations not applied: %v", env) + } + if a.calls != 1 || b.calls != 1 { + t.Errorf("call counts: a=%d b=%d", a.calls, b.calls) + } +} + +func TestRegistry_LaterMutatorSeesEarlierMutations(t *testing.T) { + // Mutators run in chain — second mutator should see the env first + // mutator added. This is the entire point of running them in order + // (e.g. a secret-resolver plugin can depend on a tenant-config + // plugin running first). + r := NewRegistry() + r.Register(&fakeMutator{name: "first", envToInject: map[string]string{"TENANT": "acme"}}) + saw := "" + r.Register(&envInspector{onCall: func(env map[string]string) { saw = env["TENANT"] }}) + + env := map[string]string{} + _ = r.Run(context.Background(), "ws-1", env) + if saw != "acme" { + t.Errorf("second mutator should have seen TENANT=acme, saw %q", saw) + } +} + +func TestRegistry_FirstErrorAbortsChain(t *testing.T) { + r := NewRegistry() + a := &fakeMutator{name: "a", returnErr: errors.New("boom")} + b := &fakeMutator{name: "b"} + r.Register(a) + r.Register(b) + + err := r.Run(context.Background(), "ws-1", map[string]string{}) + if err == nil { + t.Fatal("expected error from first mutator to propagate") + } + if b.calls != 0 { + t.Errorf("second mutator should not run after first errors; got %d calls", b.calls) + } + // Error should be wrapped with the mutator name so logs say which + // plugin failed, not just the underlying error. + if !contains(err.Error(), `provisionhook "a"`) { + t.Errorf("error should name the failing mutator: %v", err) + } +} + +func TestRegistry_NilReceiverIsNoop(t *testing.T) { + // The provisioner shouldn't have to nil-check before calling. + var r *Registry + if err := r.Run(context.Background(), "ws-1", map[string]string{}); err != nil { + t.Errorf("nil registry should return nil error: %v", err) + } +} + +func TestRegistry_NilMutatorIsIgnored(t *testing.T) { + r := NewRegistry() + r.Register(nil) + r.Register(&fakeMutator{name: "real"}) + if r.Len() != 1 { + t.Errorf("nil mutator should have been dropped; len=%d", r.Len()) + } +} + +func TestRegistry_NamesReturnsRegistrationOrder(t *testing.T) { + r := NewRegistry() + r.Register(&fakeMutator{name: "tenant-config"}) + r.Register(&fakeMutator{name: "github-app-auth"}) + r.Register(&fakeMutator{name: "vault-secrets"}) + got := r.Names() + want := []string{"tenant-config", "github-app-auth", "vault-secrets"} + if !equalSlices(got, want) { + t.Errorf("names: got %v, want %v", got, want) + } +} + +func TestRegistry_ConcurrentRegisterAndRun(t *testing.T) { + // Sanity: the mutex prevents data races between registration + + // execution. Run with `go test -race`. + r := NewRegistry() + var wg sync.WaitGroup + for i := 0; i < 50; i++ { + wg.Add(2) + go func(i int) { + defer wg.Done() + r.Register(&fakeMutator{name: "concurrent"}) + }(i) + go func() { + defer wg.Done() + _ = r.Run(context.Background(), "ws-x", map[string]string{}) + }() + } + wg.Wait() + if r.Len() != 50 { + t.Errorf("expected 50 registered mutators, got %d", r.Len()) + } +} + +// envInspector is a tiny mutator that calls a callback on each +// invocation. Used by TestRegistry_LaterMutatorSeesEarlierMutations. +type envInspector struct { + onCall func(env map[string]string) +} + +func (e *envInspector) Name() string { return "inspector" } +func (e *envInspector) MutateEnv(_ context.Context, _ string, env map[string]string) error { + e.onCall(env) + return nil +} + +func contains(haystack, needle string) bool { + for i := 0; i+len(needle) <= len(haystack); i++ { + if haystack[i:i+len(needle)] == needle { + return true + } + } + return false +} + +func equalSlices(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +}