feat(platform): provision-time env mutator hook for plugins

Add `provisionhook.EnvMutator` extension point so out-of-tree plugins
(e.g. github-app-auth, vault-secrets) can inject or override env vars
right before container Start, without forking core or piling more
provider-specific code into the handlers package.

WorkspaceHandler gains an optional `envMutators *provisionhook.Registry`
wired in via SetEnvMutators during boot. The hook fires after built-in
secret loads + per-agent git identity, so plugins can both read what's
already there and override anything they own (GIT_AUTHOR_*, GITHUB_TOKEN).

A nil registry is a no-op via Registry.Run's nil-receiver branch — keeps
the hot path a single nil compare and means existing flows stay green
even with zero plugins registered.

Mutator failure aborts provisioning and marks the workspace failed with
the wrapped error in last_sample_error. Failing fast surfaces the cause
to the operator instead of letting an agent boot into opaque "git push
401" loops it can never recover from on its own.

Tests cover ordered execution, chained env visibility, first-error abort,
nil-receiver no-op, nil-mutator drop, registration order, and concurrent
register-vs-run safety (-race clean).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
rabbitblood 2026-04-16 06:15:36 -07:00
parent 1a85ca7656
commit e08f28c962
4 changed files with 346 additions and 0 deletions

View File

@ -15,6 +15,7 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
"github.com/Molecule-AI/molecule-monorepo/platform/pkg/provisionhook"
"github.com/gin-gonic/gin"
"github.com/lib/pq"
"github.com/google/uuid"
@ -25,6 +26,11 @@ type WorkspaceHandler struct {
provisioner *provisioner.Provisioner
platformURL string
configsDir string // path to workspace-configs-templates/ (for reading templates)
// envMutators runs registered EnvMutator plugins right before
// container Start, after built-in secret loads. Nil = no plugins
// registered; Registry.Run handles a nil receiver as a no-op so the
// hot path stays a single nil-pointer compare.
envMutators *provisionhook.Registry
}
func NewWorkspaceHandler(b *events.Broadcaster, p *provisioner.Provisioner, platformURL, configsDir string) *WorkspaceHandler {
@ -36,6 +42,15 @@ func NewWorkspaceHandler(b *events.Broadcaster, p *provisioner.Provisioner, plat
}
}
// SetEnvMutators wires a provisionhook.Registry into the handler. Plugins
// living in separate repos register on the same Registry instance during
// boot (see cmd/server/main.go) and main.go calls this setter once before
// router.Setup. Re-callable for tests but not safe under concurrent
// provisions — only invoke during single-threaded init.
func (h *WorkspaceHandler) SetEnvMutators(r *provisionhook.Registry) {
h.envMutators = r
}
// Create handles POST /workspaces
func (h *WorkspaceHandler) Create(c *gin.Context) {
var payload models.CreateWorkspacePayload

View File

@ -95,6 +95,25 @@ func (h *WorkspaceHandler) provisionWorkspaceOpts(workspaceID, templatePath stri
// workspace_secret named GIT_AUTHOR_NAME if they want custom identity.
applyAgentGitIdentity(envVars, payload.Name)
// Plugin extension point: run any registered EnvMutators (e.g.
// github-app-auth, vault-secrets) AFTER built-in identity injection so
// plugins can override or augment GIT_AUTHOR_*, GITHUB_TOKEN, etc.
// A failure here aborts provisioning — a missing GitHub App token
// would manifest later as opaque "git push 401" loops, and the agent
// never recovers. Failing fast here surfaces the cause to the operator.
if err := h.envMutators.Run(ctx, workspaceID, envVars); err != nil {
log.Printf("Provisioner: env mutator chain failed for %s: %v", workspaceID, err)
h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISION_FAILED", workspaceID, map[string]interface{}{
"error": err.Error(),
})
if _, dbErr := db.DB.ExecContext(ctx,
`UPDATE workspaces SET status = 'failed', last_sample_error = $2, updated_at = now() WHERE id = $1`,
workspaceID, err.Error()); dbErr != nil {
log.Printf("Provisioner: failed to mark workspace %s as failed after mutator error: %v", workspaceID, dbErr)
}
return
}
cfg := h.buildProvisionerConfig(workspaceID, templatePath, configFiles, payload, envVars, pluginsPath, awarenessNamespace)
cfg.ResetClaudeSession = resetClaudeSession // #12

View File

@ -0,0 +1,137 @@
// Package provisionhook is the public extension point that lets external
// plugins mutate the env map a workspace container will boot with, just
// before the provisioner calls Start(cfg).
//
// The package lives under pkg/ (not internal/) because plugins import it
// from outside this Go module. Anything outside pkg/ is core-only.
//
// # Why this exists
//
// Auth providers (GitHub App tokens, GitLab tokens, Bitbucket app
// passwords, internal PAT vaults), secret managers (Vault, AWS Secrets
// Manager, GCP Secret Manager), per-tenant config injectors, and
// observability sidecars all want to write env vars into the workspace
// container before it starts. Each is an OPTIONAL concern that only some
// deployments need. Hardcoding any of them in the platform binary
// violates the "core stays small, capabilities are plugins" principle
// (CEO 2026-04-16, after the monorepo → 44 sub-repos split).
//
// # Plugin shape
//
// A plugin implements EnvMutator and registers an instance with a
// Registry at platform startup. The provisioner calls Run(...) on the
// registry before each workspace container starts.
//
// Plugins live in their own Go modules + repos (e.g.
// github.com/Molecule-AI/molecule-ai-plugin-github-app-auth). Each
// plugin ships its own cmd/server/main.go that imports core's startup
// function + registers the plugin's mutator. Operators deploy the
// plugin binary instead of core's vanilla cmd/server when they want
// the plugin's behaviour.
//
// # Failure handling
//
// MutateEnv returning a non-nil error aborts the provision (workspace
// is marked 'failed', container never starts). Plugins should fail open
// on transient external-service errors (log + return nil) so a flaky
// upstream doesn't block agent provisioning. Reserve errors for hard
// config bugs that the operator must fix.
//
// # Concurrency
//
// Registry is safe for concurrent registration + execution. MutateEnv
// implementations should be safe to call from goroutines (the
// provisioner runs each workspace's provision in its own goroutine).
package provisionhook
import (
"context"
"fmt"
"sync"
)
// EnvMutator is implemented by plugins that want to inject env vars
// into a workspace container at provision time.
//
// - Name returns a stable identifier for logging / metrics. Should
// match the plugin's repo / module name (e.g. "github-app-auth").
// - MutateEnv receives the workspace ID, the create payload, and a
// mutable env map. It can read existing values, add new ones, or
// overwrite as needed. Mutations are visible to subsequent
// mutators in the chain (registration order).
type EnvMutator interface {
Name() string
MutateEnv(ctx context.Context, workspaceID string, env map[string]string) error
}
// Registry holds the ordered list of EnvMutator instances the
// provisioner runs before each workspace boot. Safe for concurrent
// registration + execution.
type Registry struct {
mu sync.RWMutex
mutators []EnvMutator
}
// NewRegistry returns an empty registry. The platform creates one at
// startup; plugins call Register on it.
func NewRegistry() *Registry {
return &Registry{}
}
// Register adds a mutator to the chain. Mutators run in registration
// order. Registering the same instance twice is allowed (it'll run
// twice) — the registry doesn't dedupe; that's the caller's
// responsibility if dedup matters.
func (r *Registry) Register(m EnvMutator) {
if m == nil {
return
}
r.mu.Lock()
defer r.mu.Unlock()
r.mutators = append(r.mutators, m)
}
// Len reports how many mutators are registered. Used by the platform's
// boot log so operators can see which extension hooks are wired.
func (r *Registry) Len() int {
r.mu.RLock()
defer r.mu.RUnlock()
return len(r.mutators)
}
// Names returns the names of registered mutators in registration order.
// Used by the boot log so operators can grep for which plugins are
// active.
func (r *Registry) Names() []string {
r.mu.RLock()
defer r.mu.RUnlock()
names := make([]string, len(r.mutators))
for i, m := range r.mutators {
names[i] = m.Name()
}
return names
}
// Run calls every registered mutator in order. The first one to return
// a non-nil error aborts the chain — subsequent mutators do NOT run,
// and the error is returned to the caller (which marks the workspace
// failed).
//
// A nil registry is a no-op (returns nil) so the provisioner doesn't
// have to nil-check before calling.
func (r *Registry) Run(ctx context.Context, workspaceID string, env map[string]string) error {
if r == nil {
return nil
}
r.mu.RLock()
mutators := make([]EnvMutator, len(r.mutators))
copy(mutators, r.mutators)
r.mu.RUnlock()
for _, m := range mutators {
if err := m.MutateEnv(ctx, workspaceID, env); err != nil {
return fmt.Errorf("provisionhook %q: %w", m.Name(), err)
}
}
return nil
}

View File

@ -0,0 +1,175 @@
package provisionhook
import (
"context"
"errors"
"sync"
"testing"
)
// fakeMutator is a test stand-in. Records what MutateEnv received +
// optionally returns a configured error or modifies the env map.
type fakeMutator struct {
name string
mu sync.Mutex
calls int
lastEnv map[string]string
lastWS string
returnErr error
envToInject map[string]string
}
func (f *fakeMutator) Name() string { return f.name }
func (f *fakeMutator) MutateEnv(ctx context.Context, workspaceID string, env map[string]string) error {
f.mu.Lock()
defer f.mu.Unlock()
f.calls++
f.lastWS = workspaceID
f.lastEnv = env
for k, v := range f.envToInject {
env[k] = v
}
return f.returnErr
}
func TestRegistry_RunsMutatorsInOrder(t *testing.T) {
r := NewRegistry()
a := &fakeMutator{name: "a", envToInject: map[string]string{"A": "1"}}
b := &fakeMutator{name: "b", envToInject: map[string]string{"B": "2"}}
r.Register(a)
r.Register(b)
env := map[string]string{}
if err := r.Run(context.Background(), "ws-1", env); err != nil {
t.Fatal(err)
}
if env["A"] != "1" || env["B"] != "2" {
t.Errorf("env mutations not applied: %v", env)
}
if a.calls != 1 || b.calls != 1 {
t.Errorf("call counts: a=%d b=%d", a.calls, b.calls)
}
}
func TestRegistry_LaterMutatorSeesEarlierMutations(t *testing.T) {
// Mutators run in chain — second mutator should see the env first
// mutator added. This is the entire point of running them in order
// (e.g. a secret-resolver plugin can depend on a tenant-config
// plugin running first).
r := NewRegistry()
r.Register(&fakeMutator{name: "first", envToInject: map[string]string{"TENANT": "acme"}})
saw := ""
r.Register(&envInspector{onCall: func(env map[string]string) { saw = env["TENANT"] }})
env := map[string]string{}
_ = r.Run(context.Background(), "ws-1", env)
if saw != "acme" {
t.Errorf("second mutator should have seen TENANT=acme, saw %q", saw)
}
}
func TestRegistry_FirstErrorAbortsChain(t *testing.T) {
r := NewRegistry()
a := &fakeMutator{name: "a", returnErr: errors.New("boom")}
b := &fakeMutator{name: "b"}
r.Register(a)
r.Register(b)
err := r.Run(context.Background(), "ws-1", map[string]string{})
if err == nil {
t.Fatal("expected error from first mutator to propagate")
}
if b.calls != 0 {
t.Errorf("second mutator should not run after first errors; got %d calls", b.calls)
}
// Error should be wrapped with the mutator name so logs say which
// plugin failed, not just the underlying error.
if !contains(err.Error(), `provisionhook "a"`) {
t.Errorf("error should name the failing mutator: %v", err)
}
}
func TestRegistry_NilReceiverIsNoop(t *testing.T) {
// The provisioner shouldn't have to nil-check before calling.
var r *Registry
if err := r.Run(context.Background(), "ws-1", map[string]string{}); err != nil {
t.Errorf("nil registry should return nil error: %v", err)
}
}
func TestRegistry_NilMutatorIsIgnored(t *testing.T) {
r := NewRegistry()
r.Register(nil)
r.Register(&fakeMutator{name: "real"})
if r.Len() != 1 {
t.Errorf("nil mutator should have been dropped; len=%d", r.Len())
}
}
func TestRegistry_NamesReturnsRegistrationOrder(t *testing.T) {
r := NewRegistry()
r.Register(&fakeMutator{name: "tenant-config"})
r.Register(&fakeMutator{name: "github-app-auth"})
r.Register(&fakeMutator{name: "vault-secrets"})
got := r.Names()
want := []string{"tenant-config", "github-app-auth", "vault-secrets"}
if !equalSlices(got, want) {
t.Errorf("names: got %v, want %v", got, want)
}
}
func TestRegistry_ConcurrentRegisterAndRun(t *testing.T) {
// Sanity: the mutex prevents data races between registration +
// execution. Run with `go test -race`.
r := NewRegistry()
var wg sync.WaitGroup
for i := 0; i < 50; i++ {
wg.Add(2)
go func(i int) {
defer wg.Done()
r.Register(&fakeMutator{name: "concurrent"})
}(i)
go func() {
defer wg.Done()
_ = r.Run(context.Background(), "ws-x", map[string]string{})
}()
}
wg.Wait()
if r.Len() != 50 {
t.Errorf("expected 50 registered mutators, got %d", r.Len())
}
}
// envInspector is a tiny mutator that calls a callback on each
// invocation. Used by TestRegistry_LaterMutatorSeesEarlierMutations.
type envInspector struct {
onCall func(env map[string]string)
}
func (e *envInspector) Name() string { return "inspector" }
func (e *envInspector) MutateEnv(_ context.Context, _ string, env map[string]string) error {
e.onCall(env)
return nil
}
func contains(haystack, needle string) bool {
for i := 0; i+len(needle) <= len(haystack); i++ {
if haystack[i:i+len(needle)] == needle {
return true
}
}
return false
}
func equalSlices(a, b []string) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}