From 496363bdec3318eebad01bb38f1e3d6fa799d9a9 Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Thu, 16 Apr 2026 00:45:26 -0700 Subject: [PATCH] feat(provisioner): per-agent git identity via GIT_AUTHOR_* env vars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every workspace now commits under its own name. Step 3 of the three-step agent-separation plan (platform-level git identity today; GitHub App migration follows as Option 1). ## Problem All 20+ agents in the molecule-dev template (PM, Dev Lead, Research Lead, FE, BE, DevOps, Security, QA, UIUX, Marketing roles, etc.) share a single GITHUB_TOKEN — specifically the CEO's personal PAT. So every commit, PR, and issue across the live repos ends up attributed to HongmingWang-Rabbit. `git log` can't distinguish "which agent wrote this code" from "did the CEO write it"; neither can the authority-verification rule in triage-operator/philosophy.md (rule #3). ## Fix When the provisioner starts a workspace container, it now sets: GIT_AUTHOR_NAME = "Molecule AI <workspace name>" GIT_AUTHOR_EMAIL = <slug>@agents.moleculesai.app GIT_COMMITTER_NAME = (same) GIT_COMMITTER_EMAIL = (same) Git prefers these env vars over `git config user.name` / `user.email`, so no per-container git-config step is needed; every commit automatically carries the right authorship. Examples (20 agents, 20 distinct identities): Frontend Engineer → frontend-engineer@agents.moleculesai.app Backend Engineer → backend-engineer@agents.moleculesai.app Product Marketing Manager → product-marketing-manager@agents.moleculesai.app UIUX Designer → uiux-designer@agents.moleculesai.app Domain `agents.moleculesai.app` is deliberate: marks the email as a bot address without resembling a real inbox. ## Operator override preserved `applyAgentGitIdentity` runs AFTER the secret-load loops in `provisionWorkspaceOpts`, but uses `setIfEmpty` so any workspace_secret with the same key wins. 
Teams that want custom authorship (shared org signing identity, a person-on-the-loop owner) can still set `GIT_AUTHOR_NAME` via /workspaces/:id/secrets and get their value through to git. ## What this does NOT solve (yet) - PR / issue authorship is still whoever owns GITHUB_TOKEN (the shared PAT). That needs the GitHub App migration (Option 1, next PR). The commit-level split shipped here is the prerequisite: the App path will keep these env vars and just swap the PAT for a short-lived installation token. - Existing containers continue with their pre-fix env (git env vars are baked in at container-create time). Applying is one plain `POST /workspaces/:id/restart` per agent after this merges + deploys — the restart goes through provisionWorkspace which picks up the new injection. ## Tests `agent_git_identity_test.go` — 4 behavior tests + a 10-row slug test: - fills all 4 env vars from a workspace name - operator override via pre-set env is preserved (setIfEmpty semantics) - empty / whitespace workspace name is a no-op (no `unknown@...` emails) - nil map doesn't panic (defensive) - slugify handles spaces / punctuation / edge hyphens / em-dashes All 15 cases pass; platform build clean. 
Co-Authored-By: Claude Opus 4.6 (1M context)
---
Review fixups folded in (the index hashes below predate them):
 - setIfEmpty: a key that is present but blank now counts as unset
 - applyAgentGitIdentity: a name that slugifies to "" is a no-op instead
   of producing a bare "@agents.moleculesai.app" address

 .../internal/handlers/agent_git_identity.go   |  82 ++++++++++++++
 .../handlers/agent_git_identity_test.go       | 115 ++++++++++++++++++++
 .../internal/handlers/workspace_provision.go  |  11 ++
 3 files changed, 208 insertions(+)
 create mode 100644 platform/internal/handlers/agent_git_identity.go
 create mode 100644 platform/internal/handlers/agent_git_identity_test.go

diff --git a/platform/internal/handlers/agent_git_identity.go b/platform/internal/handlers/agent_git_identity.go
new file mode 100644
index 00000000..929160df
--- /dev/null
+++ b/platform/internal/handlers/agent_git_identity.go
@@ -0,0 +1,82 @@
+package handlers
+
+import (
+	"regexp"
+	"strings"
+)
+
+// gitIdentitySlugPattern collapses any run of non-alphanumeric characters
+// into a single hyphen when deriving an email localpart from a workspace
+// name. Dots, parentheses, unicode dashes, whitespace — all get squashed.
+var gitIdentitySlugPattern = regexp.MustCompile(`[^a-z0-9]+`)
+
+// gitIdentityEmailDomain is the @-part of generated agent emails. These
+// addresses are not deliverable — they're identity markers only. Using
+// the project's canonical domain keeps them attributable without looking
+// like they belong to a real human inbox. If this changes, also update
+// docs/authorship.md (when it exists).
+const gitIdentityEmailDomain = "agents.moleculesai.app"
+
+// applyAgentGitIdentity sets GIT_AUTHOR_* / GIT_COMMITTER_* env vars so
+// every commit from this workspace container carries a distinct author
+// in `git log` and `git blame`. Git reads these env vars before falling
+// back to `git config user.name` / `user.email`, so this works even if
+// the container's git config is untouched.
+//
+// Idempotent + respectful: if any of the four variables already holds a
+// non-blank value (e.g. from an operator-supplied workspace_secret), the
+// existing value wins — this function only fills in the defaults.
+//
+// The workspace name is the display name from org.yaml ("Frontend
+// Engineer", "Product Marketing Manager", "Research Lead"). The email
+// localpart is the slugified form of that name. Empty workspace names
+// leave the env untouched — we don't want to emit
+// `unknown@agents.moleculesai.app` for a provisioning glitch that
+// dropped the name. The same goes for names that slugify to nothing
+// (pure punctuation, or no ASCII letters/digits at all): a bare
+// `@agents.moleculesai.app` address is worse than no identity.
+func applyAgentGitIdentity(envVars map[string]string, workspaceName string) {
+	if envVars == nil {
+		return
+	}
+	workspaceName = strings.TrimSpace(workspaceName)
+	if workspaceName == "" {
+		return
+	}
+	slug := slugifyForEmail(workspaceName)
+	if slug == "" {
+		return
+	}
+
+	authorName := "Molecule AI " + workspaceName
+	authorEmail := slug + "@" + gitIdentityEmailDomain
+
+	setIfEmpty(envVars, "GIT_AUTHOR_NAME", authorName)
+	setIfEmpty(envVars, "GIT_AUTHOR_EMAIL", authorEmail)
+	setIfEmpty(envVars, "GIT_COMMITTER_NAME", authorName)
+	setIfEmpty(envVars, "GIT_COMMITTER_EMAIL", authorEmail)
+}
+
+// slugifyForEmail collapses a workspace name to a safe email localpart:
+// lowercase, non-alphanumeric runs → single hyphen, stripped at edges.
+// "Frontend Engineer" → "frontend-engineer".
+// "Product Marketing Manager" → "product-marketing-manager".
+// "UIUX Designer" → "uiux-designer".
+// Only ASCII letters and digits survive, so a name without any of them
+// yields "" — callers must handle the empty result.
+func slugifyForEmail(name string) string {
+	lowered := strings.ToLower(name)
+	slug := gitIdentitySlugPattern.ReplaceAllString(lowered, "-")
+	return strings.Trim(slug, "-")
+}
+
+// setIfEmpty stores val under key unless the map already holds a
+// non-blank value for it. A key that is present but empty (or only
+// whitespace) counts as unset: git refuses to commit with an empty
+// ident name, so keeping a blank value would break the container.
+func setIfEmpty(m map[string]string, key, val string) {
+	if strings.TrimSpace(m[key]) != "" {
+		return
+	}
+	m[key] = val
+}
diff --git a/platform/internal/handlers/agent_git_identity_test.go b/platform/internal/handlers/agent_git_identity_test.go
new file mode 100644
index 00000000..1d7b7dc0
--- /dev/null
+++ b/platform/internal/handlers/agent_git_identity_test.go
@@ -0,0 +1,115 @@
+package handlers
+
+import (
+	"testing"
+)
+
+// applyAgentGitIdentity is the platform-level chokepoint for per-agent
+// commit authorship. These tests pin the generated name/email format
+// and the operator-override semantics (workspace_secrets wins).
+
+func TestApplyAgentGitIdentity_FillsFourVars(t *testing.T) {
+	env := map[string]string{}
+	applyAgentGitIdentity(env, "Frontend Engineer")
+
+	cases := map[string]string{
+		"GIT_AUTHOR_NAME":     "Molecule AI Frontend Engineer",
+		"GIT_AUTHOR_EMAIL":    "frontend-engineer@agents.moleculesai.app",
+		"GIT_COMMITTER_NAME":  "Molecule AI Frontend Engineer",
+		"GIT_COMMITTER_EMAIL": "frontend-engineer@agents.moleculesai.app",
+	}
+	for k, want := range cases {
+		if got := env[k]; got != want {
+			t.Errorf("%s: got %q, want %q", k, got, want)
+		}
+	}
+}
+
+func TestApplyAgentGitIdentity_RespectsOperatorOverride(t *testing.T) {
+	// If a workspace_secret already provides GIT_AUTHOR_NAME (the secret
+	// loader runs before us), that operator intent wins. We only fill in
+	// what isn't already set.
+	env := map[string]string{
+		"GIT_AUTHOR_NAME":  "Custom Name",
+		"GIT_AUTHOR_EMAIL": "custom@example.com",
+	}
+	// A key that is present but blank is not a real override, so the
+	// default has to replace it.
+	env["GIT_COMMITTER_EMAIL"] = " "
+	applyAgentGitIdentity(env, "Backend Engineer")
+
+	if env["GIT_AUTHOR_NAME"] != "Custom Name" {
+		t.Errorf("GIT_AUTHOR_NAME should not be overwritten, got %q", env["GIT_AUTHOR_NAME"])
+	}
+	if env["GIT_AUTHOR_EMAIL"] != "custom@example.com" {
+		t.Errorf("GIT_AUTHOR_EMAIL should not be overwritten, got %q", env["GIT_AUTHOR_EMAIL"])
+	}
+	// GIT_COMMITTER_NAME wasn't pre-set, so the default fills it in.
+	if env["GIT_COMMITTER_NAME"] != "Molecule AI Backend Engineer" {
+		t.Errorf("GIT_COMMITTER_NAME should be filled, got %q", env["GIT_COMMITTER_NAME"])
+	}
+	if env["GIT_COMMITTER_EMAIL"] != "backend-engineer@agents.moleculesai.app" {
+		t.Errorf("blank GIT_COMMITTER_EMAIL should be replaced, got %q", env["GIT_COMMITTER_EMAIL"])
+	}
+}
+
+func TestApplyAgentGitIdentity_EmptyNameIsNoop(t *testing.T) {
+	// A provisioning glitch where the workspace name arrived empty
+	// shouldn't emit `unknown@agents.moleculesai.app` — those commits
+	// are worse than no identity at all (they look like a real misconfig
+	// rather than a recoverable state).
+	env := map[string]string{}
+	applyAgentGitIdentity(env, "")
+	if len(env) != 0 {
+		t.Errorf("empty name should leave env untouched, got %v", env)
+	}
+	// Whitespace-only name also counts as empty.
+	applyAgentGitIdentity(env, " ")
+	if len(env) != 0 {
+		t.Errorf("whitespace name should leave env untouched, got %v", env)
+	}
+	// So does a name with nothing sluggable in it: a bare
+	// `@agents.moleculesai.app` address is no better than `unknown@...`.
+	for _, name := range []string{"!!!", "研发"} {
+		applyAgentGitIdentity(env, name)
+		if len(env) != 0 {
+			t.Errorf("unsluggable name %q should leave env untouched, got %v", name, env)
+		}
+	}
+}
+
+func TestApplyAgentGitIdentity_NilMapIsSafe(t *testing.T) {
+	// Defensive: never panic on a nil map (buildProvisionerConfig signature
+	// doesn't guarantee non-nil). Tests the explicit nil-check.
+	defer func() {
+		if r := recover(); r != nil {
+			t.Errorf("applyAgentGitIdentity panicked on nil map: %v", r)
+		}
+	}()
+	applyAgentGitIdentity(nil, "PM")
+}
+
+func TestSlugifyForEmail(t *testing.T) {
+	cases := []struct {
+		in, want string
+	}{
+		{"Frontend Engineer", "frontend-engineer"},
+		{"Product Marketing Manager", "product-marketing-manager"},
+		{"UIUX Designer", "uiux-designer"},
+		{"PM", "pm"},
+		{"SEO Growth Analyst", "seo-growth-analyst"},
+		{"Social Media Brand", "social-media-brand"},
+		// Odd cases: multiple spaces, punctuation, edge hyphens.
+		{" Extra Spaces ", "extra-spaces"},
+		{"Role (with parens)", "role-with-parens"},
+		{"em—dash", "em-dash"},
+		{"---weird---", "weird"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.in, func(t *testing.T) {
+			if got := slugifyForEmail(tc.in); got != tc.want {
+				t.Errorf("slugifyForEmail(%q) = %q, want %q", tc.in, got, tc.want)
+			}
+		})
+	}
+}
diff --git a/platform/internal/handlers/workspace_provision.go b/platform/internal/handlers/workspace_provision.go
index bdfe6af5..e6ae3f7d 100644
--- a/platform/internal/handlers/workspace_provision.go
+++ b/platform/internal/handlers/workspace_provision.go
@@ -83,6 +83,17 @@ func (h *WorkspaceHandler) provisionWorkspaceOpts(workspaceID, templatePath stri
 
 	pluginsPath, _ := filepath.Abs(filepath.Join(h.configsDir, "..", "plugins"))
 	awarenessNamespace := h.loadAwarenessNamespace(ctx, workspaceID)
+
+	// Per-agent git identity (Option 3 of agent-separation rollout).
+	// Sets GIT_AUTHOR_* / GIT_COMMITTER_* so commits from each workspace
+	// carry a distinct author in `git log` / `git blame` — instead of
+	// every agent appearing as whoever the shared PAT belongs to. PR +
+	// issue authorship is still tied to GITHUB_TOKEN (shared PAT); that
+	// gets solved by the GitHub App migration (Option 1, follow-up PR).
+	// Runs after secret loads so an operator can still override via a
+	// workspace_secret named GIT_AUTHOR_NAME if they want custom identity.
+	applyAgentGitIdentity(envVars, payload.Name)
+
 	cfg := h.buildProvisionerConfig(workspaceID, templatePath, configFiles, payload, envVars, pluginsPath, awarenessNamespace)
 	cfg.ResetClaudeSession = resetClaudeSession // #12
 