diff --git a/workspace-server/internal/provisioner/cp_provisioner.go b/workspace-server/internal/provisioner/cp_provisioner.go index dfd1afe5c..8f6f0c557 100644 --- a/workspace-server/internal/provisioner/cp_provisioner.go +++ b/workspace-server/internal/provisioner/cp_provisioner.go @@ -178,12 +178,21 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, // /admin/liveness and other admin-gated platform endpoints (core#831). // p.adminToken is read from os.Getenv("ADMIN_TOKEN") at provisioner creation; // it is also used for CP→platform HTTP auth but those are separate concerns. - env := cfg.EnvVars - if p.adminToken != "" { - env = make(map[string]string, len(cfg.EnvVars)+1) - for k, v := range cfg.EnvVars { - env[k] = v + // + // Forensic #145 hardening: tenant workspaces run on EC2 via this path, so + // the SCM-write-token denylist (see buildContainerEnv) is enforced here + // too. Always build a filtered copy — never pass cfg.EnvVars through + // verbatim — so a latent persona-merged GITEA_TOKEN can't reach the + // tenant container regardless of whether ADMIN_TOKEN is set. + env := make(map[string]string, len(cfg.EnvVars)+1) + for k, v := range cfg.EnvVars { + if isSCMWriteTokenKey(k) { + log.Printf("CPProvisioner.Start: dropped SCM-write credential %q from tenant workspace env (forensic #145 guard)", k) + continue } + env[k] = v + } + if p.adminToken != "" { env["ADMIN_TOKEN"] = p.adminToken } // Collect template files and generated configs, with OFFSEC-010 guards: @@ -343,6 +352,7 @@ func collectCPConfigFiles(cfg WorkspaceConfig) (map[string]string, error) { } return files, nil } + // Stop terminates the workspace's EC2 instance via the control plane. // // Looks up the actual EC2 instance_id from the workspaces table before @@ -497,7 +507,9 @@ func (p *CPProvisioner) IsRunning(ctx context.Context, workspaceID string) (bool // Don't leak the body — upstream errors may echo headers. return true, fmt.Errorf("cp provisioner: status: unexpected %d", resp.StatusCode) } - var result struct{ State string `json:"state"` } + var result struct { + State string `json:"state"` + } // Cap body read at 64 KiB for parity with Start — a misconfigured // or compromised CP streaming a huge body could otherwise exhaust // memory in this hot path (called reactively per-request from diff --git a/workspace-server/internal/provisioner/provisioner.go b/workspace-server/internal/provisioner/provisioner.go index 4c19c2046..a527b10a6 100644 --- a/workspace-server/internal/provisioner/provisioner.go +++ b/workspace-server/internal/provisioner/provisioner.go @@ -591,6 +591,28 @@ func ValidateWorkspaceAccess(access, workspacePath string) error { } } +// scmWriteTokenKeys is the explicit denylist of environment variable names +// that carry a Git SCM *write* credential (push / merge / approve). These +// must never reach a tenant workspace container — see the forensic #145 +// rationale in buildContainerEnv. Kept as an exact-match set rather than a +// substring/prefix heuristic so the guard is auditable and can't silently +// over-strip a legitimately-named var. +var scmWriteTokenKeys = map[string]struct{}{ + "GITEA_TOKEN": {}, + "GITHUB_TOKEN": {}, + "GH_TOKEN": {}, // gh CLI honours GH_TOKEN as a GITHUB_TOKEN alias + "GITLAB_TOKEN": {}, + "GL_TOKEN": {}, // glab CLI alias + "BITBUCKET_TOKEN": {}, +} + +// isSCMWriteTokenKey reports whether an env var name is a known Git SCM +// write credential that must be stripped from tenant workspace env. +func isSCMWriteTokenKey(key string) bool { + _, ok := scmWriteTokenKeys[key] + return ok +} + // buildContainerEnv assembles the initial environment variables injected // into every workspace container. // @@ -627,6 +649,21 @@ func buildContainerEnv(cfg WorkspaceConfig) []string { env = append(env, fmt.Sprintf("AWARENESS_URL=%s", cfg.AwarenessURL)) } for k, v := range cfg.EnvVars { + // Forensic #145 hardening: tenant workspace containers run + // agent-controlled code and must NEVER receive a Git SCM *write* + // credential. Without merge/approve creds in-container the + // two-eyes review gate is structurally self-bypass-proof — an + // agent that forges an approval has no token to act on it. A + // latent path exists (loadPersonaEnvFile merges a per-role + // persona `GITEA_TOKEN` into cfg.EnvVars when MOLECULE_PERSONA_ROOT + // is set on a tenant host); it is inert today (persona dirs are + // operator-host-only) but unguarded. Strip SCM-write tokens here + // by construction so the invariant holds regardless of whether + // that path ever becomes reachable. + if isSCMWriteTokenKey(k) { + log.Printf("buildContainerEnv: dropped SCM-write credential %q from workspace env (forensic #145 guard)", k) + continue + } env = append(env, fmt.Sprintf("%s=%s", k, v)) } // Inject ADMIN_TOKEN from the platform server's environment so workspace diff --git a/workspace-server/internal/provisioner/provisioner_test.go b/workspace-server/internal/provisioner/provisioner_test.go index 8d4a20f05..9b6049765 100644 --- a/workspace-server/internal/provisioner/provisioner_test.go +++ b/workspace-server/internal/provisioner/provisioner_test.go @@ -636,10 +636,15 @@ func TestBuildContainerEnv_AwarenessOnlyWhenBothSet(t *testing.T) { } func TestBuildContainerEnv_CustomEnvVarsAppended(t *testing.T) { + // NOTE: this test previously asserted GITHUB_TOKEN passed through + // verbatim. That assertion encoded the forensic #145 latent leak as + // expected behavior. Post-guard, ordinary custom env still flows but + // SCM-write credentials are stripped — see + // TestBuildContainerEnv_StripsSCMWriteTokens for the negative assertion. cfg := WorkspaceConfig{ WorkspaceID: "ws-x", PlatformURL: "http://localhost:8080", - EnvVars: map[string]string{"CUSTOM": "value", "GITHUB_TOKEN": "fake-token-for-test"}, + EnvVars: map[string]string{"CUSTOM": "value", "ANTHROPIC_API_KEY": "sk-not-an-scm-token"}, } env := buildContainerEnv(cfg) seen := map[string]string{} @@ -652,8 +657,8 @@ func TestBuildContainerEnv_CustomEnvVarsAppended(t *testing.T) { if seen["CUSTOM"] != "value" { t.Errorf("CUSTOM env missing, got env=%v", env) } - if seen["GITHUB_TOKEN"] != "fake-token-for-test" { - t.Errorf("GITHUB_TOKEN env missing, got env=%v", env) + if seen["ANTHROPIC_API_KEY"] != "sk-not-an-scm-token" { + t.Errorf("non-SCM custom env must still pass through, got env=%v", env) } // Built-in defaults still present if seen["MOLECULE_URL"] == "" { @@ -661,6 +666,129 @@ func TestBuildContainerEnv_CustomEnvVarsAppended(t *testing.T) { } } +// ---------- forensic #145: SCM-write-token denylist guard ---------- + +// TestBuildContainerEnv_StripsSCMWriteTokens is the core negative +// assertion: a tenant workspace env constructed via buildContainerEnv MUST +// NOT contain any Git SCM *write* credential, regardless of how it got into +// cfg.EnvVars. This proves the two-eyes review gate stays structurally +// self-bypass-proof — an agent in-container has no merge/approve token to +// act on a forged approval. See forensic #145. +// +// This test FAILS on the pre-guard code (where buildContainerEnv passed +// cfg.EnvVars through verbatim) and PASSES once the denylist filter is in +// place — i.e. the guard is proven by construction, not by environment +// accident. +func TestBuildContainerEnv_StripsSCMWriteTokens(t *testing.T) { + scmTokens := []string{ + "GITEA_TOKEN", "GITHUB_TOKEN", "GH_TOKEN", + "GITLAB_TOKEN", "GL_TOKEN", "BITBUCKET_TOKEN", + } + + t.Run("normal path — SCM tokens explicitly set in EnvVars", func(t *testing.T) { + envVars := map[string]string{"CUSTOM": "ok", "ANTHROPIC_API_KEY": "sk-keep"} + for _, k := range scmTokens { + envVars[k] = "leaked-write-credential-" + k + } + cfg := WorkspaceConfig{ + WorkspaceID: "ws-tenant", + PlatformURL: "http://localhost:8080", + Tier: 2, + EnvVars: envVars, + } + assertNoSCMWriteToken(t, buildContainerEnv(cfg), scmTokens) + + // Sanity: non-SCM custom env is NOT collateral-damaged by the filter. + if !envContains(buildContainerEnv(cfg), "CUSTOM=ok") { + t.Errorf("filter must not strip non-SCM custom env") + } + if !envContains(buildContainerEnv(cfg), "ANTHROPIC_API_KEY=sk-keep") { + t.Errorf("filter must not strip non-SCM API keys") + } + }) + + t.Run("persona-file path — simulates loadPersonaEnvFile merge", func(t *testing.T) { + // The latent path: handlers.loadPersonaEnvFile() merges a per-role + // persona env file (carrying GITEA_USER, GITEA_TOKEN, …) into the + // workspace env map when MOLECULE_PERSONA_ROOT is set on a tenant + // host. We can't invoke that cross-package helper here, but its + // observable effect is exactly "a GITEA_TOKEN appears in + // cfg.EnvVars". Constructing that condition directly proves the + // guard holds even if the latent path becomes reachable. + cfg := WorkspaceConfig{ + WorkspaceID: "ws-tenant", + PlatformURL: "http://localhost:8080", + Tier: 2, + EnvVars: map[string]string{ + // Persona identity fields that are SAFE to keep (read-only + // identity, not a write credential): + "GITEA_USER": "backend-engineer", + "GITEA_USER_EMAIL": "backend-engineer@agents.moleculesai.app", + // The credential that must be stripped: + "GITEA_TOKEN": "persona-merged-write-pat", + "GITEA_TOKEN_SCOPES": "write:repository", + }, + } + got := buildContainerEnv(cfg) + assertNoSCMWriteToken(t, got, scmTokens) + // Non-credential persona identity may still flow through — only the + // write token is the denied surface. + if !envContains(got, "GITEA_USER=backend-engineer") { + t.Errorf("non-credential persona identity (GITEA_USER) should not be stripped") + } + }) +} + +// TestCPProvisionerEnv_StripsSCMWriteTokens covers the tenant-EC2 path: +// CPProvisioner.Start builds the env map the control plane forwards to the +// EC2 workspace container. The same forensic #145 denylist must hold there. +func TestCPProvisionerEnv_StripsSCMWriteTokens(t *testing.T) { + // isSCMWriteTokenKey is the single source of truth shared by both + // buildContainerEnv (local Docker) and CPProvisioner.Start (tenant EC2). + // Assert it classifies every known SCM-write var as denied and leaves + // ordinary / read-only-identity vars alone. + for _, k := range []string{ + "GITEA_TOKEN", "GITHUB_TOKEN", "GH_TOKEN", + "GITLAB_TOKEN", "GL_TOKEN", "BITBUCKET_TOKEN", + } { + if !isSCMWriteTokenKey(k) { + t.Errorf("isSCMWriteTokenKey(%q) = false, want true (SCM-write credential must be denied)", k) + } + } + for _, k := range []string{ + "GITEA_USER", "GITEA_USER_EMAIL", "ANTHROPIC_API_KEY", + "CUSTOM", "PLATFORM_URL", "ADMIN_TOKEN", "", + } { + if isSCMWriteTokenKey(k) { + t.Errorf("isSCMWriteTokenKey(%q) = true, want false (must not over-strip non-SCM env)", k) + } + } +} + +func assertNoSCMWriteToken(t *testing.T, env []string, scmTokens []string) { + t.Helper() + for _, e := range env { + key := e + if i := strings.IndexByte(e, '='); i >= 0 { + key = e[:i] + } + for _, banned := range scmTokens { + if key == banned { + t.Errorf("SCM-write credential %q leaked into workspace env (forensic #145 invariant violated): %q", banned, e) + } + } + } +} + +func envContains(env []string, want string) bool { + for _, e := range env { + if e == want { + return true + } + } + return false +} + // ---------- buildWorkspaceMount — #65 workspace_access ---------- func TestBuildWorkspaceMount_SelectionMatrix(t *testing.T) {