fix(provisioner): Forensic #145 — preserve workspace-authored SCM token, strip only operator/global bleed #2320

Merged
claude-ceo-assistant merged 1 commits from fix/forensic145-preserve-workspace-scm-token into main 2026-06-05 21:57:59 +00:00
8 changed files with 286 additions and 37 deletions
@@ -508,6 +508,7 @@ func TestBuildProvisionerConfig_WorkspacePathFromPayload(t *testing.T) {
map[string][]byte{"config.yaml": []byte("name: test")},
models.CreateWorkspacePayload{Tier: 2, Runtime: "claude-code", WorkspaceDir: "/tmp/workspace", WorkspaceAccess: "read_write"},
map[string]string{"OPENAI_API_KEY": "sk-test"},
nil,
"/tmp/plugins",
)
@@ -243,6 +243,7 @@ func TestBuildProvisionerConfig_CopiesComputeSizingFromPayload(t *testing.T) {
},
},
nil,
nil,
t.TempDir(),
)
@@ -129,7 +129,7 @@ func (h *WorkspaceHandler) provisionWorkspaceOpts(workspaceID, templatePath stri
workspaceID, filepath.Base(runtimeTemplate))
templatePath = runtimeTemplate
// Rebuild cfg with the recovered template path so Start() sees it.
cfg = h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, prepared.EnvVars, prepared.PluginsPath)
cfg = h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, prepared.EnvVars, prepared.Config.WorkspaceSecretKeys, prepared.PluginsPath)
cfg.ResetClaudeSession = resetClaudeSession
recovered = true
break
@@ -281,6 +281,7 @@ func (h *WorkspaceHandler) buildProvisionerConfig(
configFiles map[string][]byte,
payload models.CreateWorkspacePayload,
envVars map[string]string,
workspaceSecretKeys map[string]struct{},
pluginsPath string,
) provisioner.WorkspaceConfig {
// Per-workspace workspace_dir takes priority over global WORKSPACE_DIR env var.
@@ -337,8 +338,13 @@ func (h *WorkspaceHandler) buildProvisionerConfig(
Height: payload.Compute.Display.Height,
Protocol: payload.Compute.Display.Protocol,
},
EnvVars: envVars,
PlatformURL: h.platformURL,
EnvVars: envVars,
// Forensic #145: positive provenance set so the SCM-write-token guard
// (cp_provisioner.Start) exempts a workspace-authored GITEA_TOKEN from
// the operator-bleed strip while still stripping global/persona-merged
// SCM tokens. Carried by both Docker- and CP-mode configs.
WorkspaceSecretKeys: workspaceSecretKeys,
PlatformURL: h.platformURL,
// Image left empty — molecule-core's runtime_image_pins table (mig
// 047, dead reader removed by RFC internal#617 / task #335) was an
// aspirational SSOT that never received a writer. CP's
@@ -1233,9 +1239,18 @@ func firstNonEmptyEnv(names ...string) string {
// stores — NOT the user's own scoped PAT they explicitly authorized via
// the per-workspace Secrets tab.
//
// The third return value (workspaceKeys) is the POSITIVE counterpart: the
// set of keys authored via the per-workspace `workspace_secrets` table
// (user / org-admin set, authenticated as the workspace owner). It is the
// provenance signal the forensic #145 SCM-write-token guard consults to
// EXEMPT a workspace-scoped GITEA_TOKEN (the intended, legitimate delivery
// channel for a reviewer agent) from the operator-bleed strip. A key set
// in BOTH stores lands here (workspace overrides global) and is removed
// from globalKeys, matching the precedence semantic below.
//
// The merged map preserves the existing precedence semantic (workspace
// rows overwrite global rows on key collision); only the provenance side-
// channel is new. Existing single-return callers can ignore globalKeys.
// channels are new. Existing callers can ignore globalKeys / workspaceKeys.
//
// F1086 / #1206: the returned error string is the SAFE-CANNED message that
// gets persisted to workspaces.last_sample_error AND broadcast as the
@@ -1243,9 +1258,10 @@ func firstNonEmptyEnv(names ...string) string {
// the encryption version, the decrypt-error text) is logged here, never
// returned to the caller, so it can't leak via the canvas event stream
// (cf. TestProvisionWorkspace_NoInternalErrorsInBroadcast).
func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]string, map[string]struct{}, string) {
func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]string, map[string]struct{}, map[string]struct{}, string) {
envVars := map[string]string{}
globalKeys := map[string]struct{}{}
workspaceKeys := map[string]struct{}{}
globalRows, globalErr := db.DB.QueryContext(ctx,
`SELECT key, encrypted_value, encryption_version FROM global_secrets`)
if globalErr == nil {
@@ -1266,7 +1282,7 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
decrypted, decErr := crypto.DecryptVersioned(v, ver)
if decErr != nil {
log.Printf("Provisioner: FATAL — failed to decrypt global secret %s (version=%d): %v — aborting provision of workspace %s", k, ver, decErr, workspaceID)
return nil, nil, "failed to decrypt global secret"
return nil, nil, nil, "failed to decrypt global secret"
}
envVars[k] = string(decrypted)
globalKeys[k] = struct{}{}
@@ -1300,7 +1316,7 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
decrypted, decErr := crypto.DecryptVersioned(v, ver)
if decErr != nil {
log.Printf("Provisioner: FATAL — failed to decrypt workspace secret %s (version=%d) for %s: %v — aborting provision", k, ver, workspaceID, decErr)
return nil, nil, "failed to decrypt workspace secret"
return nil, nil, nil, "failed to decrypt workspace secret"
}
envVars[k] = string(decrypted)
// User-authored workspace_secrets value supersedes any
@@ -1309,13 +1325,19 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
// re-set the value via the canvas Secrets tab, so it is
// no longer "the operator-store version."
delete(globalKeys, k)
// Positive provenance: record that this key was authored
// via workspace_secrets. The forensic #145 SCM-write-token
// guard exempts only keys in this set — a workspace-scoped
// GITEA_TOKEN is the intended delivery channel for that
// workspace's agent.
workspaceKeys[k] = struct{}{}
}
}
if err := wsRows.Err(); err != nil {
log.Printf("Provisioner: workspace_secrets rows.Err workspace=%s: %v", workspaceID, err)
}
}
return envVars, globalKeys, ""
return envVars, globalKeys, workspaceKeys, ""
}
// provisionWorkspaceCP provisions a workspace via the control plane API.
@@ -122,7 +122,7 @@ func (h *WorkspaceHandler) prepareProvisionContext(
payload models.CreateWorkspacePayload,
resetClaudeSession bool,
) (*preparedProvisionContext, *provisionAbort) {
envVars, globalSecretKeys, decryptErr := loadWorkspaceSecrets(ctx, workspaceID)
envVars, globalSecretKeys, workspaceSecretKeys, decryptErr := loadWorkspaceSecrets(ctx, workspaceID)
if decryptErr != "" {
return nil, &provisionAbort{Msg: decryptErr}
}
@@ -294,7 +294,7 @@ func (h *WorkspaceHandler) prepareProvisionContext(
return nil, abort
}
cfg := h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, envVars, pluginsPath)
cfg := h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, envVars, workspaceSecretKeys, pluginsPath)
cfg.ResetClaudeSession = resetClaudeSession
return &preparedProvisionContext{
@@ -845,6 +845,7 @@ func TestBuildProvisionerConfig_BasicFields(t *testing.T) {
map[string][]byte{"config.yaml": []byte("name: test")},
models.CreateWorkspacePayload{Tier: 1, Runtime: "claude-code"},
map[string]string{"API_KEY": "secret"},
nil,
pluginsPath,
)
@@ -893,6 +894,7 @@ func TestBuildProvisionerConfig_WorkspacePathFromEnv(t *testing.T) {
nil,
models.CreateWorkspacePayload{Tier: 2, Runtime: "claude-code"},
nil,
nil,
pluginsPath,
)
@@ -901,6 +903,71 @@ func TestBuildProvisionerConfig_WorkspacePathFromEnv(t *testing.T) {
}
}
// ==================== loadWorkspaceSecrets provenance (forensic #145) ====================
// TestLoadWorkspaceSecrets_WorkspaceKeysProvenance pins the positive
// provenance side-channel added for forensic #145: a key sourced from
// workspace_secrets must land in the third return value (workspaceKeys),
// while a key sourced only from global_secrets must NOT. A key present in
// BOTH stores is treated as workspace-authored (workspace overrides global),
// so it lands in workspaceKeys AND is removed from globalKeys.
func TestLoadWorkspaceSecrets_WorkspaceKeysProvenance(t *testing.T) {
mock := setupTestDB(t)
// global_secrets: an operator-store GITEA_TOKEN (the bleed channel) and
// an OPERATOR_ONLY key that no workspace row re-sets.
globalRows := sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("GITEA_TOKEN", []byte("operator-store-gitea"), 0).
AddRow("OPERATOR_ONLY", []byte("op-val"), 0)
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
WillReturnRows(globalRows)
// workspace_secrets: the user/org-admin re-authors GITEA_TOKEN (override)
// and adds a workspace-only WS_ONLY key. encryption_version 0 = plaintext
// passthrough (crypto.DecryptVersioned).
wsRows := sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("GITEA_TOKEN", []byte("workspace-authored-gitea"), 0).
AddRow("WS_ONLY", []byte("ws-val"), 0)
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
WithArgs("ws-prov").
WillReturnRows(wsRows)
envVars, globalKeys, workspaceKeys, errMsg := loadWorkspaceSecrets(context.Background(), "ws-prov")
if errMsg != "" {
t.Fatalf("loadWorkspaceSecrets returned error: %q", errMsg)
}
// Workspace override wins on value precedence.
if got := envVars["GITEA_TOKEN"]; got != "workspace-authored-gitea" {
t.Errorf("GITEA_TOKEN value = %q; want workspace-authored override", got)
}
// workspaceKeys: both workspace-sourced keys present.
if _, ok := workspaceKeys["GITEA_TOKEN"]; !ok {
t.Errorf("GITEA_TOKEN (re-authored via workspace_secrets) missing from workspaceKeys: %v", workspaceKeys)
}
if _, ok := workspaceKeys["WS_ONLY"]; !ok {
t.Errorf("WS_ONLY (workspace_secrets) missing from workspaceKeys: %v", workspaceKeys)
}
// OPERATOR_ONLY came only from global_secrets → NOT workspace-authored.
if _, ok := workspaceKeys["OPERATOR_ONLY"]; ok {
t.Errorf("OPERATOR_ONLY (global_secrets only) wrongly present in workspaceKeys: %v", workspaceKeys)
}
// globalKeys: GITEA_TOKEN's operator-bleed flag dropped by the override;
// OPERATOR_ONLY stays flagged.
if _, ok := globalKeys["GITEA_TOKEN"]; ok {
t.Errorf("GITEA_TOKEN should be removed from globalKeys after workspace override: %v", globalKeys)
}
if _, ok := globalKeys["OPERATOR_ONLY"]; !ok {
t.Errorf("OPERATOR_ONLY missing from globalKeys: %v", globalKeys)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations not met: %v", err)
}
}
// ==================== issueAndInjectToken (issue #418) ====================
// TestIssueAndInjectToken_HappyPath verifies that on a normal (re)provision the
@@ -182,6 +182,46 @@ type cpProvisionResponse struct {
Error string `json:"error"`
}
// buildCPTenantEnv assembles the env map the control plane forwards to a
// tenant EC2 workspace container, applying the forensic #145 SCM-write-token
// guard.
//
// The guard strips every key classified by isSCMWriteTokenKey (GITEA_TOKEN,
// GITHUB_TOKEN, …) UNLESS that key is positively workspace-authored —
// i.e. present in cfg.WorkspaceSecretKeys, the provenance set populated from
// the workspace_secrets table. Rationale:
//
// - Operator / persona-merged (global-scoped) SCM-write tokens are an
// upstream bleed and MUST NOT reach an agent-controlled container — that
// keeps the two-eyes review gate structurally self-bypass-proof.
// - A workspace-scoped GITEA_TOKEN that an org admin deliberately set via
// the canvas Secrets tab is the INTENDED delivery channel for that
// workspace's reviewer agent. Stripping it broke codex reviewers
// (whoami 401/404). It is exempt.
//
// Fail-safe: a nil cfg.WorkspaceSecretKeys yields wsAuthored=false for every
// key, so a missing provenance map strips ALL SCM-write tokens rather than
// leaking them. adminToken, when non-empty, is injected as ADMIN_TOKEN (it is
// never an SCM-write key, so the guard never touches it).
func buildCPTenantEnv(cfg WorkspaceConfig, adminToken string) map[string]string {
env := make(map[string]string, len(cfg.EnvVars)+1)
for k, v := range cfg.EnvVars {
if isSCMWriteTokenKey(k) {
_, wsAuthored := cfg.WorkspaceSecretKeys[k] // nil map → false (fail-safe)
if !wsAuthored {
log.Printf("CPProvisioner.Start: dropped SCM-write credential %q from tenant workspace env (forensic #145 guard; provenance=operator/global)", k)
continue
}
log.Printf("CPProvisioner.Start: preserved workspace-authored SCM credential %q for tenant workspace (forensic #145: workspace_secrets provenance, intended delivery)", k)
}
env[k] = v
}
if adminToken != "" {
env["ADMIN_TOKEN"] = adminToken
}
return env
}
// Start provisions a workspace by calling the control plane → EC2.
func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, error) {
// Inject ADMIN_TOKEN into the workspace container env so the agent can call
@@ -193,18 +233,10 @@ func (p *CPProvisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string,
// the SCM-write-token denylist (see buildContainerEnv) is enforced here
// too. Always build a filtered copy — never pass cfg.EnvVars through
// verbatim — so a latent persona-merged GITEA_TOKEN can't reach the
// tenant container regardless of whether ADMIN_TOKEN is set.
env := make(map[string]string, len(cfg.EnvVars)+1)
for k, v := range cfg.EnvVars {
if isSCMWriteTokenKey(k) {
log.Printf("CPProvisioner.Start: dropped SCM-write credential %q from tenant workspace env (forensic #145 guard)", k)
continue
}
env[k] = v
}
if p.adminToken != "" {
env["ADMIN_TOKEN"] = p.adminToken
}
// tenant container regardless of whether ADMIN_TOKEN is set. Extracted to
// buildCPTenantEnv so the strip/exempt logic is unit-testable without
// standing up the CP HTTP round-trip.
env := buildCPTenantEnv(cfg, p.adminToken)
// Collect template files and generated configs, with OFFSEC-010 guards:
// - Rejects symlinks at the template root (prevents bypass via symlink traversal)
// - Skips symlinks during WalkDir (prevents /etc/passwd etc. inclusion)
@@ -90,21 +90,28 @@ const (
// WorkspaceConfig holds the parameters needed to provision a workspace container.
type WorkspaceConfig struct {
WorkspaceID string
TemplatePath string // Host path to template dir to copy from (e.g. claude-code-default/)
ConfigFiles map[string][]byte // Generated config files to write into /configs volume
PluginsPath string // Host path to plugins directory (mounted at /plugins)
WorkspacePath string // Host path to bind-mount as /workspace (if empty, uses Docker named volume)
Tier int
Runtime string // "claude-code" (default), "codex", "hermes", "openclaw", etc.
InstanceType string // Optional CP EC2 instance type override (SaaS only)
DiskGB int32 // Optional CP root volume size override in GiB (SaaS only)
DataPersistence string // internal#734: "persist"|"ephemeral"|"" — durable-data choice forwarded to CP (SaaS only)
Display WorkspaceDisplayConfig
EnvVars map[string]string // Additional env vars (API keys, etc.)
PlatformURL string
WorkspaceAccess string // #65: "none" (default), "read_only", or "read_write"
ResetClaudeSession bool // #12: if true, discard the claude-sessions volume before start (fresh session dir)
WorkspaceID string
TemplatePath string // Host path to template dir to copy from (e.g. claude-code-default/)
ConfigFiles map[string][]byte // Generated config files to write into /configs volume
PluginsPath string // Host path to plugins directory (mounted at /plugins)
WorkspacePath string // Host path to bind-mount as /workspace (if empty, uses Docker named volume)
Tier int
Runtime string // "claude-code" (default), "codex", "hermes", "openclaw", etc.
InstanceType string // Optional CP EC2 instance type override (SaaS only)
DiskGB int32 // Optional CP root volume size override in GiB (SaaS only)
DataPersistence string // internal#734: "persist"|"ephemeral"|"" — durable-data choice forwarded to CP (SaaS only)
Display WorkspaceDisplayConfig
EnvVars map[string]string // Additional env vars (API keys, etc.)
PlatformURL string
// WorkspaceSecretKeys are env keys authored via the workspace_secrets table
// (user/org-admin set, per-workspace). The Forensic #145 SCM-write-token
// guard EXEMPTS these from stripping: a workspace-scoped GITEA_TOKEN is the
// intended, legitimate delivery channel for that workspace's agent. Operator/
// persona-merged (global) SCM tokens are NOT in this set and stay stripped.
WorkspaceSecretKeys map[string]struct{}
WorkspaceAccess string // #65: "none" (default), "read_only", or "read_write"
ResetClaudeSession bool // #12: if true, discard the claude-sessions volume before start (fresh session dir)
// Image, when non-empty, overrides the runtime→image lookup. CP
// (molecule-controlplane) is the single SSOT for runtime image digest
@@ -835,6 +835,125 @@ func TestCPProvisionerEnv_StripsSCMWriteTokens(t *testing.T) {
}
}
// TestBuildCPTenantEnv_ForensicGuardProvenance pins the forensic #145
// provenance-aware guard on the tenant-EC2 path (CPProvisioner.Start →
// buildCPTenantEnv). The guard strips SCM-write tokens UNLESS they are
// positively workspace-authored (present in cfg.WorkspaceSecretKeys). Each
// security invariant from the fix spec gets a row:
//
// 1. SCM token ONLY in global_secrets (in EnvVars, NOT WorkspaceSecretKeys) → STRIPPED.
// 2. SCM token persona/mutator-injected (in EnvVars, NOT WorkspaceSecretKeys) → STRIPPED.
// 3. SCM token authored via workspace_secrets (in EnvVars AND WorkspaceSecretKeys) → PRESERVED.
// 4. WorkspaceSecretKeys == nil → ALL SCM-write tokens STRIPPED (fail-safe).
// 5. Non-SCM keys pass through unchanged regardless of the set.
func TestBuildCPTenantEnv_ForensicGuardProvenance(t *testing.T) {
const tok = "gitea-write-pat-value"
tests := []struct {
name string
envVars map[string]string
workspaceKeys map[string]struct{}
wantPreserved map[string]string // key→expected value that MUST survive
wantStrippedKeys []string // keys that MUST be absent from the result
}{
{
name: "invariant 1 — global_secrets-only SCM token is stripped",
envVars: map[string]string{"GITEA_TOKEN": tok},
workspaceKeys: map[string]struct{}{}, // not workspace-authored
wantStrippedKeys: []string{"GITEA_TOKEN"},
},
{
name: "invariant 2 — persona/mutator-injected SCM token is stripped",
envVars: map[string]string{"GITEA_TOKEN": "persona-merged-write-pat"},
// Persona/mutator merges into EnvVars but NEVER into the
// workspace_secrets provenance set — this is the exact bleed the
// guard exists for and MUST stay stripped.
workspaceKeys: map[string]struct{}{"ANTHROPIC_API_KEY": {}},
wantStrippedKeys: []string{"GITEA_TOKEN"},
},
{
name: "invariant 3 — workspace_secrets-authored SCM token is preserved",
envVars: map[string]string{"GITEA_TOKEN": tok},
workspaceKeys: map[string]struct{}{"GITEA_TOKEN": {}},
wantPreserved: map[string]string{"GITEA_TOKEN": tok},
},
{
name: "invariant 4 — nil provenance map strips ALL SCM-write tokens (fail-safe)",
envVars: map[string]string{
"GITEA_TOKEN": tok,
"GITHUB_TOKEN": "gh",
"GH_TOKEN": "gh2",
"GITLAB_TOKEN": "gl",
"GL_TOKEN": "gl2",
"BITBUCKET_TOKEN": "bb",
},
workspaceKeys: nil, // missing provenance map must never leak
wantStrippedKeys: []string{
"GITEA_TOKEN", "GITHUB_TOKEN", "GH_TOKEN",
"GITLAB_TOKEN", "GL_TOKEN", "BITBUCKET_TOKEN",
},
},
{
name: "invariant 5 — non-SCM keys pass through regardless of the set",
envVars: map[string]string{
"ANTHROPIC_API_KEY": "sk-keep",
"CUSTOM": "ok",
"GITEA_USER": "reviewer-agent", // read-only identity, not a write token
"GITEA_TOKEN": tok, // SCM-write, NOT workspace-authored → stripped
},
workspaceKeys: map[string]struct{}{}, // empty → GITEA_TOKEN not exempt
wantPreserved: map[string]string{
"ANTHROPIC_API_KEY": "sk-keep",
"CUSTOM": "ok",
"GITEA_USER": "reviewer-agent",
},
wantStrippedKeys: []string{"GITEA_TOKEN"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
cfg := WorkspaceConfig{
WorkspaceID: "ws-tenant",
PlatformURL: "http://localhost:8080",
Tier: 2,
EnvVars: tt.envVars,
WorkspaceSecretKeys: tt.workspaceKeys,
}
// adminToken empty so the guard's behaviour is isolated; ADMIN_TOKEN
// injection is covered separately below.
got := buildCPTenantEnv(cfg, "")
for _, k := range tt.wantStrippedKeys {
if v, ok := got[k]; ok {
t.Errorf("SCM-write credential %q leaked into tenant env (forensic #145 invariant violated): value=%q", k, v)
}
}
for k, want := range tt.wantPreserved {
if got[k] != want {
t.Errorf("key %q = %q; want preserved value %q", k, got[k], want)
}
}
})
}
}
// TestBuildCPTenantEnv_AdminTokenInjected asserts ADMIN_TOKEN is injected when
// the provisioner carries one, and is never subject to the SCM-write strip.
func TestBuildCPTenantEnv_AdminTokenInjected(t *testing.T) {
cfg := WorkspaceConfig{
WorkspaceID: "ws-tenant",
EnvVars: map[string]string{"GITEA_TOKEN": "stripme"},
}
got := buildCPTenantEnv(cfg, "admin-secret")
if got["ADMIN_TOKEN"] != "admin-secret" {
t.Errorf("ADMIN_TOKEN = %q; want admin-secret", got["ADMIN_TOKEN"])
}
if _, ok := got["GITEA_TOKEN"]; ok {
t.Errorf("GITEA_TOKEN must still be stripped alongside ADMIN_TOKEN injection")
}
}
// TestBuildContainerEnv_GHPATAliasPrecedence asserts that explicit GH_TOKEN /
// GITHUB_TOKEN in workspace secrets win over the GH_PAT alias (#1687 CR2
// review_id=5646). The alias must only inject a key when it was NOT explicitly