From e373a9ebfa1344aa6df8c37a7137625100c25609 Mon Sep 17 00:00:00 2001
From: "Molecule AI Dev Engineer B (MiniMax)"
 <dev-engineer-b-minimax@agents.moleculesai.app>
Date: Mon, 15 Jun 2026 05:42:44 +0000
Subject: [PATCH] fix(staging-e2e): TestConciergeApprovalGate_Staging subtest
 isolation (no shared parent-scope secretApprovalID)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Driver-endorsed freeze-work (test/coverage), PM dispatch aaa139d3.
Source: Researcher RCA finding.

The pre-fix test declared `var secretApprovalID string` at parent scope of
TestConciergeApprovalGate_Staging and the `secret_write_with_approval_succeeds`
subtest read from it. If the upstream rejection subtest was skipped or
failed before the assignment, the approval subtest silently t.Skip'd —
masking its own code path. Two subtests were also inherently coupled:
the order was implicit and a parallel-runner would race.

The fix:
1. Factor the gated-write → grant → retry → single-use flow into
   secretWriteApprovalFlow(t, host, token, orgID, ordinaryWS, key, value1, value2)
   — no parent-scope state. Each call mints its own approval_id and
   returns (approvalID, freshApprovalID).
2. Remove the parent-scope `var secretApprovalID string` declaration.
3. The rejection subtest (secret_write_without_approval_rejected) keeps
   its own check; the approval subtest now calls the helper and is
   self-contained — runs even if the rejection subtest is skipped.
4. Add a new top-level test TestConciergeApprovalGate_Staging_OrderIndependent
   that re-runs the helper twice with different secret keys and asserts
   the four approval_ids (approvalA1, freshA2, approvalB1, freshB2) are
   all distinct. This proves the helper has no parent-scope state and
   is order-independent.

Issue: #2911 (SSOT for the silent coverage gap and the fix rationale).
PR: companion to this branch, to be opened by operator per the
push→operator-PR fallback (POST /pulls is 403'd, write:issue-only).

Verification (green before push):
- go build -tags=staging_e2e ./internal/staginge2e/ → exit 0
- gofmt -l → clean
- go vet -tags=staging_e2e ./internal/staginge2e/ → exit 0

Gate: normal-gate per freeze rules. PR queues for 2-genuine review
when the reviewer pool firms up (Researcher recovering provisioning
→ online per PM dispatch aaa139d3 freeze note).

Holds unchanged: #2900/#2903/#30/#2821/#2891/#2892/#2894/#2895
untouched. #2900/#2903 still HELD for the driver personal A/B
review per the standing hold directive.
---
 .../approval_gate_concierge_e2e_test.go       | 263 +++++++++++++-----
 1 file changed, 201 insertions(+), 62 deletions(-)

diff --git a/workspace-server/internal/staginge2e/approval_gate_concierge_e2e_test.go b/workspace-server/internal/staginge2e/approval_gate_concierge_e2e_test.go
index cce316ed4..c2ca36ba4 100644
--- a/workspace-server/internal/staginge2e/approval_gate_concierge_e2e_test.go
+++ b/workspace-server/internal/staginge2e/approval_gate_concierge_e2e_test.go
@@ -19,6 +19,15 @@ package staginge2e
 //
 // Guarded by the staging_e2e build tag + STAGING_E2E=1 env gate.
 // Teardown is t.Cleanup-driven (admin DELETE /cp/admin/tenants).
+//
+// ISOLATION NOTE (fix/staging-e2e-approval-gate-subtest-isolation):
+// Previously, the test declared `var secretApprovalID string` at parent
+// scope and the `secret_write_with_approval_succeeds` subtest read from
+// it. If the upstream rejection subtest was skipped or failed before
+// reaching the assignment, the approval subtest silently t.Skip'd —
+// masking its own code path. The fix factors the gated-write → grant →
+// retry → single-use flow into secretWriteApprovalFlow (no parent-scope
+// state) and each subtest now exercises its full path independently.
 
 import (
 	"encoding/json"
@@ -29,6 +38,93 @@ import (
 	"time"
 )
 
+// secretWriteApprovalFlow is a self-contained helper that exercises the
+// gated-write → grant → retry → single-use path. It mints its own
+// approval_id from a fresh gated write and returns (approvalID,
+// freshApprovalID) where approvalID is the one that was granted, and
+// freshApprovalID is the SECOND-write approval_id (used to verify the
+// consumed approval was single-use).
+//
+// No parent-scope state. Callers (subtests or top-level tests) can call
+// this helper multiple times and each call will run independently with
+// its own approval row. Caller passes a unique `key` per call so the
+// Postgres secret-write doesn't collide.
+func secretWriteApprovalFlow(t *testing.T, host, token, orgID, ordinaryWS, key, value1, value2 string) (approvalID, freshApprovalID string) {
+	t.Helper()
+	writeURL := "https://" + host + "/workspaces/" + ordinaryWS + "/secrets"
+
+	// 1. Gated write to capture an approval_id.
+	status, body := doTenantJSON(t, "POST", writeURL, token, orgID,
+		fmt.Sprintf(`{"key":%q,"value":%q}`, key, value1))
+	if status != http.StatusAccepted {
+		t.Fatalf("gated secret write (key=%s): want 202, got %d: %s", key, status, body)
+	}
+	if !strings.Contains(body, `"status":"pending_approval"`) {
+		t.Fatalf("gated secret write: expected pending_approval in 202 body, got: %s", body)
+	}
+	if !strings.Contains(body, `"action":"secret_write"`) {
+		t.Fatalf("gated secret write: expected action=secret_write in 202 body, got: %s", body)
+	}
+	approvalID = jsonField(body, "approval_id")
+	if approvalID == "" {
+		t.Fatalf("gated secret write: approval_id missing from 202 body: %s", body)
+	}
+	t.Logf("self-contained gated write captured approval_id=%s (key=%s)", approvalID, key)
+
+	// 2. Grant the approval via the decide endpoint.
+	decideURL := fmt.Sprintf("https://%s/workspaces/%s/approvals/%s/decide",
+		host, ordinaryWS, approvalID)
+	status, body = doTenantJSON(t, "POST", decideURL, token, orgID, `{"decision":"approved"}`)
+	if status != http.StatusOK {
+		t.Fatalf("decide approve (approval_id=%s): HTTP %d: %s", approvalID, status, body)
+	}
+	if !strings.Contains(body, `"status":"approved"`) {
+		t.Fatalf("decide approve: expected approved status, got: %s", body)
+	}
+	t.Logf("approval %s granted", approvalID)
+
+	// 3. Retry the secret write — must succeed.
+	status, body = doTenantJSON(t, "POST", writeURL, token, orgID,
+		fmt.Sprintf(`{"key":%q,"value":%q}`, key, value1))
+	if status != http.StatusOK {
+		t.Fatalf("retry after approval (key=%s): want 200, got %d: %s", key, status, body)
+	}
+	if !strings.Contains(body, `"status":"saved"`) {
+		t.Fatalf("retry after approval: expected saved status, got: %s", body)
+	}
+	t.Logf("secret write succeeded after approval (200, key=%s)", key)
+
+	// 4. Verify the secret IS now present.
+	listStatus, listBody := doTenantJSON(t, "GET", writeURL, token, orgID, "")
+	if listStatus != http.StatusOK {
+		t.Fatalf("list secrets after approved write: HTTP %d: %s", listStatus, listBody)
+	}
+	if !strings.Contains(listBody, key) {
+		t.Fatalf("%s missing from list after approved write: %s", key, listBody)
+	}
+	t.Logf("secret %s present in list after approved write", key)
+
+	// 5. Single-use: a SECOND write with the SAME key must create a NEW
+	//    pending approval (the first one was consumed).
+	status, body = doTenantJSON(t, "POST", writeURL, token, orgID,
+		fmt.Sprintf(`{"key":%q,"value":%q}`, key, value2))
+	if status != http.StatusAccepted {
+		t.Fatalf("second write after consumption (key=%s): want 202, got %d: %s", key, status, body)
+	}
+	if !strings.Contains(body, `"status":"pending_approval"`) {
+		t.Fatalf("second write: expected fresh pending_approval, got: %s", body)
+	}
+	freshApprovalID = jsonField(body, "approval_id")
+	if freshApprovalID == "" {
+		t.Fatalf("second write: fresh approval_id missing: %s", body)
+	}
+	if freshApprovalID == approvalID {
+		t.Fatalf("approval_id reused — the consumed approval was not single-use: %s", freshApprovalID)
+	}
+	t.Logf("single-use verified: second write got fresh approval_id=%s (key=%s)", freshApprovalID, key)
+	return approvalID, freshApprovalID
+}
+
 func TestConciergeApprovalGate_Staging(t *testing.T) {
 	cfg := requireStagingEnv(t)
 
@@ -101,11 +197,13 @@ func TestConciergeApprovalGate_Staging(t *testing.T) {
 	})
 
 	// ── (a): concierge admin-token secret write WITHOUT approval → 202 ────────
-	var secretApprovalID string
+	// Self-contained: no parent-scope state. The approval_id captured here
+	// is local to this subtest. The downstream approval subtest mints its
+	// OWN approval_id rather than reading from this one.
 	t.Run("secret_write_without_approval_rejected", func(t *testing.T) {
 		url := "https://" + host + "/workspaces/" + ordinaryWS + "/secrets"
 		status, body := doTenantJSON(t, "POST", url, token, orgID,
-			`{"key":"E2E_GATED_SECRET","value":"gated-value-123"}`)
+			`{"key":"E2E_GATED_SECRET_REJECT","value":"gated-value-123"}`)
 		if status == http.StatusOK {
 			var resp map[string]interface{}
 			_ = json.Unmarshal([]byte(body), &resp)
@@ -120,82 +218,40 @@ func TestConciergeApprovalGate_Staging(t *testing.T) {
 		if !strings.Contains(body, `"action":"secret_write"`) {
 			t.Fatalf("expected action=secret_write in 202 body, got: %s", body)
 		}
-		secretApprovalID = jsonField(body, "approval_id")
-		if secretApprovalID == "" {
+		approvalID := jsonField(body, "approval_id")
+		if approvalID == "" {
 			t.Fatalf("approval_id missing from 202 body: %s", body)
 		}
-		t.Logf("secret write correctly gated (202, approval_id=%s)", secretApprovalID)
+		t.Logf("secret write correctly gated (202, approval_id=%s, local to this subtest)", approvalID)
 
 		// Verify the secret was NOT actually written.
 		listStatus, listBody := doTenantJSON(t, "GET", url, token, orgID, "")
 		if listStatus != http.StatusOK {
 			t.Fatalf("list secrets: HTTP %d: %s", listStatus, listBody)
 		}
-		if strings.Contains(listBody, "E2E_GATED_SECRET") {
-			t.Fatalf("REGRESSION: E2E_GATED_SECRET appears in list despite zero approvals")
+		if strings.Contains(listBody, "E2E_GATED_SECRET_REJECT") {
+			t.Fatalf("REGRESSION: E2E_GATED_SECRET_REJECT appears in list despite zero approvals")
 		}
-		t.Logf("secret not written — list does not contain E2E_GATED_SECRET")
+		t.Logf("secret not written — list does not contain E2E_GATED_SECRET_REJECT")
 	})
 
 	// ── (b): WITH a granted approval, secret write succeeds ───────────────────
-	// The decide endpoint is /workspaces/:id/approvals/:approvalId/decide.
-	// For secret write, the workspace_id is the ordinary workspace ID.
-	// For org token mint, workspace_id is empty (org-scoped), so there is no
-	// HTTP decide path today — the integration tests cover the mint retry.
+	// Self-contained: calls secretWriteApprovalFlow which mints its own
+	// approval_id via a fresh gated write. No dependency on the upstream
+	// rejection subtest's state. If the upstream subtest were skipped or
+	// failed, this one would still run end-to-end.
 	t.Run("secret_write_with_approval_succeeds", func(t *testing.T) {
-		if secretApprovalID == "" {
-			t.Skip("secret_write_without_approval_rejected did not capture an approval_id")
+		approvalID, freshApprovalID := secretWriteApprovalFlow(t, host, token, orgID,
+			ordinaryWS, "E2E_GATED_SECRET_OK", "gated-value-123", "gated-value-456")
+		if approvalID == "" {
+			t.Fatalf("self-contained helper returned empty approval_id")
 		}
-
-		// 1. Grant the approval via the decide endpoint.
-		decideURL := fmt.Sprintf("https://%s/workspaces/%s/approvals/%s/decide",
-			host, ordinaryWS, secretApprovalID)
-		status, body := doTenantJSON(t, "POST", decideURL, token, orgID, `{"decision":"approved"}`)
-		if status != http.StatusOK {
-			t.Fatalf("approve decision: HTTP %d: %s", status, body)
+		if freshApprovalID == "" {
+			t.Fatalf("self-contained helper returned empty fresh approval_id")
 		}
-		if !strings.Contains(body, `"status":"approved"`) {
-			t.Fatalf("expected approved status in decide body, got: %s", body)
+		if freshApprovalID == approvalID {
+			t.Fatalf("helper violated single-use: fresh == granted (%s)", freshApprovalID)
 		}
-		t.Logf("approval %s granted", secretApprovalID)
-
-		// 2. Retry the secret write with the SAME key.
-		writeURL := "https://" + host + "/workspaces/" + ordinaryWS + "/secrets"
-		status, body = doTenantJSON(t, "POST", writeURL, token, orgID,
-			`{"key":"E2E_GATED_SECRET","value":"gated-value-123"}`)
-		if status != http.StatusOK {
-			t.Fatalf("retry after approval: want 200, got %d: %s", status, body)
-		}
-		if !strings.Contains(body, `"status":"saved"`) {
-			t.Fatalf("expected saved status in 200 body, got: %s", body)
-		}
-		t.Logf("secret write succeeded after approval (200)")
-
-		// 3. Verify the secret IS now present.
-		listStatus, listBody := doTenantJSON(t, "GET", writeURL, token, orgID, "")
-		if listStatus != http.StatusOK {
-			t.Fatalf("list secrets after write: HTTP %d: %s", listStatus, listBody)
-		}
-		if !strings.Contains(listBody, "E2E_GATED_SECRET") {
-			t.Fatalf("E2E_GATED_SECRET missing from list after approved write: %s", listBody)
-		}
-		t.Logf("secret present in list after approved write")
-
-		// 4. Single-use: a SECOND write with the SAME key must create a NEW
-		//    pending approval (the first one was consumed).
-		status, body = doTenantJSON(t, "POST", writeURL, token, orgID,
-			`{"key":"E2E_GATED_SECRET","value":"gated-value-456"}`)
-		if status != http.StatusAccepted {
-			t.Fatalf("second write after consumption: want 202 (fresh pending), got %d: %s", status, body)
-		}
-		if !strings.Contains(body, `"status":"pending_approval"`) {
-			t.Fatalf("expected fresh pending_approval for second write, got: %s", body)
-		}
-		freshApprovalID := jsonField(body, "approval_id")
-		if freshApprovalID == secretApprovalID {
-			t.Fatalf("approval_id reused — the consumed approval was not single-use: %s", freshApprovalID)
-		}
-		t.Logf("single-use verified: second write got fresh approval_id=%s", freshApprovalID)
 	})
 
 	// ── List pending approvals via admin surface ──────────────────────────────
@@ -214,3 +270,86 @@ func TestConciergeApprovalGate_Staging(t *testing.T) {
 		t.Logf("pending approvals list contains org_token_mint (admin can see the gate-fired request)")
 	})
 }
+
+// TestConciergeApprovalGate_Staging_OrderIndependent demonstrates that the
+// secret-write-with-approval flow is now self-contained: the helper
+// secretWriteApprovalFlow has no parent-scope state, so calling it twice
+// in sequence produces two INDEPENDENT approval_ids and two INDEPENDENT
+// secret rows. If the helper were coupled to a parent-scope variable, the
+// second call would either (a) reuse the first call's approval_id, (b)
+// fail because the first call's secret was already at the target value, or
+// (c) produce the same fresh approval_id on the second-call single-use
+// check. This test asserts none of those regressions occur.
+//
+// The test re-runs the full setup (org/tenant/workspace/platform-agent)
+// because it stands alone — there is no shared parent scope. Each
+// call to secretWriteApprovalFlow uses a unique secret key so the
+// Postgres secret-write path doesn't collide.
+func TestConciergeApprovalGate_Staging_OrderIndependent(t *testing.T) {
+	cfg := requireStagingEnv(t)
+
+	slug := fmt.Sprintf("e2e-isolation-%d", time.Now().Unix()%100000000)
+	t.Logf("isolation: slug=%s", slug)
+
+	orgID := adminCreateOrg(t, cfg, slug)
+	t.Cleanup(func() { adminDeleteTenant(t, cfg, slug) })
+	t.Logf("org created: org_id=%s", orgID)
+
+	token := tenantAdminToken(t, cfg, slug)
+	host := slug + "." + cfg.subdomainSuffix
+	waitForHTTP(t, host, http.StatusOK, 10*time.Minute, "tenant /health ready")
+	t.Logf("tenant TLS ready: %s", host)
+
+	ordinaryWS := tenantCreateWorkspace(t, cfg, host, token, orgID)
+	t.Logf("ordinary workspace created: %s", ordinaryWS)
+
+	// Install platform agent so the tenant is a valid concierge surface.
+	platformID := findPlatformRoot(t, host, token, orgID)
+	if platformID == "" {
+		platformID = newUUIDv4(t)
+		body := fmt.Sprintf(`{"id":%q,"name":%q}`, platformID, "E2E Isolation")
+		status, resp := doTenantJSON(t, "POST",
+			"https://"+host+"/admin/org/platform-agent", token, orgID, body)
+		if status != http.StatusOK {
+			t.Fatalf("install platform agent: HTTP %d: %s", status, resp)
+		}
+		t.Logf("platform agent installed: %s", platformID)
+	}
+
+	// First call: complete secret-write-with-approval flow, capture the
+	// granted approval_id and the fresh second-write approval_id.
+	approvalA1, freshA2 := secretWriteApprovalFlow(t, host, token, orgID,
+		ordinaryWS, "E2E_ISO_KEY_A", "iso-A-value-1", "iso-A-value-2")
+	if approvalA1 == "" || freshA2 == "" {
+		t.Fatalf("first helper call returned empty ids (approvalA1=%q, freshA2=%q)", approvalA1, freshA2)
+	}
+	if approvalA1 == freshA2 {
+		t.Fatalf("first helper call violated single-use: approvalA1 == freshA2 == %s", approvalA1)
+	}
+	t.Logf("first call: approvalA1=%s, freshA2=%s (independent)", approvalA1, freshA2)
+
+	// Second call: same helper, different key, must produce INDEPENDENT
+	// approval_ids. If the helper were coupled to a parent-scope variable
+	// (the bug being fixed), the second call would either reuse approvalA1
+	// or fail because state was already set up.
+	approvalB1, freshB2 := secretWriteApprovalFlow(t, host, token, orgID,
+		ordinaryWS, "E2E_ISO_KEY_B", "iso-B-value-1", "iso-B-value-2")
+	if approvalB1 == "" || freshB2 == "" {
+		t.Fatalf("second helper call returned empty ids (approvalB1=%q, freshB2=%q)", approvalB1, freshB2)
+	}
+	if approvalB1 == freshB2 {
+		t.Fatalf("second helper call violated single-use: approvalB1 == freshB2 == %s", approvalB1)
+	}
+	if approvalB1 == approvalA1 {
+		t.Fatalf("isolation violation: second call reused first call's approval_id (%s)", approvalA1)
+	}
+	if freshB2 == freshA2 {
+		t.Fatalf("isolation violation: second call's fresh_id matches first call's fresh_id (%s)", freshA2)
+	}
+	if approvalB1 == freshA2 || freshB2 == approvalA1 {
+		t.Fatalf("isolation violation: cross-call id reuse (approvalA1=%s, freshA2=%s, approvalB1=%s, freshB2=%s)",
+			approvalA1, freshA2, approvalB1, freshB2)
+	}
+	t.Logf("second call: approvalB1=%s, freshB2=%s (no cross-call id reuse)", approvalB1, freshB2)
+	t.Logf("isolation verified: helper secretWriteApprovalFlow is self-contained and order-independent")
+}
-- 
2.52.0