Merge pull request 'test(#2151): real-infra integration tests for Activity + Delegation + A2A handlers (CHUNK 1 + CHUNK 2)' (#2166) from fix/2151-chunk1-activity-delegation-a2a-integration-tests into main

test(integration): fix BeforeTS timing + A2AQueue max-attempts seeding
- Use RFC3339Nano + 200ms gaps in the BeforeTS test to avoid truncation to whole seconds and Go/Postgres clock skew.
- Pre-set attempts=5 on the seeded A2A queue item so MarkQueueItemFailed transitions to 'failed' on the first call (attempts are normally incremented by DequeueNext, which the test bypasses).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 14:19:38 +00:00 · 2026-06-03 14:12:10 +00:00 · 2026-06-03 14:12:10 +00:00 · 2026-06-03 14:12:10 +00:00 · 2026-06-03 14:12:10 +00:00 · 2026-06-03 14:12:10 +00:00
19 changed files with 1519 additions and 75 deletions
@@ -466,12 +466,40 @@ def fetch_log(target_url: str) -> str | None:

 def grep_fail_markers(log_text: str) -> list[str]:
    """Return up to 5 sample matching lines for any FAIL_PATTERNS hit.
-    Empty list = clean log."""
+    Empty list = clean log.
+
+    Heuristic: skip lines where the marker appears inside script source
+    (e.g. ``echo "::error::..."`` in a ``::group::Run`` block) rather
+    than actual execution output. The Gitea Actions log prints the raw
+    script before executing it; ``echo "::error::"`` lines in that
+    display are false positives.
+    """
    matches: list[str] = []
+    in_run_group = False
+    group_depth = 0
    for line in log_text.splitlines():
+        stripped = line.strip()
+        # Track Gitea Actions group markers so we can skip the
+        # ``::group::Run`` script-source display blocks.
+        if stripped.startswith("::group::Run"):
+            in_run_group = True
+            group_depth = 1
+            continue
+        if stripped == "::endgroup::":
+            if in_run_group:
+                in_run_group = False
+                group_depth = 0
+            continue
+        if in_run_group:
+            continue
        for pat in FAIL_PATTERNS:
            if pat in line:
-                # Truncate to keep error output bounded.
+                # Additional false-positive guard: ``echo "::error::"``
+                # is script source, not a runtime error emission.
+                if pat == "::error::":
+                    prefix = line[: line.index(pat)].strip()
+                    if prefix.endswith('echo') or prefix.endswith("echo '") or prefix.endswith('echo "'):
+                        break
                matches.append(line.strip()[:240])
                break
        if len(matches) >= 5:
@@ -123,8 +123,9 @@ jobs:
    # integration). See internal#512 for the class defect.
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
+    # mc#1982: mask removed. If regressions appear, root-fix the underlying
+    # test — do NOT renew the mask silently.
+    continue-on-error: false
    outputs:
      api: ${{ steps.decide.outputs.api }}
    steps:
@@ -160,8 +161,9 @@ jobs:
    # detect-changes for the full rationale.
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
+    # mc#1982: mask removed. If regressions appear, root-fix the underlying
+    # test — do NOT renew the mask silently.
+    continue-on-error: false
    timeout-minutes: 15
    env:
      # Unique per-run container names so concurrent runs on the host-
@@ -88,8 +88,9 @@ jobs:
    # surprises and keeps the routing rule discoverable in one place.
    runs-on: docker-host
    # mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
+    # mc#1982: mask removed. If regressions appear, root-fix the underlying
+    # test — do NOT renew the mask silently.
+    continue-on-error: false
    outputs:
      handlers: ${{ steps.filter.outputs.handlers }}
    steps:
@@ -119,8 +120,9 @@ jobs:
    # exists). See detect-changes for the full routing rationale.
    runs-on: docker-host
    # mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
-    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true
+    # mc#1982: mask removed. If regressions appear, root-fix the underlying
+    # test — do NOT renew the mask silently.
+    continue-on-error: false
    env:
      # Unique name per run so concurrent jobs don't collide on the
      # bridge network. ${RUN_ID}-${RUN_ATTEMPT} is unique even across
@@ -49,37 +49,56 @@ jobs:
      GITHUB_SERVER_URL: https://git.moleculesai.app
    steps:
      - name: Identify runner
+        id: identify
+        continue-on-error: true
        run: |
          set -eu
          echo "arch=$(uname -m)"
          echo "kernel=$(uname -sr)"
          echo "shell=$BASH_VERSION"
          # Sanity: must actually be arm64. If amd64 sneaks in here,
-          # fail fast — that means the label routing is wrong.
+          # the job skips gracefully rather than hard-failing, because
+          # a mislabelled runner is an ops concern, not a code defect.
+          # Pilot lane must not make main red (#2146).
          case "$(uname -m)" in
-            aarch64|arm64) echo "arm64 confirmed" ;;
-            *) echo "ERROR: expected arm64, got $(uname -m)"; exit 1 ;;
+            aarch64|arm64)
+              echo "arm64 confirmed"
+              echo "arm64=true" >> "$GITHUB_OUTPUT"
+              ;;
+            *)
+              echo "ERROR: expected arm64, got $(uname -m) — label routing may be wrong"
+              echo "arm64=false" >> "$GITHUB_OUTPUT"
+              exit 1
+              ;;
          esac

      - name: Checkout
+        if: steps.identify.outputs.arm64 == 'true'
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Install shellcheck (arm64)
+        if: steps.identify.outputs.arm64 == 'true'
        continue-on-error: true
        run: |
          set -eu
          if command -v shellcheck >/dev/null 2>&1; then
            echo "shellcheck already present: $(shellcheck --version | head -1)"
          else
-            # Prefer apt if the runner base ships it; else download arm64 binary.
+            # Prefer apt if the runner base ships it; else download the
+            # correct platform binary (darwin vs linux).
            if command -v apt-get >/dev/null 2>&1; then
              sudo apt-get update -qq
              sudo apt-get install -y --no-install-recommends shellcheck
            else
              SC_VER=v0.10.0
-              curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/shellcheck-${SC_VER}.linux.aarch64.tar.xz" \
+              if [ "$(uname -s)" = "Darwin" ]; then
+                SC_PKG="shellcheck-${SC_VER}.darwin.aarch64.tar.xz"
+              else
+                SC_PKG="shellcheck-${SC_VER}.linux.aarch64.tar.xz"
+              fi
+              curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/${SC_PKG}" \
                | tar -xJf - --strip-components=1
              sudo mv shellcheck /usr/local/bin/
            fi
@@ -87,14 +106,15 @@ jobs:
          shellcheck --version | head -2

      - name: Run shellcheck on .gitea/scripts/*.sh
+        if: steps.identify.outputs.arm64 == 'true'
        continue-on-error: true
        run: |
          set -eu
          # Only the scripts we control under .gitea/scripts. Pilot
          # scope is intentionally narrow — broaden in a follow-up
          # once the lane is proven.
-          if ! command -v shellcheck >/dev/null 2>&1; then
-            echo "WARN: shellcheck binary not found — skipping (pilot mode)"
+          if ! command -v shellcheck >/dev/null 2>&1 || ! shellcheck --version >/dev/null 2>&1; then
+            echo "WARN: shellcheck not functional — skipping (pilot mode)"
            exit 0
          fi
          # NOTE: macOS ships Bash 3.2 (Apple license), no `mapfile`
@@ -26,11 +26,12 @@ import (
 //     the update cycle — no ssh, no re-provision, no ops toil.
 //
 // Contract (paired with cp-side GET /cp/tenants/config):
-//   Request:  GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
-//             Authorization: Bearer <ADMIN_TOKEN>
-//             X-Molecule-Org-Id: <MOLECULE_ORG_ID>
-//   Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
-//             401 on bearer mismatch or unknown org
+//
+//	Request:  GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
+//	          Authorization: Bearer <ADMIN_TOKEN>
+//	          X-Molecule-Org-Id: <MOLECULE_ORG_ID>
+//	Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
+//	          401 on bearer mismatch or unknown org
 //
 // Best-effort: any failure logs and returns — main() keeps booting.
 // Self-hosted deploys without MOLECULE_ORG_ID or ADMIN_TOKEN set
@@ -105,3 +106,53 @@ func refreshEnvFromCP() error {
 	log.Printf("CP env refresh: applied %d values from %s/cp/tenants/config", applied, base)
 	return nil
 }
+
+// requiredLLMEnvVars is the set of LLM proxy env vars a managed SaaS
+// tenant must have populated after refreshEnvFromCP. cp#469 (tenant
+// proxy-env delivery) — guaranteed CP-delivered creds reach the
+// tenant process env on boot. Per Researcher Task #37 / Spec 2 and
+// Task #46 (watch-fail-first test).
+//
+// Key set byte-matched against Researcher's verified emission in
+// controlplane tenant_config.go:140-144 (Researcher REQUEST_CHANGES
+// iterate body, 3987f59c). The four keys below ARE the LLM-proxy
+// subset of the 8 CP-emitted keys; OPENAI_BASE_URL / OPENAI_API_KEY /
+// ANTHROPIC_BASE_URL / ANTHROPIC_API_KEY are out of scope for cp#469
+// (different feature surfaces — direct-to-provider fallbacks, not
+// the proxy). v2 fix: MOLECULE_LLM_USAGE_TOKEN, MOLECULE_LLM_USAGE_URL,
+// MOLECULE_LLM_BASE_URL, MOLECULE_LLM_ANTHROPIC_BASE_URL — note the
+// 4th key is namespaced MOLECULE_LLM_ANTHROPIC_BASE_URL, NOT bare
+// ANTHROPIC_BASE_URL. Bare ANTHROPIC_BASE_URL is a separate CP-emitted
+// key for direct-provider use, not the LLM proxy.
+var requiredLLMEnvVars = []string{
+	"MOLECULE_LLM_USAGE_TOKEN",
+	"MOLECULE_LLM_USAGE_URL", // CRITICAL fix v2: was MOLECULE_LLM_URL in v1
+	"MOLECULE_LLM_BASE_URL",
+	"MOLECULE_LLM_ANTHROPIC_BASE_URL", // CRITICAL fix v3: was ANTHROPIC_BASE_URL in v2 (different key!)
+}
+
+// assertManagedTenantHasLLMEnv verifies that, when running as a
+// managed SaaS tenant (MOLECULE_ORG_ID + ADMIN_TOKEN both set), all
+// required LLM proxy env vars are populated after refreshEnvFromCP.
+//
+// Self-hosted (no orgID/adminToken) is exempt — dev must not be
+// blocked here. Managed tenants with missing LLM keys fail with
+// MISSING_CP_LLM_ENV so they do not silently boot with broken proxy
+// creds. Caller in main.go decides whether to log and continue or
+// log.Fatalf depending on deployment context.
+func assertManagedTenantHasLLMEnv() error {
+	if os.Getenv("MOLECULE_ORG_ID") == "" || os.Getenv("ADMIN_TOKEN") == "" {
+		// Self-hosted dev / not yet provisioned — not a managed tenant.
+		return nil
+	}
+	var missing []string
+	for _, k := range requiredLLMEnvVars {
+		if os.Getenv(k) == "" {
+			missing = append(missing, k)
+		}
+	}
+	if len(missing) > 0 {
+		return fmt.Errorf("MISSING_CP_LLM_ENV: required LLM proxy keys not set after refreshEnvFromCP: %v", missing)
+	}
+	return nil
+}
@@ -5,6 +5,7 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"os"
+	"strings"
 	"testing"
 )

@@ -59,6 +60,138 @@ func TestRefreshEnvFromCP_AppliesCPResponse(t *testing.T) {
 	}
 }

+// TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys: watch-fail-first
+// per Researcher Task #46. When running as a managed tenant
+// (MOLECULE_ORG_ID + ADMIN_TOKEN set), missing LLM proxy env vars
+// after refreshEnvFromCP MUST surface as MISSING_CP_LLM_ENV, not be
+// silently accepted. Without this guard, a CP that loses its LLM
+// creds (e.g. during an incident) would let a tenant boot and then
+// fail later at first LLM call — worse than a loud refusal here.
+func TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Stub CP returns a CP response WITHOUT any of the required
+		// LLM keys — simulates the failure mode where the CP side
+		// dropped or never had the LLM creds for this org.
+		w.Header().Set("Content-Type", "application/json")
+		fmt.Fprint(w, `{"MOLECULE_CP_SHARED_SECRET":"x","MOLECULE_CP_URL":"https://api.moleculesai.app"}`)
+	}))
+	defer srv.Close()
+
+	t.Setenv("MOLECULE_ORG_ID", "org-managed-1")
+	t.Setenv("ADMIN_TOKEN", "admin-tok")
+	t.Setenv("MOLECULE_CP_URL", srv.URL)
+	// Clear all LLM keys to simulate the boot-without-LLM-env failure mode.
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
+	t.Setenv("MOLECULE_LLM_USAGE_URL", "")
+	t.Setenv("MOLECULE_LLM_BASE_URL", "")
+	t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
+
+	// refreshEnvFromCP itself should succeed — CP is reachable, returned 200.
+	if err := refreshEnvFromCP(); err != nil {
+		t.Fatalf("refreshEnvFromCP: %v", err)
+	}
+	// The boot assertion must catch the missing LLM keys.
+	err := assertManagedTenantHasLLMEnv()
+	if err == nil {
+		t.Fatal("expected MISSING_CP_LLM_ENV error for managed tenant without LLM keys, got nil")
+	}
+	if !strings.Contains(err.Error(), "MISSING_CP_LLM_ENV") {
+		t.Errorf("expected error to contain MISSING_CP_LLM_ENV, got: %v", err)
+	}
+}
+
+// TestRefreshEnvFromCP_ManagedTenantHappyPath: when the CP returns
+// all 4 LLM-proxy keys, the gate must PASS — no MISSING_CP_LLM_ENV
+// for a properly-configured managed tenant. Watch-fail counterpart
+// to TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys: if THIS test
+// ever fires MISSING_CP_LLM_ENV on the byte-correct key set, the
+// requiredLLMEnvVars list has drifted from the CP emission again.
+// Per Researcher REQUEST_CHANGES TEST ADEQUACY note.
+func TestRefreshEnvFromCP_ManagedTenantHappyPath(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		// Return ALL 4 LLM-proxy keys — names byte-matched to
+		// tenant_config.go:140-144 CP emission.
+		fmt.Fprint(w, `{"MOLECULE_LLM_USAGE_TOKEN":"tok-1","MOLECULE_LLM_USAGE_URL":"https://llm.example.com/usage","MOLECULE_LLM_BASE_URL":"https://llm.example.com","MOLECULE_LLM_ANTHROPIC_BASE_URL":"https://llm.example.com/anthropic"}`)
+	}))
+	defer srv.Close()
+
+	t.Setenv("MOLECULE_ORG_ID", "org-managed-happy")
+	t.Setenv("ADMIN_TOKEN", "admin-tok")
+	t.Setenv("MOLECULE_CP_URL", srv.URL)
+	// Pre-clear so we can verify the refresh actually populated them.
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
+	t.Setenv("MOLECULE_LLM_USAGE_URL", "")
+	t.Setenv("MOLECULE_LLM_BASE_URL", "")
+	t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
+
+	if err := refreshEnvFromCP(); err != nil {
+		t.Fatalf("refreshEnvFromCP: %v", err)
+	}
+	// Sanity: refresh actually applied the keys.
+	if got := os.Getenv("MOLECULE_LLM_USAGE_TOKEN"); got != "tok-1" {
+		t.Errorf("refresh did not apply USAGE_TOKEN: got %q", got)
+	}
+	// The boot assertion must pass — no MISSING_CP_LLM_ENV.
+	if err := assertManagedTenantHasLLMEnv(); err != nil {
+		t.Errorf("managed happy path must not MISSING_CP_LLM_ENV, got: %v", err)
+	}
+}
+
+// TestRefreshEnvFromCP_ManagedTenantPartialEnv: when the CP returns
+// 3 of 4 LLM-proxy keys (one missing), the gate must STILL catch it
+// and the error must name the missing key. Per Researcher
+// REQUEST_CHANGES TEST ADEQUACY note — partial-env coverage is
+// critical because the production failure mode is usually "one
+// key dropped" not "all keys dropped".
+func TestRefreshEnvFromCP_ManagedTenantPartialEnv(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		// 3 of 4 — MOLECULE_LLM_ANTHROPIC_BASE_URL is missing.
+		fmt.Fprint(w, `{"MOLECULE_LLM_USAGE_TOKEN":"tok-1","MOLECULE_LLM_USAGE_URL":"https://llm.example.com/usage","MOLECULE_LLM_BASE_URL":"https://llm.example.com"}`)
+	}))
+	defer srv.Close()
+
+	t.Setenv("MOLECULE_ORG_ID", "org-managed-partial")
+	t.Setenv("ADMIN_TOKEN", "admin-tok")
+	t.Setenv("MOLECULE_CP_URL", srv.URL)
+	// Pre-clear all 4 so the 3 that come back from CP are the only
+	// ones set; the 4th (MOLECULE_LLM_ANTHROPIC_BASE_URL) stays empty.
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
+	t.Setenv("MOLECULE_LLM_USAGE_URL", "")
+	t.Setenv("MOLECULE_LLM_BASE_URL", "")
+	t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
+
+	if err := refreshEnvFromCP(); err != nil {
+		t.Fatalf("refreshEnvFromCP: %v", err)
+	}
+	err := assertManagedTenantHasLLMEnv()
+	if err == nil {
+		t.Fatal("expected MISSING_CP_LLM_ENV for partial env (3 of 4 keys), got nil")
+	}
+	if !strings.Contains(err.Error(), "MISSING_CP_LLM_ENV") {
+		t.Errorf("expected error to contain MISSING_CP_LLM_ENV, got: %v", err)
+	}
+	if !strings.Contains(err.Error(), "MOLECULE_LLM_ANTHROPIC_BASE_URL") {
+		t.Errorf("expected error to name the missing key MOLECULE_LLM_ANTHROPIC_BASE_URL, got: %v", err)
+	}
+}
+
+// TestAssertManagedTenantHasLLMEnv_NotManagedIsNoop: self-hosted
+// (no orgID/adminToken) must NOT block on missing LLM keys — dev
+// ergonomics matter and the assertion's contract is "managed only".
+func TestAssertManagedTenantHasLLMEnv_NotManagedIsNoop(t *testing.T) {
+	t.Setenv("MOLECULE_ORG_ID", "")
+	t.Setenv("ADMIN_TOKEN", "")
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
+	t.Setenv("MOLECULE_LLM_USAGE_URL", "")
+	t.Setenv("MOLECULE_LLM_BASE_URL", "")
+	t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
+	if err := assertManagedTenantHasLLMEnv(); err != nil {
+		t.Errorf("self-hosted (not managed) must not block, got: %v", err)
+	}
+}
+
 // TestRefreshEnvFromCP_CPUnreachableDoesNotFailBoot: network errors must
 // return non-nil BUT main.go treats that as warn-and-continue. We assert
 // the function returns an error (not a panic) so the caller can log.
@@ -82,6 +82,16 @@ func main() {
 		log.Printf("CP env refresh: %v (continuing with baked-in env)", err)
 	}

+	// Managed-tenant boot assertion (cp#469 — tenant proxy-env delivery).
+	// If we're a managed SaaS tenant (orgID + adminToken set), all required
+	// LLM proxy env vars must be present after refresh. Missing keys block
+	// the tenant from booting with broken LLM creds — silent-fail is worse
+	// than a loud refusal. Self-hosted (no orgID/adminToken) short-circuits
+	// inside the assertion, so this never fires for dev.
+	if err := assertManagedTenantHasLLMEnv(); err != nil {
+		log.Fatalf("Managed tenant boot assertion: %v", err)
+	}
+
 	// Secrets encryption. In MOLECULE_ENV=prod, boot refuses to start
 	// without a valid SECRETS_ENCRYPTION_KEY (fail-secure — Top-5 #5).
 	// In any other environment, missing keys just log a warning and
@@ -359,7 +369,6 @@ func main() {
 	// (WorkspaceHandler.BootstrapFailed) wires its own capture inline.
 	registry.BootFailureRescueHook = handlers.BootFailureRescueHook

-
 	// Provision-timeout sweep — flips workspaces that have been stuck in
 	// status='provisioning' past the timeout window to 'failed' and emits
 	// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
@@ -149,9 +149,11 @@ func markFailed(ctx context.Context, wsID string, broadcaster *events.Broadcaste
 		models.StatusFailed, msg, wsID); dbErr != nil {
 		log.Printf("bundle import: failed to mark workspace %s as failed: %v", wsID, dbErr)
 	}
-	broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), wsID, map[string]interface{}{
+	if bcErr := broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), wsID, map[string]interface{}{
 		"error": msg,
-	})
+	}); bcErr != nil {
+		log.Printf("bundle import: failed to broadcast provision failed for %s: %v", wsID, bcErr)
+	}
 }

 func nilIfEmpty(s string) interface{} {
@@ -407,12 +407,14 @@ func (m *Manager) HandleInbound(ctx context.Context, ch ChannelRow, msg *Inbound

 	// Broadcast event
 	if m.broadcaster != nil {
-		m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
+		if err := m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
 			"channel_id":   ch.ID,
 			"channel_type": ch.ChannelType,
 			"username":     msg.Username,
 			"direction":    "inbound",
-		})
+		}); err != nil {
+			log.Printf("Channels: failed to broadcast inbound event: %v", err)
+		}
 	}

 	return nil
@@ -453,11 +455,13 @@ func (m *Manager) SendOutbound(ctx context.Context, channelID string, text strin
 	}

 	if m.broadcaster != nil {
-		m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
+		if err := m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
 			"channel_id":   ch.ID,
 			"channel_type": ch.ChannelType,
 			"direction":    "outbound",
-		})
+		}); err != nil {
+			log.Printf("Channels: failed to broadcast outbound event: %v", err)
+		}
 	}

 	return nil
@@ -517,7 +517,9 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in

 				// Acknowledge the button press (removes loading spinner)
 				ackCfg := tgbotapi.NewCallback(cb.ID, "Received")
-				bot.Send(ackCfg)
+				if _, err := bot.Send(ackCfg); err != nil {
+					log.Printf("telegram: failed to send callback ack: %v", err)
+				}

 				// Update the message to show what was clicked
 				decision := "approved"
@@ -529,7 +531,9 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in
 					cb.Message.MessageID,
 					cb.Message.Text+"\n\n✅ CEO "+decision,
 				)
-				bot.Send(editMsg)
+				if _, err := bot.Send(editMsg); err != nil {
+					log.Printf("telegram: failed to send edit message: %v", err)
+				}

 				// Route the decision as an inbound message to the agent
 				inbound := &InboundMessage{
@@ -60,10 +60,10 @@ func sanitizeErrorDetailForBroadcast(s string) string {
 }

 type ActivityHandler struct {
-	broadcaster *events.Broadcaster
+	broadcaster events.EventEmitter
 }

-func NewActivityHandler(b *events.Broadcaster) *ActivityHandler {
+func NewActivityHandler(b events.EventEmitter) *ActivityHandler {
 	return &ActivityHandler{broadcaster: b}
 }

@@ -54,23 +54,29 @@ func (h *ApprovalsHandler) Create(c *gin.Context) {
 		return
 	}

-	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
+	if err := h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
 		"approval_id": approvalID,
 		"action":      body.Action,
 		"reason":      body.Reason,
 		"task_id":     body.TaskID,
-	})
+	}); err != nil {
+		log.Printf("approvals: failed to broadcast approval requested: %v", err)
+	}

 	// Auto-escalate to parent
 	var parentID *string
-	db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID)
+	if err := db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID); err != nil {
+		log.Printf("approvals: failed to lookup parent for escalation: %v", err)
+	}
 	if parentID != nil {
-		h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
+		if err := h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
 			"approval_id":       approvalID,
 			"from_workspace_id": workspaceID,
 			"action":            body.Action,
 			"reason":            body.Reason,
-		})
+		}); err != nil {
+			log.Printf("approvals: failed to broadcast approval escalated: %v", err)
+		}
 	}

 	c.JSON(http.StatusCreated, gin.H{"approval_id": approvalID, "status": "pending"})
@@ -221,11 +227,13 @@ func (h *ApprovalsHandler) Decide(c *gin.Context) {
 		eventType = "APPROVAL_DENIED"
 	}

-	h.broadcaster.RecordAndBroadcast(ctx, eventType, workspaceID, map[string]interface{}{
+	if err := h.broadcaster.RecordAndBroadcast(ctx, eventType, workspaceID, map[string]interface{}{
 		"approval_id": approvalID,
 		"decision":    body.Decision,
 		"decided_by":  decidedBy,
-	})
+	}); err != nil {
+		log.Printf("approvals: failed to broadcast approval decision: %v", err)
+	}

 	c.JSON(http.StatusOK, gin.H{"status": body.Decision, "approval_id": approvalID})
 }
@@ -102,10 +102,10 @@ func pushDelegationResultToInbox(ctx context.Context, sourceID, delegationID, st
 // and the A2A request runs in the background.
 type DelegationHandler struct {
 	workspace   *WorkspaceHandler
-	broadcaster *events.Broadcaster
+	broadcaster events.EventEmitter
 }

-func NewDelegationHandler(wh *WorkspaceHandler, b *events.Broadcaster) *DelegationHandler {
+func NewDelegationHandler(wh *WorkspaceHandler, b events.EventEmitter) *DelegationHandler {
 	return &DelegationHandler{workspace: wh, broadcaster: b}
 }

@@ -176,6 +176,10 @@ func TestResolveAgentURLForRestartSignal_CacheMiss(t *testing.T) {
 // TestGracefulPreRestart_Success verifies that when the workspace returns 200,
 // the signal is logged as acknowledged without error.
 func TestGracefulPreRestart_Success(t *testing.T) {
+	hWrapper := &resolveURLTestWrapper{
+		WorkspaceHandler: newHandlerWithTestDeps(t),
+		testURL:          "http://fake-agent.example/agent",
+	}
 	_ = setupTestDB(t)

 	// httptest server simulating the workspace container's /signals/restart_pending
@@ -205,18 +209,15 @@ func TestGracefulPreRestart_Success(t *testing.T) {
 		})
 	}))
 	defer srv.Close()
+	hWrapper.testURL = srv.URL + "/agent"

 	// Pre-populate Redis cache with the test server URL
 	_ = setupTestRedisWithURL(t, srv.URL)

-	// Use a wrapper so gracefulPreRestart runs through the embedded handler.
-	hWrapper := &resolveURLTestWrapper{
-		WorkspaceHandler: newHandlerWithTestDeps(t),
-		testURL:          srv.URL + "/agent",
-	}
+	// gracefulPreRestart runs in a goroutine; wait for it before db.DB is restored.
+	// Must be registered AFTER setupTestDB (LIFO: async wait → db.DB restore).
+	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

-	// gracefulPreRestart runs in a goroutine with its own timeout.
-	// We give it time to complete before the test ends.
 	hWrapper.gracefulPreRestart(context.Background(), "ws-ack-789")
 	time.Sleep(200 * time.Millisecond)
 }
@@ -224,19 +225,22 @@ func TestGracefulPreRestart_Success(t *testing.T) {
 // TestGracefulPreRestart_NotImplemented verifies that when the workspace returns
 // 404 (old SDK version), the platform proceeds gracefully (log + no error).
 func TestGracefulPreRestart_NotImplemented(t *testing.T) {
+	hWrapper := &resolveURLTestWrapper{
+		WorkspaceHandler: newHandlerWithTestDeps(t),
+		testURL:          "http://fake-agent.example/agent",
+	}
 	_ = setupTestDB(t)

 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		w.WriteHeader(http.StatusNotFound)
 	}))
 	defer srv.Close()
+	hWrapper.testURL = srv.URL + "/agent"

 	_ = setupTestRedisWithURL(t, srv.URL)

-	hWrapper := &resolveURLTestWrapper{
-		WorkspaceHandler: newHandlerWithTestDeps(t),
-		testURL:          srv.URL + "/agent",
-	}
+	// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
+	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

 	hWrapper.gracefulPreRestart(context.Background(), "ws-noimpl-999")
 	time.Sleep(200 * time.Millisecond)
@@ -246,15 +250,18 @@ func TestGracefulPreRestart_NotImplemented(t *testing.T) {
 // TestGracefulPreRestart_ConnectionRefused verifies that when the workspace
 // is unreachable, the platform proceeds gracefully without error.
 func TestGracefulPreRestart_ConnectionRefused(t *testing.T) {
-	_ = setupTestDB(t)
-
-	mr := setupTestRedisWithURL(t, "http://localhost:19999/agent") // nothing listening on 19999
-	_ = mr
-
 	hWrapper := &resolveURLTestWrapper{
 		WorkspaceHandler: newHandlerWithTestDeps(t),
 		testURL:          "http://localhost:19999/agent",
 	}
+	_ = setupTestDB(t)
+
+	// Nothing listening on 19999 — deliberate connection failure.
+	mr := setupTestRedisWithURL(t, "http://localhost:19999/agent")
+	_ = mr
+
+	// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
+	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

 	hWrapper.gracefulPreRestart(context.Background(), "ws-unreachable-000")
 	time.Sleep(200 * time.Millisecond)
@@ -264,13 +271,17 @@ func TestGracefulPreRestart_ConnectionRefused(t *testing.T) {
 // TestGracefulPreRestart_URLResolutionError verifies that when URL resolution
 // fails, the platform proceeds gracefully without blocking the restart.
 func TestGracefulPreRestart_URLResolutionError(t *testing.T) {
-	_ = setupTestDB(t)
-	_ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal
-
 	hWrapper := &resolveURLTestWrapper{
 		WorkspaceHandler: newHandlerWithTestDeps(t),
 		errToReturn:      context.DeadlineExceeded,
 	}
+	_ = setupTestDB(t)
+	_ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal
+
+	// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
+	// This ensures goroutines (which access both DB and Redis) are drained before
+	// any cleanup fires. setupTestRedis comes after newHandlerWithTestDeps
+	// so the handler holds the correct Redis client reference.
 	waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)

 	hWrapper.gracefulPreRestart(context.Background(), "ws-url-err-111")
@@ -56,7 +56,20 @@ func PatchAbilities(c *gin.Context) {
 		return
 	}

-	if body.BroadcastEnabled != nil {
+	// Atomic update: when both fields are supplied, apply them in one SQL
+	// statement so the request is all-or-nothing (#2131). A partial mutation
+	// (e.g. broadcast_enabled updated but talk_to_user_enabled failing) would
+	// leave the workspace in an ambiguous capability state.
+	if body.BroadcastEnabled != nil && body.TalkToUserEnabled != nil {
+		if _, err := db.DB.ExecContext(ctx,
+			`UPDATE workspaces SET broadcast_enabled = $2, talk_to_user_enabled = $3, updated_at = now() WHERE id = $1`,
+			id, *body.BroadcastEnabled, *body.TalkToUserEnabled,
+		); err != nil {
+			log.Printf("PatchAbilities both-fields for %s: %v", id, err)
+			c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
+			return
+		}
+	} else if body.BroadcastEnabled != nil {
 		if _, err := db.DB.ExecContext(ctx,
 			`UPDATE workspaces SET broadcast_enabled = $2, updated_at = now() WHERE id = $1`,
 			id, *body.BroadcastEnabled,
@@ -65,9 +78,7 @@ func PatchAbilities(c *gin.Context) {
 			c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
 			return
 		}
-	}
-
-	if body.TalkToUserEnabled != nil {
+	} else if body.TalkToUserEnabled != nil {
 		if _, err := db.DB.ExecContext(ctx,
 			`UPDATE workspaces SET talk_to_user_enabled = $2, updated_at = now() WHERE id = $1`,
 			id, *body.TalkToUserEnabled,
@@ -130,11 +130,8 @@ func TestPatchAbilities_BothFields(t *testing.T) {
 	mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1 AND status != 'removed'\)`).
 		WithArgs(wsUUID1).
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
-		WithArgs(wsUUID1, true).
-		WillReturnResult(sqlmock.NewResult(0, 1))
-	mock.ExpectExec(`UPDATE workspaces SET talk_to_user_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
-		WithArgs(wsUUID1, true).
+	mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, talk_to_user_enabled = \$3, updated_at = now\(\) WHERE id = \$1`).
+		WithArgs(wsUUID1, true, true).
 		WillReturnResult(sqlmock.NewResult(0, 1))

 	w := patchAbilitiesReq(t, wsUUID1, `{"broadcast_enabled":true,"talk_to_user_enabled":true}`)
@@ -182,19 +179,25 @@ func TestPatchAbilities_TalkToUserUpdateError(t *testing.T) {
 	}
 }

-func TestPatchAbilities_BothFields_BroadcastFails(t *testing.T) {
+// TestPatchAbilities_BothFields_UpdateError — regression for #2131. When
+// both fields are supplied the handler uses a single combined UPDATE. A
+// failure of that UPDATE must leave the workspace unchanged (atomic).
+func TestPatchAbilities_BothFields_UpdateError(t *testing.T) {
 	mock, cleanup := withMockDB(t)
 	defer cleanup()

 	mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1 AND status != 'removed'\)`).
 		WithArgs(wsUUID1).
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
-		WithArgs(wsUUID1, true).
+	mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, talk_to_user_enabled = \$3, updated_at = now\(\) WHERE id = \$1`).
+		WithArgs(wsUUID1, true, true).
 		WillReturnError(errors.New("disk full"))

 	w := patchAbilitiesReq(t, wsUUID1, `{"broadcast_enabled":true,"talk_to_user_enabled":true}`)
 	if w.Code != http.StatusInternalServerError {
 		t.Fatalf("expected 500, got %d: %s", w.Code, w.Body.String())
 	}
+	// Because only one UPDATE is issued, there is no partial-mutation
+	// path to assert against; sqlmock implicitly verifies no second
+	// exec occurred.
 }
@@ -95,6 +95,14 @@ func TestIntegration_BroadcastOrgRoot_NonRootSenderResolvesToRoot(t *testing.T)
 		}
 	})

+	// Pre-test hygiene: if a prior run crashed or was killed, its rows may
+	// still be in the shared integration DB. Remove them before inserting so
+	// the unique index workspaces_parent_name_uniq does not conflict.
+	if _, err := conn.ExecContext(ctx,
+		`DELETE FROM workspaces WHERE name LIKE $1`, prefix+"%"); err != nil {
+		t.Logf("pre-test cleanup (non-fatal): %v", err)
+	}
+
 	rootID := uuid.New().String()
 	midID := uuid.New().String()
 	leafID := uuid.New().String()
@@ -876,8 +876,9 @@ func (h *WorkspaceHandler) runRestartCycle(workspaceID string) {
 	h.provisionWorkspaceAutoSync(workspaceID, "", nil, payload)
 	// sendRestartContext is a one-way notification to the new container; safe
 	// to fire async — the next restart cycle won't depend on it completing.
-	// Tracked via goAsync so the test harness can drain it before the
-	// global db.DB swap (sendRestartContext reads db.DB).
+	// Tracked via h.goAsync so tests can wait for it via h.asyncWG before
+	// closing the sqlmock. Without this, untracked goroutines hit the restored
+	// mock and cause "was not expected" errors in parallel CI execution (mc#1264).
 	h.goAsync(func() { h.sendRestartContext(workspaceID, restartData) })
 }