diff --git a/workspace-server/internal/handlers/a2a_proxy_helpers.go b/workspace-server/internal/handlers/a2a_proxy_helpers.go index d9d9c8ec..afe8a69c 100644 --- a/workspace-server/internal/handlers/a2a_proxy_helpers.go +++ b/workspace-server/internal/handlers/a2a_proxy_helpers.go @@ -437,8 +437,26 @@ func (h *WorkspaceHandler) logA2ASuccess(ctx context.Context, workspaceID, calle } } +// nilIfEmpty returns nil for empty OR system-caller strings (else &s). The second +// branch matters: a system-caller id such as "system:restart-context" +// is a non-UUID string (it's a routing marker, not a real workspace id). +// Persisting it to activity_logs.source_id would poison the column — +// downstream joins (e.g. activity.go:443 LEFT JOIN workspaces w ON w.id = +// activity_logs.source_id) and UUID-cast lookups would return NULL or +// error. Normalizing system callers to NULL source_id here keeps the +// "who-did-what" association intact (the callerID is still in +// activity_logs.caller_id as a free-form field if needed) without +// poisoning the source_id FK contract. +// +// This is the production-code root cause of #2680 (post-restart wedge): +// restart_context.go:296 calls ProxyA2ARequest with callerID = +// "system:restart-context" → nilIfEmpty("system:restart-context") +// returned a non-nil pointer to the system-caller id → LogActivity +// persisted it to activity_logs.source_id as the literal string → +// downstream lookups failed → wedge-detector side-effects → workspace +// stayed degraded instead of online. See #2693. 
func nilIfEmpty(s string) *string { - if s == "" { + if s == "" || isSystemCaller(s) { return nil } return &s diff --git a/workspace-server/internal/handlers/a2a_proxy_helpers_test.go b/workspace-server/internal/handlers/a2a_proxy_helpers_test.go index b3677cc1..cc6085e5 100644 --- a/workspace-server/internal/handlers/a2a_proxy_helpers_test.go +++ b/workspace-server/internal/handlers/a2a_proxy_helpers_test.go @@ -27,6 +27,54 @@ func TestNilIfEmpty_NonEmptyString(t *testing.T) { } } +// System-caller prefixes (webhook:, system:, test:, channel:) are +// non-UUID routing markers, not real workspace ids. Persisting them +// to activity_logs.source_id would poison the column — downstream +// joins (e.g. activity.go:443 LEFT JOIN workspaces w ON w.id = +// activity_logs.source_id) and UUID-cast lookups would return NULL +// or error. nilIfEmpty must normalize every id carrying such a prefix to +// nil. This is the production-code root cause of #2680 (post-restart +// wedge): restart_context.go:296 calls ProxyA2ARequest with callerID +// = "system:restart-context" → nilIfEmpty("system:restart-context") +// returned a non-nil pointer to the system-caller id → LogActivity +// persisted it to activity_logs.source_id as the literal string → +// downstream lookups failed → wedge-detector side-effects → workspace +// stayed degraded instead of online. See #2693. + +func TestNilIfEmpty_SystemCallerPrefixes(t *testing.T) { + // Sample ids covering the four systemCallerPrefixes from a2a_proxy.go:82-84. All must + // return nil from nilIfEmpty (the post-#2680 contract). isSystemCaller + // uses strings.HasPrefix, so the prefix must be at the start. 
+ systemPrefixes := []string{ + "webhook:github", + "system:restart-context", // the actual offender + "system:other-svc", + "test:integration-1", + "channel:discord", + } + for _, p := range systemPrefixes { + got := nilIfEmpty(p) + if got != nil { + t.Errorf("system-caller %q: got non-nil pointer (would poison activity_logs.source_id), want nil", p) + } + } +} + +func TestNilIfEmpty_RealWorkspaceUUIDStillPreserved(t *testing.T) { + // The fix must NOT regress the canonical case: a real workspace + // UUID (no system prefix) must STILL be passed through to + // activity_logs.source_id as a non-nil pointer. Otherwise we'd + // hide real-workspace attribution. + realUUID := "9a40df22-ba4b-3fc0-75c1-66dd6869ff25" // a real UUID-shaped string + got := nilIfEmpty(realUUID) + if got == nil { + t.Fatal("real workspace UUID: got nil, want non-nil pointer") + } + if *got != realUUID { + t.Errorf("real workspace UUID: got %q, want %q", *got, realUUID) + } +} + // ───────────────────────────────────────────────────────────────────────────── // extractToolTrace tests // ─────────────────────────────────────────────────────────────────────────────