From e0f9434eafca4bba8cc520223ea13da06da6904d Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 4 May 2026 19:58:49 -0700 Subject: [PATCH 01/43] fix(files-eic): silence ssh known-hosts warning that 500'd Hermes config load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GET /workspaces/:id/files/config.yaml on hongming.moleculesai.app's Hermes workspace returned 500 with body: ssh cat: exit status 1 (Warning: Permanently added '[127.0.0.1]:37951' (ED25519) to the list of known hosts.) Root cause: ssh emits the "Permanently added" notice on every fresh tunnel connection, even with UserKnownHostsFile=/dev/null (that prevents persistence, not the warning). It lands on stderr, fooling readFileViaEIC's classifier: if len(out) == 0 && stderr.Len() == 0 { return nil, os.ErrNotExist } return nil, fmt.Errorf("ssh cat: %w (%s)", runErr, ...) stderr was non-empty (the warning), so we returned the wrapped error → 500 from the HTTP layer instead of 404. Fix: add `-o LogLevel=ERROR` to BOTH writeFileViaEIC and readFileViaEIC ssh invocations. Silences info+warning while keeping real auth/tunnel errors visible (those emit at ERROR level). Test: TestSSHArgs_LogLevelErrorBothSites pins the flag in both blocks. Mutation-tested: stripping the flag from one site fails the gate. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/handlers/template_files_eic.go | 20 ++++++++++++ .../handlers/template_files_eic_test.go | 32 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/workspace-server/internal/handlers/template_files_eic.go b/workspace-server/internal/handlers/template_files_eic.go index da8a7d14..edce34fc 100644 --- a/workspace-server/internal/handlers/template_files_eic.go +++ b/workspace-server/internal/handlers/template_files_eic.go @@ -178,6 +178,16 @@ func writeFileViaEIC(ctx context.Context, instanceID, runtime, relPath string, c "-i", keyPath, "-o", "StrictHostKeyChecking=no", "-o", "UserKnownHostsFile=/dev/null", + // LogLevel=ERROR silences the benign "Warning: Permanently + // added '[127.0.0.1]:NNNNN' to known hosts" notice that ssh + // emits on every fresh tunnel connection. Without this, the + // notice lands on stderr and fools readFileViaEIC's "empty + // stdout + empty stderr → file not found" classifier into + // thinking the warning is a real ssh-layer error → 500 + // instead of 404 (Hermes config.yaml load, hongming tenant, + // 2026-05-05 02:38). Real auth/tunnel errors stay visible + // because they're emitted at ERROR level. + "-o", "LogLevel=ERROR", "-o", "ServerAliveInterval=15", "-p", fmt.Sprintf("%d", localPort), fmt.Sprintf("%s@127.0.0.1", osUser), @@ -292,6 +302,16 @@ func readFileViaEIC(ctx context.Context, instanceID, runtime, relPath string) ([ "-i", keyPath, "-o", "StrictHostKeyChecking=no", "-o", "UserKnownHostsFile=/dev/null", + // LogLevel=ERROR silences the benign "Warning: Permanently + // added '[127.0.0.1]:NNNNN' to known hosts" notice that ssh + // emits on every fresh tunnel connection. Without this, the + // notice lands on stderr and fools readFileViaEIC's "empty + // stdout + empty stderr → file not found" classifier into + // thinking the warning is a real ssh-layer error → 500 + // instead of 404 (Hermes config.yaml load, hongming tenant, + // 2026-05-05 02:38). Real auth/tunnel errors stay visible + // because they're emitted at ERROR level. 
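+ // (Reproducible outside the app: a plain `ssh -o StrictHostKeyChecking=no
+ // -o UserKnownHostsFile=/dev/null user@host true` prints the same warning
+ // on stderr on every connection; adding `-o LogLevel=ERROR` silences it.)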
+ "-o", "LogLevel=ERROR", "-o", "ServerAliveInterval=15", "-p", fmt.Sprintf("%d", localPort), fmt.Sprintf("%s@127.0.0.1", osUser), diff --git a/workspace-server/internal/handlers/template_files_eic_test.go b/workspace-server/internal/handlers/template_files_eic_test.go index 30bd9988..e5bc8a48 100644 --- a/workspace-server/internal/handlers/template_files_eic_test.go +++ b/workspace-server/internal/handlers/template_files_eic_test.go @@ -1,6 +1,8 @@ package handlers import ( + "os" + "regexp" "strings" "testing" ) @@ -66,6 +68,36 @@ func TestResolveWorkspaceFilePath_RejectsTraversal(t *testing.T) { } } +// TestSSHArgs_LogLevelErrorBothSites pins that BOTH ssh invocations +// (writeFileViaEIC + readFileViaEIC) include `-o LogLevel=ERROR`. +// +// Without that flag, ssh emits a "Warning: Permanently added +// '[127.0.0.1]:NNNNN' (ED25519) to the list of known hosts." line on +// every fresh tunnel connection (even with UserKnownHostsFile=/dev/null +// — that prevents persistence, not the warning). The warning lands on +// stderr, which fools readFileViaEIC's "empty stdout + empty stderr → +// file not found" classifier into thinking the warning is a real +// ssh-layer error and returning 500 instead of 404. +// +// Caught 2026-05-05 02:38 on hongming.moleculesai.app: opening Hermes +// workspace's Config tab returned 500 with body +// `ssh cat: exit status 1 (Warning: Permanently added '[127.0.0.1]:37951'…)`. +// +// LogLevel=ERROR silences info+warning while keeping real auth/tunnel +// errors visible. This test reads the source and asserts the flag +// appears at least twice (one per ssh block) — fires if a future edit +// removes it from either site. +func TestSSHArgs_LogLevelErrorBothSites(t *testing.T) { + src, err := os.ReadFile("template_files_eic.go") + if err != nil { + t.Fatalf("read source: %v", err) + } + matches := regexp.MustCompile(`"-o", "LogLevel=ERROR"`).FindAllIndex(src, -1) + if len(matches) < 2 { + t.Errorf("expected LogLevel=ERROR in BOTH ssh blocks (write + read); found %d occurrences", len(matches)) + } +} + // TestShellQuote — the sole piece of variable data in the remote ssh // command is the absolute path. It's already built from a map + Clean() // so traversal is impossible, but we still single-quote as defence-in- From c0bfd19b9e3eebd36ec021548c9cc093e1b13fe9 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 4 May 2026 19:58:56 -0700 Subject: [PATCH 02/43] feat(external-templates): codex tab uses plain pip install for bridge daemon `codex-channel-molecule` 0.1.0 is now on PyPI, so operators no longer need the `git+https://...` URL workaround. Verified: `pip install codex-channel-molecule` from a clean venv installs the wheel and the `codex-channel-molecule --help` console script runs. PyPI: https://pypi.org/project/codex-channel-molecule/0.1.0/ Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace-server/internal/handlers/external_connection.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workspace-server/internal/handlers/external_connection.go b/workspace-server/internal/handlers/external_connection.go index 249e74a7..a1b94d4c 100644 --- a/workspace-server/internal/handlers/external_connection.go +++ b/workspace-server/internal/handlers/external_connection.go @@ -314,7 +314,7 @@ const externalCodexTemplate = `# Codex external setup — outbound tools (MCP) + # 1. 
Install codex CLI, the workspace runtime, and the bridge daemon: npm install -g @openai/codex@^0.57 pip install molecule-ai-workspace-runtime -pip install 'git+https://github.com/Molecule-AI/codex-channel-molecule.git' +pip install codex-channel-molecule # 2. Wire the molecule MCP server into codex's config.toml — this is # the OUTBOUND path (codex calls list_peers / delegate_task / From 11c9ed2a46d26f2310260859ba68a64816a2acdc Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 4 May 2026 20:00:23 -0700 Subject: [PATCH 03/43] =?UTF-8?q?fix(provision):=20StopWorkspaceAuto=20mir?= =?UTF-8?q?ror=20=E2=80=94=20close=20SaaS=20EC2-leak=20class?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #2813 (team-collapse) and #2814 (workspace delete). Two leaks, one class. Both call sites had the same shape pre-fix: if h.provisioner != nil { h.provisioner.Stop(ctx, wsID) } On SaaS where h.provisioner (Docker) is nil and h.cpProv is set, that gate evaluates false and the EC2 keeps running. Workspace gets marked removed in DB; EC2 lives on until the orphan sweeper catches it. Same drift class as PR #2811's org-import provision bug — a Docker- only check on what should be a both-backend operation. Confirmed in production: PR #2811's verification step deleted a test workspace and the EC2 stayed running until I terminated it manually. Fix: WorkspaceHandler.StopWorkspaceAuto(ctx, wsID) — symmetric mirror of provisionWorkspaceAuto. CP first, Docker second, no-op when neither is wired (a workspace nobody is running can't be stopped — that's a no-op, not a failure, distinct from provision's mark-failed contract). Three call-site changes: - team.go:208 (Collapse) → h.wh.StopWorkspaceAuto(ctx, childID) - workspace_crud.go:432 (stopAndRemove) → h.StopWorkspaceAuto(...); RemoveVolume stays Docker-only behind an explicit gate since CP-managed workspaces have no host-bind volumes - TeamHandler.provisioner field + NewTeamHandler's *Provisioner param removed as dead code (Stop was the only call site) Volume cleanup separation is intentional: the abstraction is "stop the running workload," not "tear down all state." Callers that need volume cleanup keep their `if h.provisioner != nil { RemoveVolume }` gate AFTER the Stop call. Tests: - TestStopWorkspaceAuto_RoutesToCPWhenSet — SaaS path - TestStopWorkspaceAuto_RoutesToDockerWhenOnlyDocker — self-hosted - TestStopWorkspaceAuto_NoBackendIsNoOp — pins the contract distinction from provisionWorkspaceAuto's mark-failed - TestNoCallSiteCallsBareStop — source-level pin against `.provisioner.Stop(` / `.cpProv.Stop(` outside the dispatcher, per-backend bodies, restart helper, and the Docker-daemon-direct short-lived-container path. Strips Go comments before substring match so archaeology in code comments doesn't trip the gate. - Verified: pin FAILS against the buggy shape (workspace_crud.go reversion); team.go reversion compile-fails because the field is gone — even stronger than the test. Out of scope (tracked under #2799): - workspace_restart.go's manual if-cpProv-else dispatch with retry semantics tuned for the restart hot path. Functionally equivalent + wraps cpStopWithRetry, so it's not the bug class this PR closes. 
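For reference, the entire dispatcher the call sites now share, condensed
from the workspace.go hunk below (CP first, Docker second, nil when
neither backend is wired):

    func (h *WorkspaceHandler) StopWorkspaceAuto(ctx context.Context, workspaceID string) error {
        if h.cpProv != nil {
            return h.cpProv.Stop(ctx, workspaceID)
        }
        if h.provisioner != nil {
            return h.provisioner.Stop(ctx, workspaceID)
        }
        return nil
    }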
Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace-server/internal/handlers/team.go | 29 ++- .../internal/handlers/team_test.go | 12 +- .../internal/handlers/workspace.go | 37 ++++ .../internal/handlers/workspace_crud.go | 37 ++-- .../handlers/workspace_provision_auto_test.go | 208 +++++++++++++++++- workspace-server/internal/router/router.go | 2 +- 6 files changed, 288 insertions(+), 37 deletions(-) diff --git a/workspace-server/internal/handlers/team.go b/workspace-server/internal/handlers/team.go index c4a481f9..0ac5aac4 100644 --- a/workspace-server/internal/handlers/team.go +++ b/workspace-server/internal/handlers/team.go @@ -11,7 +11,6 @@ import ( "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" "github.com/Molecule-AI/molecule-monorepo/platform/internal/events" "github.com/Molecule-AI/molecule-monorepo/platform/internal/models" - "github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner" "github.com/gin-gonic/gin" "github.com/google/uuid" "gopkg.in/yaml.v3" @@ -25,28 +24,21 @@ import ( // NULL auth_token — same drift class as the SaaS bug fixed in #2366. type TeamHandler struct { broadcaster *events.Broadcaster - // provisioner is interface-typed (#2369) for the same reason as - // WorkspaceHandler.provisioner — Stop is the only call site here - // and it's on the LocalProvisionerAPI surface, so widening is free - // and symmetric with WorkspaceHandler. - provisioner provisioner.LocalProvisionerAPI wh *WorkspaceHandler platformURL string configsDir string } -func NewTeamHandler(b *events.Broadcaster, p *provisioner.Provisioner, wh *WorkspaceHandler, platformURL, configsDir string) *TeamHandler { - h := &TeamHandler{ +// NewTeamHandler constructs a TeamHandler. Backend selection (Docker vs +// CP) goes through h.wh.StopWorkspaceAuto + h.wh.provisionWorkspaceAuto; +// no per-handler provisioner field is needed here. +func NewTeamHandler(b *events.Broadcaster, wh *WorkspaceHandler, platformURL, configsDir string) *TeamHandler { + return &TeamHandler{ broadcaster: b, wh: wh, platformURL: platformURL, configsDir: configsDir, } - // Avoid the typed-nil interface trap (see NewWorkspaceHandler note). - if p != nil { - h.provisioner = p - } - return h } // Expand handles POST /workspaces/:id/expand @@ -203,9 +195,14 @@ func (h *TeamHandler) Collapse(c *gin.Context) { continue } - // Stop container if provisioner available - if h.provisioner != nil { - h.provisioner.Stop(ctx, childID) + // Stop the workload via the backend dispatcher (CP for SaaS, + // Docker for self-hosted). Pre-2026-05-05 this was + // `if h.provisioner != nil { h.provisioner.Stop(...) }`, which + // silently skipped on every SaaS tenant — child EC2s kept running + // after team-collapse until the orphan sweeper caught them + // (issue #2813). 
+ if err := h.wh.StopWorkspaceAuto(ctx, childID); err != nil { + log.Printf("Team collapse: stop %s failed: %v — orphan sweeper will reconcile", childID, err) } // Mark as removed diff --git a/workspace-server/internal/handlers/team_test.go b/workspace-server/internal/handlers/team_test.go index e909308d..1967ee1f 100644 --- a/workspace-server/internal/handlers/team_test.go +++ b/workspace-server/internal/handlers/team_test.go @@ -34,7 +34,7 @@ func TestTeamCollapse_NoChildren(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) broadcaster := newTestBroadcaster() - handler := NewTeamHandler(broadcaster, nil, nil, "http://localhost:8080", "/tmp/configs") + handler := NewTeamHandler(broadcaster, NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", "/tmp/configs") // No children mock.ExpectQuery("SELECT id, name FROM workspaces WHERE parent_id"). @@ -66,7 +66,7 @@ func TestTeamCollapse_WithChildren(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) broadcaster := newTestBroadcaster() - handler := NewTeamHandler(broadcaster, nil, nil, "http://localhost:8080", "/tmp/configs") + handler := NewTeamHandler(broadcaster, NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", "/tmp/configs") // Two children mock.ExpectQuery("SELECT id, name FROM workspaces WHERE parent_id"). @@ -122,7 +122,7 @@ func TestTeamCollapse_WithChildren(t *testing.T) { func TestTeamExpand_WorkspaceNotFound(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - handler := NewTeamHandler(newTestBroadcaster(), nil, nil, "http://localhost:8080", "/tmp/configs") + handler := NewTeamHandler(newTestBroadcaster(), NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", "/tmp/configs") mock.ExpectQuery("SELECT name, tier, status FROM workspaces WHERE id"). WithArgs("ws-missing"). @@ -143,7 +143,7 @@ func TestTeamExpand_WorkspaceNotFound(t *testing.T) { func TestTeamExpand_NoConfigFound(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - handler := NewTeamHandler(newTestBroadcaster(), nil, nil, "http://localhost:8080", t.TempDir()) + handler := NewTeamHandler(newTestBroadcaster(), NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", t.TempDir()) mock.ExpectQuery("SELECT name, tier, status FROM workspaces WHERE id"). WithArgs("ws-1"). @@ -167,7 +167,7 @@ func TestTeamExpand_EmptySubWorkspaces(t *testing.T) { setupTestRedis(t) configDir := makeTeamConfigDir(t, "myagent", "name: MyAgent\nsub_workspaces: []\n") - handler := NewTeamHandler(newTestBroadcaster(), nil, nil, "http://localhost:8080", configDir) + handler := NewTeamHandler(newTestBroadcaster(), NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", configDir) mock.ExpectQuery("SELECT name, tier, status FROM workspaces WHERE id"). WithArgs("ws-1"). @@ -199,7 +199,7 @@ sub_workspaces: role: code-reviewer ` configDir := makeTeamConfigDir(t, "teamlead", yaml) - handler := NewTeamHandler(broadcaster, nil, nil, "http://localhost:8080", configDir) + handler := NewTeamHandler(broadcaster, NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()), "http://localhost:8080", configDir) mock.ExpectQuery("SELECT name, tier, status FROM workspaces WHERE id"). WithArgs("ws-lead"). 
diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index 27051e23..8d25ed6e 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -170,6 +170,43 @@ func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath stri return false } +// StopWorkspaceAuto picks the backend (CP for SaaS, local Docker for +// self-hosted) and stops the workspace synchronously. Returns nil when +// neither backend is wired (a workspace nobody is running can't be +// stopped — that's a no-op, not an error). +// +// Single source of truth for "stop a workspace" — symmetric with +// provisionWorkspaceAuto. Pre-2026-05-05 the stop side had no Auto +// dispatcher and every caller wrote `if h.provisioner != nil { Stop }`, +// which silently leaked EC2s on SaaS: +// - team.go:208 (Collapse) — issue #2813 +// - workspace_crud.go:432 (stopAndRemove during Delete) — issue #2814 +// +// Both bugs reproduced for ~6 months. The pattern is the same drift +// class as the org-import provision bug closed by PR #2811. +// +// Why CP wins when both are wired (matching provisionWorkspaceAuto): +// production runs exactly one backend at a time — a SaaS tenant has +// cpProv set + provisioner nil; a self-hosted operator has provisioner +// set + cpProv nil. The "both set" case only arises in test fixtures, +// and the CP-wins ordering matches how Auto picks for provisioning so +// the test stubs stay on a single side. +// +// Volume cleanup (workspace_crud.go) stays Docker-only — CP-managed +// workspaces have no volumes to clean. Callers that need that extra +// step keep their `if h.provisioner != nil { RemoveVolume(...) }` +// gate AFTER calling StopWorkspaceAuto. The abstraction here is "stop +// the running workload," not "tear down all state." +func (h *WorkspaceHandler) StopWorkspaceAuto(ctx context.Context, workspaceID string) error { + if h.cpProv != nil { + return h.cpProv.Stop(ctx, workspaceID) + } + if h.provisioner != nil { + return h.provisioner.Stop(ctx, workspaceID) + } + return nil +} + // SetEnvMutators wires a provisionhook.Registry into the handler. Plugins // living in separate repos register on the same Registry instance during // boot (see cmd/server/main.go) and main.go calls this setter once before diff --git a/workspace-server/internal/handlers/workspace_crud.go b/workspace-server/internal/handlers/workspace_crud.go index f254ea86..4e58804f 100644 --- a/workspace-server/internal/handlers/workspace_crud.go +++ b/workspace-server/internal/handlers/workspace_crud.go @@ -420,22 +420,33 @@ func (h *WorkspaceHandler) Delete(c *gin.Context) { var stopErrs []error stopAndRemove := func(wsID string) { - if h.provisioner == nil { - return - } - // Check Stop's error before attempting RemoveVolume — the - // previous code discarded it and immediately tried the - // volume remove, which always fails with "volume in use" - // when Stop didn't actually kill the container. The orphan - // sweeper (registry/orphan_sweeper.go) catches what we - // skip here on the next reconcile pass. - if err := h.provisioner.Stop(cleanupCtx, wsID); err != nil { - log.Printf("Delete %s container stop failed: %v — leaving volume for orphan sweeper", wsID, err) + // Stop the workload first via the backend dispatcher (CP for + // SaaS, Docker for self-hosted). 
Pre-2026-05-05 this gate was + // `if h.provisioner == nil { return }` — early-returning on + // every SaaS tenant left the EC2 running with no DB row to + // track it (issue #2814; the comment below claimed "loud-fail + // instead of silent-leak" but the early-return made it the + // silent path on SaaS). + // + // Check Stop's error before any volume cleanup — the previous + // code discarded it and immediately tried RemoveVolume, which + // always fails with "volume in use" when Stop didn't actually + // kill the container. The orphan sweeper + // (registry/orphan_sweeper.go) catches what we skip here on + // the next reconcile pass. + if err := h.StopWorkspaceAuto(cleanupCtx, wsID); err != nil { + log.Printf("Delete %s stop failed: %v — leaving cleanup for orphan sweeper", wsID, err) stopErrs = append(stopErrs, fmt.Errorf("stop %s: %w", wsID, err)) return } - if err := h.provisioner.RemoveVolume(cleanupCtx, wsID); err != nil { - log.Printf("Delete %s volume removal warning: %v", wsID, err) + // Volume cleanup is Docker-only — CP-managed workspaces have + // no host-bind volumes to remove. Skip silently when no Docker + // provisioner is wired (the SaaS path already terminated the + // EC2 above; nothing left to do). + if h.provisioner != nil { + if err := h.provisioner.RemoveVolume(cleanupCtx, wsID); err != nil { + log.Printf("Delete %s volume removal warning: %v", wsID, err) + } } } diff --git a/workspace-server/internal/handlers/workspace_provision_auto_test.go b/workspace-server/internal/handlers/workspace_provision_auto_test.go index 9198e8f6..ff9bdede 100644 --- a/workspace-server/internal/handlers/workspace_provision_auto_test.go +++ b/workspace-server/internal/handlers/workspace_provision_auto_test.go @@ -41,7 +41,9 @@ import ( type trackingCPProv struct { mu sync.Mutex started []string + stopped []string startErr error + stopErr error } func (r *trackingCPProv) Start(_ context.Context, cfg provisioner.WorkspaceConfig) (string, error) { @@ -53,12 +55,25 @@ func (r *trackingCPProv) Start(_ context.Context, cfg provisioner.WorkspaceConfi } return "i-stub-" + cfg.WorkspaceID, nil } -func (r *trackingCPProv) Stop(_ context.Context, _ string) error { return nil } +func (r *trackingCPProv) Stop(_ context.Context, workspaceID string) error { + r.mu.Lock() + r.stopped = append(r.stopped, workspaceID) + r.mu.Unlock() + return r.stopErr +} func (r *trackingCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) { return "", nil } func (r *trackingCPProv) IsRunning(_ context.Context, _ string) (bool, error) { return true, nil } +func (r *trackingCPProv) stoppedSnapshot() []string { + r.mu.Lock() + defer r.mu.Unlock() + out := make([]string, len(r.stopped)) + copy(out, r.stopped) + return out +} + func (r *trackingCPProv) startedSnapshot() []string { r.mu.Lock() defer r.mu.Unlock() @@ -432,3 +447,194 @@ func TestOrgImportGate_UsesHasProvisionerNotBareField(t *testing.T) { t.Errorf("org_import.go must call h.workspace.HasProvisioner() in the provisioning gate — current code does not") } } + +// TestStopWorkspaceAuto_RoutesToCPWhenSet — symmetric with the +// provision dispatcher test above. SaaS tenants run with cpProv set +// and the local Docker provisioner nil; Auto must route Stop to CP +// (= terminate the EC2). Pre-2026-05-05 the absence of this dispatcher +// meant team-collapse + workspace-delete called h.provisioner.Stop +// directly, no-oping on every SaaS tenant — issue #2813 (collapse) and +// #2814 (delete) both leak EC2s for ~6 months. 
+func TestStopWorkspaceAuto_RoutesToCPWhenSet(t *testing.T) { + rec := &trackingCPProv{} + bcast := &concurrentSafeBroadcaster{} + h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir()) + h.SetCPProvisioner(rec) + + wsID := "ws-stop-routes-cp" + if err := h.StopWorkspaceAuto(context.Background(), wsID); err != nil { + t.Fatalf("StopWorkspaceAuto returned err with CP wired: %v", err) + } + got := rec.stoppedSnapshot() + if len(got) != 1 || got[0] != wsID { + t.Errorf("expected cpProv.Stop invoked once with %q, got %v", wsID, got) + } +} + +// TestStopWorkspaceAuto_RoutesToDockerWhenOnlyDocker — self-hosted +// operators run with the local Docker provisioner wired and cpProv nil. +// Auto must route to Docker. +// +// Stub-injects a LocalProvisionerAPI via a private constructor pattern +// so we don't need a real Docker daemon. NewWorkspaceHandler's +// constructor takes *provisioner.Provisioner (concrete) so we set the +// interface field directly. +func TestStopWorkspaceAuto_RoutesToDockerWhenOnlyDocker(t *testing.T) { + bcast := &concurrentSafeBroadcaster{} + h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir()) + stub := &stoppingLocalProv{} + h.provisioner = stub + + wsID := "ws-stop-routes-docker" + if err := h.StopWorkspaceAuto(context.Background(), wsID); err != nil { + t.Fatalf("StopWorkspaceAuto returned err with Docker wired: %v", err) + } + if len(stub.stopped) != 1 || stub.stopped[0] != wsID { + t.Errorf("expected Docker provisioner.Stop invoked once with %q, got %v", wsID, stub.stopped) + } +} + +// TestStopWorkspaceAuto_NoBackendIsNoOp — when neither backend is wired +// (misconfigured deployment, or test fixture), StopWorkspaceAuto returns +// nil silently. Distinct from provisionWorkspaceAuto's mark-failed +// behavior: there's no row state to mark "failed to stop" against, and +// the absence of a backend means nothing was running to stop. +func TestStopWorkspaceAuto_NoBackendIsNoOp(t *testing.T) { + bcast := &concurrentSafeBroadcaster{} + h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir()) + // Neither SetCPProvisioner nor a Docker provisioner — both nil. + + if err := h.StopWorkspaceAuto(context.Background(), "ws-noback"); err != nil { + t.Errorf("expected nil error on no-backend stop, got %v", err) + } +} + +// stoppingLocalProv is a minimal LocalProvisionerAPI stub that records +// Stop invocations. Other methods panic — guards against accidental +// use by tests that should be using a different stub. 
+type stoppingLocalProv struct { + stopped []string +} + +func (s *stoppingLocalProv) Stop(_ context.Context, workspaceID string) error { + s.stopped = append(s.stopped, workspaceID) + return nil +} +func (s *stoppingLocalProv) Start(_ context.Context, _ provisioner.WorkspaceConfig) (string, error) { + panic("stoppingLocalProv: Start not implemented for this test") +} +func (s *stoppingLocalProv) IsRunning(_ context.Context, _ string) (bool, error) { + panic("stoppingLocalProv: IsRunning not implemented for this test") +} +func (s *stoppingLocalProv) ExecRead(_ context.Context, _, _ string) ([]byte, error) { + panic("stoppingLocalProv: ExecRead not implemented for this test") +} +func (s *stoppingLocalProv) RemoveVolume(_ context.Context, _ string) error { + panic("stoppingLocalProv: RemoveVolume not implemented for this test") +} +func (s *stoppingLocalProv) VolumeHasFile(_ context.Context, _, _ string) (bool, error) { + panic("stoppingLocalProv: VolumeHasFile not implemented for this test") +} +func (s *stoppingLocalProv) WriteAuthTokenToVolume(_ context.Context, _, _ string) error { + panic("stoppingLocalProv: WriteAuthTokenToVolume not implemented for this test") +} + +// TestNoCallSiteCallsBareStop — source-level pin against the bug +// pattern that motivated this PR. Any non-test handler that wants to +// "stop the workload" must go through h.X.StopWorkspaceAuto, not bare +// h.X.provisioner.Stop / h.X.cpProv.Stop / h.X.Stop. Pre-2026-05-05 +// team.go and workspace_crud.go both called h.provisioner.Stop directly +// inside `if h.provisioner != nil { ... }` gates — silent no-op on +// SaaS, EC2 leak (#2813, #2814). +// +// Allowed exceptions: +// - workspace.go: defines StopWorkspaceAuto (the dispatcher itself). +// - workspace_provision.go: defines per-backend Start/Stop bodies. +// - workspace_restart.go: pre-dates the dispatchers and uses manual +// if-cpProv-else dispatch with retry semantics tuned for the +// restart hot path. Functionally equivalent + wraps cpStopWithRetry, +// so it's not the bug class this gate targets — but it IS +// architectural duplication, tracked under #2799. +// - container_files.go: drives Docker daemon directly for file-copy +// short-lived containers; no workspace-level Stop semantics. +func TestNoCallSiteCallsBareStop(t *testing.T) { + wd, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + entries, err := os.ReadDir(wd) + if err != nil { + t.Fatalf("readdir: %v", err) + } + bareShapes := []string{ + ".provisioner.Stop(", + ".cpProv.Stop(", + } + allowedFiles := map[string]bool{ + "workspace.go": true, + "workspace_provision.go": true, + "workspace_restart.go": true, + "container_files.go": true, + } + for _, entry := range entries { + name := entry.Name() + if filepath.Ext(name) != ".go" { + continue + } + if len(name) > len("_test.go") && + name[len(name)-len("_test.go"):] == "_test.go" { + continue + } + if allowedFiles[name] { + continue + } + src, err := os.ReadFile(filepath.Join(wd, name)) + if err != nil { + t.Fatalf("read %s: %v", name, err) + } + // Strip line + block comments before substring check — the gate + // targets call expressions in real code, not historical + // references in documentation/comments. Without this, comments + // describing the old buggy shape (kept on purpose for + // archaeology) trip the test. 
+ stripped := stripGoComments(src) + for _, needle := range bareShapes { + if bytes.Contains(stripped, []byte(needle)) { + t.Errorf("%s contains bare `%s` — must go through h.X.StopWorkspaceAuto so SaaS tenants route to CP. "+ + "Pre-2026-05-05 team.go and workspace_crud.go did this and silently leaked EC2s on every SaaS collapse / delete (#2813, #2814).", name, needle) + } + } + } +} + +// stripGoComments removes // line comments and /* */ block comments +// from Go source. Imperfect (doesn't handle comments-inside-strings) +// but adequate for the source-level pin tests in this file — none of +// our gated needles legitimately appear inside string literals in the +// handlers package. +func stripGoComments(src []byte) []byte { + out := make([]byte, 0, len(src)) + for i := 0; i < len(src); i++ { + // Block comment + if i+1 < len(src) && src[i] == '/' && src[i+1] == '*' { + i += 2 + for i+1 < len(src) && !(src[i] == '*' && src[i+1] == '/') { + i++ + } + i++ // skip closing / + continue + } + // Line comment — preserve the newline so line counts stay sane + if i+1 < len(src) && src[i] == '/' && src[i+1] == '/' { + for i < len(src) && src[i] != '\n' { + i++ + } + if i < len(src) { + out = append(out, '\n') + } + continue + } + out = append(out, src[i]) + } + return out +} diff --git a/workspace-server/internal/router/router.go b/workspace-server/internal/router/router.go index 516aa99d..bd542e56 100644 --- a/workspace-server/internal/router/router.go +++ b/workspace-server/internal/router/router.go @@ -230,7 +230,7 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi r.GET("/approvals/pending", middleware.AdminAuth(db.DB), apph.ListAll) // Team Expansion - teamh := handlers.NewTeamHandler(broadcaster, prov, wh, platformURL, configsDir) + teamh := handlers.NewTeamHandler(broadcaster, wh, platformURL, configsDir) wsAuth.POST("/expand", teamh.Expand) wsAuth.POST("/collapse", teamh.Collapse) From beab899501db790968891eecf77414c34b5f4e00 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 4 May 2026 20:02:05 -0700 Subject: [PATCH 04/43] feat(ConfigTab): drop Skills/Tools tag inputs, give Prompt Files its own section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User feedback (2026-05-04 conversation): > "Skills and Tools are having their own tab as plugin, and Prompt > Files are in the file system which can be directly edited. Am I > missing something?" > "Tools should be merged into plugin then, and for prompt files... it > should be in another section than in skill& tools" The "Skills & Tools" section in ConfigTab had three TagList inputs: - Skills: managed via the dedicated SkillsTab (per-workspace skill folders) — duplicate UI affordance - Tools: managed via the Plugins tab (install a plugin → its tools become available) — duplicate UI affordance - Prompt Files: load order for system-prompt files — semantically unrelated to skills/tools Drop the Skills + Tools inputs. Move Prompt Files into its own section with explanatory copy that names the auto-loaded files (system-prompt.md, CLAUDE.md, AGENTS.md) and points users at the Files tab for actual editing. Schema fields `config.skills` and `config.tools` are KEPT (load-bearing for runtime skill loading + tool registry); only the inline editor goes away. Operators who need to edit them can still use the Raw YAML toggle. Tests: - New ConfigTab.sections.test.tsx with 4 cases: 1. "Skills & Tools" section title is gone 2. Skills tag input is absent 3. 
Tools tag input is absent
  4. Prompt Files section exists with explanatory copy

Sibling ConfigTab tests (hermes, provider) all still pass (20/20).

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 canvas/src/components/tabs/ConfigTab.tsx      |  22 ++-
 .../__tests__/ConfigTab.sections.test.tsx     | 125 ++++++++++++++++++
 2 files changed, 143 insertions(+), 4 deletions(-)
 create mode 100644 canvas/src/components/tabs/__tests__/ConfigTab.sections.test.tsx

diff --git a/canvas/src/components/tabs/ConfigTab.tsx b/canvas/src/components/tabs/ConfigTab.tsx
index fdb8eb38..9447d09a 100644
--- a/canvas/src/components/tabs/ConfigTab.tsx
+++ b/canvas/src/components/tabs/ConfigTab.tsx
@@ -886,10 +886,24 @@ export function ConfigTab({ workspaceId }: Props) {
         )}

-        <Section title="Skills & Tools">
-          <TagList label="Skills" value={…} onChange={(v) => update("skills", v)} placeholder="e.g. code-review" />
-          <TagList label="Tools" value={…} onChange={(v) => update("tools", v)} placeholder="e.g. web_search, filesystem" />
-          <TagList label="Prompt Files" value={…} onChange={(v) => update("prompt_files", v)} placeholder="e.g. system-prompt.md" />
-        </Section>
+        {/* Skills + Tools used to live here as TagList inputs. They were
+            redundant with their dedicated tabs:
+              - Skills → managed via SkillsTab (per-workspace skill folders)
+              - Tools  → managed via the Plugins tab (install/uninstall)
+            Editing them here only set the config.yaml field; the
+            actual install/load happened elsewhere. Removed to stop
+            showing the misnamed list-input affordance. */}
+        <Section title="Prompt Files" …>
+          <p className="…">
+            Markdown files that compose this workspace's system prompt.
+            Loaded in order at boot from the workspace config dir
+            (e.g. <code>system-prompt.md</code>,{' '}
+            <code>CLAUDE.md</code>,{' '}
+            <code>AGENTS.md</code>). Edit the file
+            contents directly via the Files tab.
+          </p>
+          <TagList label="Prompt Files" value={…} onChange={(v) => update("prompt_files", v)} placeholder="e.g. system-prompt.md" />
+        </Section>
diff --git a/canvas/src/components/tabs/__tests__/ConfigTab.sections.test.tsx b/canvas/src/components/tabs/__tests__/ConfigTab.sections.test.tsx
new file mode 100644
index 00000000..2e083fb3
--- /dev/null
+++ b/canvas/src/components/tabs/__tests__/ConfigTab.sections.test.tsx
@@ -0,0 +1,125 @@
+// @vitest-environment jsdom
+//
+// Regression tests for the ConfigTab section restructure (user feedback
+// 2026-05-04: "Skills and Tools are having their own tab as plugin, and
+// Prompt Files are in the file system which can be directly edited. Am
+// I missing something?" + "Tools should be merged into plugin then, and
+// for prompt files... should be in another section than in skill& tools").
+//
+// What this pins:
+// 1. The "Skills & Tools" section title is gone.
+// 2. Editable Skills + Tools tag inputs are gone (managed elsewhere).
+// 3. A dedicated "Prompt Files" section exists with explanatory text.
+//
+// If a future PR re-adds the Skills/Tools tag inputs to ConfigTab, this
+// suite catches it.
+
+import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
+import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
+import React from "react";
+
+afterEach(cleanup);
+
+const apiGet = vi.fn();
+vi.mock("@/lib/api", () => ({
+  api: {
+    get: (path: string) => apiGet(path),
+    patch: vi.fn(),
+    put: vi.fn(),
+    post: vi.fn(),
+    del: vi.fn(),
+  },
+}));
+
+const storeUpdateNodeData = vi.fn();
+const storeRestartWorkspace = vi.fn();
+vi.mock("@/store/canvas", () => ({
+  useCanvasStore: Object.assign(
+    (selector: (s: unknown) => unknown) =>
+      selector({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }),
+    {
+      getState: () => ({
+        restartWorkspace: storeRestartWorkspace,
+        updateNodeData: storeUpdateNodeData,
+      }),
+    },
+  ),
+}));
+
+vi.mock("../AgentCardSection", () => ({
+  AgentCardSection: () => <div />
, +})); + +import { ConfigTab } from "../ConfigTab"; + +beforeEach(() => { + apiGet.mockReset(); + apiGet.mockImplementation((path: string) => { + if (path === `/workspaces/ws-test`) { + return Promise.resolve({ runtime: "claude-code" }); + } + if (path === `/workspaces/ws-test/model`) { + return Promise.resolve({ model: "claude-opus-4-7" }); + } + if (path === `/workspaces/ws-test/provider`) { + return Promise.resolve({ provider: "anthropic-oauth", source: "default" }); + } + if (path === `/workspaces/ws-test/files/config.yaml`) { + return Promise.resolve({ content: "name: test\nruntime: claude-code\n" }); + } + if (path === "/templates") { + return Promise.resolve([ + { id: "claude-code", name: "Claude Code", runtime: "claude-code", providers: [] }, + ]); + } + return Promise.reject(new Error(`unmocked api.get: ${path}`)); + }); +}); + +describe("ConfigTab section restructure", () => { + it("does not render a 'Skills & Tools' section title", async () => { + render(); + await waitFor(() => expect(apiGet).toHaveBeenCalled()); + // Section button uses the title as its accessible name; should be absent. + expect(screen.queryByRole("button", { name: /Skills\s*&\s*Tools/i })).toBeNull(); + }); + + it("does not render an editable Skills tag input", async () => { + render(); + await waitFor(() => expect(apiGet).toHaveBeenCalled()); + // TagList renders its label; check no input labelled "Skills" in the form. + // (Skills are managed via the dedicated Skills tab.) + const skillsLabels = screen + .queryAllByText(/^Skills$/) + .filter((el) => el.tagName.toLowerCase() === "label"); + expect(skillsLabels).toHaveLength(0); + }); + + it("does not render an editable Tools tag input", async () => { + render(); + await waitFor(() => expect(apiGet).toHaveBeenCalled()); + // Tools are managed via the Plugins tab — install a plugin → its tools + // become available. No reason to type tool names here. + const toolsLabels = screen + .queryAllByText(/^Tools$/) + .filter((el) => el.tagName.toLowerCase() === "label"); + expect(toolsLabels).toHaveLength(0); + }); + + it("renders a dedicated 'Prompt Files' section with explanatory copy", async () => { + render(); + await waitFor(() => expect(apiGet).toHaveBeenCalled()); + // Section is collapsed by default — find + expand first. + const sectionButton = screen.getByRole("button", { name: /Prompt Files/i }); + expect(sectionButton).toBeTruthy(); + fireEvent.click(sectionButton); + // Explanatory copy mentions system-prompt.md (split across tags + // so use textContent on any element rather than the default text matcher). + await waitFor(() => { + const matches = screen.queryAllByText((_, el) => + (el?.textContent || "").includes("system-prompt.md"), + ); + expect(matches.length).toBeGreaterThan(0); + }); + }); +}); From 8152cfc81ef0d0d00f7961f2a4745f344b7df60b Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 4 May 2026 20:12:01 -0700 Subject: [PATCH 05/43] =?UTF-8?q?feat(canvas/chat):=20lazy-load=20history?= =?UTF-8?q?=20=E2=80=94=2010=20newest=20on=20mount,=2020=20per=20scroll-up?= =?UTF-8?q?=20batch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-fix ChatTab fetched the newest 50 messages on every mount and scrolled to bottom, paying full DOM cost up-front even when the user only wanted to read the last few bubbles. On a long-running workspace this meant 50× message-bubble paint + DOM cost on every tab swap. Now: - Initial fetch limit=10 (newest-first slice). 
- IntersectionObserver on a top sentinel (rootMargin 200px) fires loadOlder() the moment the user scrolls within 200px of the top. - loadOlder() uses the oldest loaded message's timestamp as `before_ts` (RFC3339 cursor the /activity endpoint already supports) and fetches OLDER_HISTORY_BATCH (20) more. - hasMore turns false when the server returns < limit rows; the sentinel unmounts and the IO observer disconnects — no spinner on a short conversation. - useLayoutEffect handles scroll behavior across messages updates: a prepend (loadOlder landed) restores the user's saved distance-from-bottom (captured via scrollAnchorRef before the fetch) so their reading position doesn't jump; an append / initial load pins to the latest bubble. Tests: 4 new in ChatTab.lazyHistory.test.tsx pinning the limit=10 on initial fetch, hasMore=false on short-history, full-page rendering on exactly-the-limit, and limit=10 on retry-after-failure. Doesn't exercise the IO/scroll-anchor in jsdom — that's brittler than trusting the synth-canary against a live tenant. Build clean. Existing 1250 tests + 4 new = 1254 pass. --- canvas/src/components/tabs/ChatTab.tsx | 181 ++++++++++++++++-- .../__tests__/ChatTab.lazyHistory.test.tsx | 165 ++++++++++++++++ 2 files changed, 329 insertions(+), 17 deletions(-) create mode 100644 canvas/src/components/tabs/__tests__/ChatTab.lazyHistory.test.tsx diff --git a/canvas/src/components/tabs/ChatTab.tsx b/canvas/src/components/tabs/ChatTab.tsx index fbe53b7c..af6e8b63 100644 --- a/canvas/src/components/tabs/ChatTab.tsx +++ b/canvas/src/components/tabs/ChatTab.tsx @@ -1,6 +1,6 @@ "use client"; -import { useState, useRef, useEffect, useCallback } from "react"; +import { useState, useRef, useEffect, useCallback, useLayoutEffect } from "react"; import ReactMarkdown from "react-markdown"; import remarkGfm from "remark-gfm"; import { api } from "@/lib/api"; @@ -124,14 +124,43 @@ function extractReplyText(resp: A2AResponse): string { // doesn't). Single source of truth for file-part parsing across // live chat, activity log replay, and any future consumers. +/** Initial chat history page size. The newest N messages are rendered + * on first paint; older history is fetched on demand via loadOlder() + * when the user scrolls the top sentinel into view. */ +const INITIAL_HISTORY_LIMIT = 10; +/** Subsequent older-history batch size. Larger than INITIAL so a long + * scroll-back doesn't fan out into many round-trips. */ +const OLDER_HISTORY_BATCH = 20; + /** * Load chat history from the activity_logs database via the platform API. * Uses source=canvas to only get user-initiated messages (not agent-to-agent). + * + * Pagination: + * - Pass `limit` to bound the page size (newest-first from server). + * - Pass `beforeTs` (RFC3339) to fetch rows STRICTLY OLDER than that + * timestamp. Combined with limit, this yields the next-older page + * when scrolling backward through history. + * + * `reachedEnd` is true when the server returned fewer rows than asked + * for — caller uses this to disable further older-batch fetches. + * (Counts row-level returns, not chat-bubble count: each row may + * produce 1-2 bubbles.) 
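+ *
+ * Hypothetical example: loadMessagesFromDB(workspaceId, 20,
+ * "2026-05-05T00:03:00Z") asks the server for the 20 rows strictly
+ * older than that timestamp and reports reachedEnd=true when fewer
+ * than 20 were left.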
*/ -async function loadMessagesFromDB(workspaceId: string): Promise<{ messages: ChatMessage[]; error: string | null }> { +async function loadMessagesFromDB( + workspaceId: string, + limit: number, + beforeTs?: string, +): Promise<{ messages: ChatMessage[]; error: string | null; reachedEnd: boolean }> { try { + const params = new URLSearchParams({ + type: "a2a_receive", + source: "canvas", + limit: String(limit), + }); + if (beforeTs) params.set("before_ts", beforeTs); const activities = await api.get( - `/workspaces/${workspaceId}/activity?type=a2a_receive&source=canvas&limit=50`, + `/workspaces/${workspaceId}/activity?${params.toString()}`, ); const messages: ChatMessage[] = []; @@ -142,11 +171,12 @@ async function loadMessagesFromDB(workspaceId: string): Promise<{ messages: Chat for (const a of [...activities].reverse()) { messages.push(...activityRowToMessages(a, isInternalSelfMessage)); } - return { messages, error: null }; + return { messages, error: null, reachedEnd: activities.length < limit }; } catch (err) { return { messages: [], error: err instanceof Error ? err.message : "Failed to load chat history", + reachedEnd: true, }; } } @@ -256,6 +286,23 @@ function MyChatPanel({ workspaceId, data }: Props) { const [error, setError] = useState(null); const [confirmRestart, setConfirmRestart] = useState(false); const bottomRef = useRef(null); + // Lazy-load older history on scroll-up. + // - containerRef = the scrollable messages viewport + // - topRef = sentinel above the messages list; IO observes it + // and triggers loadOlder() when it enters view + // - hasMore = false once a fetch returns < limit rows; stops IO + // - loadingOlder = guards against duplicate loadOlder() calls while + // one is already in flight (fast scroll-flick) + // - scrollAnchorRef = saves distance-from-bottom before a prepend + // so the useLayoutEffect below can restore the + // user's exact viewport position. Without this, + // prepending older messages would jump the scroll + // position by the height of the new content. + const containerRef = useRef(null); + const topRef = useRef(null); + const [hasMore, setHasMore] = useState(true); + const [loadingOlder, setLoadingOlder] = useState(false); + const scrollAnchorRef = useRef<{ savedDistanceFromBottom: number } | null>(null); // Files the user has picked but not yet sent. Cleared on send // (upload success) or by the × on each pill. const [pendingFiles, setPendingFiles] = useState([]); @@ -294,17 +341,82 @@ function MyChatPanel({ workspaceId, data }: Props) { sendInFlightRef.current = false; }, []); - // Load chat history from database on mount + // Load chat history from database on mount. + // Initial load is bounded to INITIAL_HISTORY_LIMIT (newest 10) — the + // rest streams in as the user scrolls up via loadOlder() below. Pre- + // 2026-05-05 this fetched the newest 50 in one shot; on a long-running + // workspace that meant 50× message-bubble paint + DOM cost on every + // tab-open even when the user only wanted to read the last few. useEffect(() => { setLoading(true); setLoadError(null); - loadMessagesFromDB(workspaceId).then(({ messages: msgs, error: fetchErr }) => { - setMessages(msgs); - setLoadError(fetchErr); - setLoading(false); - }); + setHasMore(true); + loadMessagesFromDB(workspaceId, INITIAL_HISTORY_LIMIT).then( + ({ messages: msgs, error: fetchErr, reachedEnd }) => { + setMessages(msgs); + setLoadError(fetchErr); + setHasMore(!reachedEnd); + setLoading(false); + }, + ); }, [workspaceId]); + // Fetch the next-older batch and prepend. 
Caller responsibility: + // already check loadingOlder + hasMore (we re-check defensively for + // race-safety against the IO callback firing twice). + const loadOlder = useCallback(async () => { + if (loadingOlder || !hasMore) return; + if (messages.length === 0) return; + const oldest = messages[0]; + if (!oldest) return; + const container = containerRef.current; + if (!container) return; + // Capture the user's distance-from-bottom BEFORE we prepend so the + // useLayoutEffect can restore it after the new DOM lands. Without + // this anchor, the user reading mid-history would get yanked + // upward by the height of the newly-prepended messages. + scrollAnchorRef.current = { + savedDistanceFromBottom: container.scrollHeight - container.scrollTop, + }; + setLoadingOlder(true); + const { messages: older, reachedEnd } = await loadMessagesFromDB( + workspaceId, + OLDER_HISTORY_BATCH, + oldest.timestamp, + ); + if (older.length > 0) { + setMessages((prev) => [...older, ...prev]); + } else { + // Nothing came back — clear the anchor so the next paint doesn't + // try to "restore" against a no-op prepend. + scrollAnchorRef.current = null; + } + setHasMore(!reachedEnd); + setLoadingOlder(false); + }, [workspaceId, messages, loadingOlder, hasMore]); + + // IntersectionObserver on the top sentinel. Fires loadOlder() the + // moment the user scrolls within 200px of the top. AbortController + // unwires cleanly on workspace switch / unmount; root is the + // scrollable container so we observe only what's visible inside it. + useEffect(() => { + const top = topRef.current; + const container = containerRef.current; + if (!top || !container) return; + if (!hasMore) return; // stop observing when no older history exists + const ac = new AbortController(); + const io = new IntersectionObserver( + (entries) => { + if (ac.signal.aborted) return; + if (entries[0]?.isIntersecting) loadOlder(); + }, + { root: container, rootMargin: "200px 0px 0px 0px", threshold: 0 }, + ); + io.observe(top); + ac.signal.addEventListener("abort", () => io.disconnect()); + return () => ac.abort(); + }, [loadOlder, hasMore]); + // Agent reachability useEffect(() => { const reachable = data.status === "online" || data.status === "degraded"; @@ -316,7 +428,20 @@ function MyChatPanel({ workspaceId, data }: Props) { currentTaskRef.current = data.currentTask; }, [data.currentTask]); - useEffect(() => { + // Scroll behavior across messages updates: + // - Prepend (loadOlder landed) → restore the user's saved + // distance-from-bottom so their reading position is unchanged. + // - Append / initial → pin to latest bubble. + // useLayoutEffect (not useEffect) so scroll restoration runs BEFORE + // paint — otherwise the user sees the page jump for one frame. + useLayoutEffect(() => { + const container = containerRef.current; + if (scrollAnchorRef.current && container) { + container.scrollTop = + container.scrollHeight - scrollAnchorRef.current.savedDistanceFromBottom; + scrollAnchorRef.current = null; + return; + } bottomRef.current?.scrollIntoView({ behavior: "smooth" }); }, [messages]); @@ -735,7 +860,7 @@ function MyChatPanel({ workspaceId, data }: Props) {
      )}

      {/* Messages */}
-      <div className="…">
+      <div ref={containerRef} className="…">
{loading && (
          <div className="…">Loading chat history...</div>
)} @@ -751,11 +876,15 @@ function MyChatPanel({ workspaceId, data }: Props) { onClick={() => { setLoading(true); setLoadError(null); - loadMessagesFromDB(workspaceId).then(({ messages: msgs, error: fetchErr }) => { - setMessages(msgs); - setLoadError(fetchErr); - setLoading(false); - }); + setHasMore(true); + loadMessagesFromDB(workspaceId, INITIAL_HISTORY_LIMIT).then( + ({ messages: msgs, error: fetchErr, reachedEnd }) => { + setMessages(msgs); + setLoadError(fetchErr); + setHasMore(!reachedEnd); + setLoading(false); + }, + ); }} className="text-[10px] px-2 py-0.5 rounded bg-red-800/40 text-bad hover:bg-red-700/50 transition-colors" > @@ -768,6 +897,24 @@ function MyChatPanel({ workspaceId, data }: Props) { No messages yet. Send a message to start chatting with this agent.
        )}
+        {/* Top sentinel for lazy-loading older history. The IO observer
+            in the effect above watches this; entering view triggers the
+            next-older batch fetch. Sits ABOVE messages.map so it's the
+            first thing the user reaches when scrolling up.
+
+            Only mounted when there might be more history (hasMore) so a
+            short conversation doesn't pay an idle observer. The
+            "Loading older messages…" line replaces the sentinel during
+            the fetch so the user sees feedback for the scroll-up
+            gesture. Once we hit the end, we drop the sentinel entirely
+            instead of showing a "no more messages" footer — the user's
+            scroll resting against the top of the conversation IS the
+            signal. */}
+        {hasMore && messages.length > 0 && (
+          <div ref={topRef} className="…">
+            {loadingOlder ? "Loading older messages…" : " "}
+          </div>
+        )}
        {messages.map((msg) => (

diff --git a/canvas/src/components/tabs/__tests__/ChatTab.lazyHistory.test.tsx b/canvas/src/components/tabs/__tests__/ChatTab.lazyHistory.test.tsx
new file mode 100644
--- /dev/null
+++ b/canvas/src/components/tabs/__tests__/ChatTab.lazyHistory.test.tsx
@@ -0,0 +1,165 @@
+// @vitest-environment jsdom
+import { describe, it, expect, vi, beforeEach } from "vitest";
+import { render, screen, waitFor, fireEvent } from "@testing-library/react";
+import React from "react";
+
+const myChatActivityCalls: string[] = [];
+let myChatNextResponse:
+  | { ok: true; rows: Record<string, unknown>[] }
+  | { ok: false; err: Error } = { ok: true, rows: [] };
+const apiGet = vi.fn((path: string): Promise<unknown[]>
=> {
+  if (path.includes("type=a2a_receive") && path.includes("source=canvas")) {
+    myChatActivityCalls.push(path);
+    if (myChatNextResponse.ok) return Promise.resolve(myChatNextResponse.rows);
+    return Promise.reject(myChatNextResponse.err);
+  }
+  // AgentComms / heartbeat / anything else — empty array is a safe
+  // default that won't blow up the corresponding component's .then().
+  return Promise.resolve([]);
+});
+const apiPost = vi.fn();
+vi.mock("@/lib/api", () => ({
+  api: {
+    get: (path: string) => apiGet(path),
+    post: (path: string, body: unknown) => apiPost(path, body),
+    del: vi.fn(),
+    patch: vi.fn(),
+    put: vi.fn(),
+  },
+}));
+
+vi.mock("@/store/canvas", () => ({
+  useCanvasStore: vi.fn((selector?: (s: unknown) => unknown) =>
+    selector ? selector({ agentMessages: {}, consumeAgentMessages: () => [] }) : {},
+  ),
+}));
+
+beforeEach(() => {
+  apiGet.mockClear();
+  apiPost.mockReset();
+  myChatActivityCalls.length = 0;
+  myChatNextResponse = { ok: true, rows: [] };
+  if (typeof window !== "undefined" && !("IntersectionObserver" in window)) {
+    (window as unknown as { IntersectionObserver: unknown }).IntersectionObserver = class {
+      observe() {}
+      unobserve() {}
+      disconnect() {}
+    };
+  }
+  // jsdom doesn't implement scrollIntoView; ChatTab calls it after every
+  // messages update.
+  Element.prototype.scrollIntoView = vi.fn();
+});
+
+import { ChatTab } from "../ChatTab";
+
+function makeActivityRow(seq: number): Record<string, unknown> {
+  return {
+    activity_type: "a2a_receive",
+    status: "ok",
+    created_at: `2026-05-05T00:0${seq}:00Z`,
+    request_body: { params: { message: { parts: [{ kind: "text", text: `user msg ${seq}` }] } } },
+    response_body: { result: `agent reply ${seq}` },
+  };
+}
+
+const minimalData = {
+  status: "online" as const,
+  runtime: "claude-code",
+  currentTask: null,
+} as unknown as Parameters<typeof ChatTab>[0]["data"];
+
+describe("ChatTab lazy history pagination", () => {
+  it("initial fetch carries limit=10 (not the legacy 50)", async () => {
+    myChatNextResponse = { ok: true, rows: [makeActivityRow(1)] };
+    render(<ChatTab workspaceId="ws-1" data={minimalData} />);
+    await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
+    const url = myChatActivityCalls[0];
+    expect(url).toContain("limit=10");
+    expect(url).not.toContain("limit=50");
+    // before_ts should NOT be set on the initial fetch — that's the
+    // newest-first slice the user lands on.
+    expect(url).not.toContain("before_ts");
+  });
+
+  it("hides the top sentinel when initial fetch returns fewer than the limit", async () => {
+    // 3 < 10 → server says "no more older history exists"; sentinel
+    // should NOT mount and the "Loading older messages…" line should
+    // never appear (it can't, since the sentinel is what triggers it).
+    myChatNextResponse = {
+      ok: true,
+      rows: [makeActivityRow(1), makeActivityRow(2), makeActivityRow(3)],
+    };
+    render(<ChatTab workspaceId="ws-1" data={minimalData} />);
+    await waitFor(() => expect(myChatActivityCalls.length).toBe(1));
+    await waitFor(() => {
+      expect(screen.queryByText(/Loading chat history/i)).toBeNull();
+    });
+    expect(screen.queryByText(/Loading older messages/i)).toBeNull();
+  });
+
+  it("renders all messages when initial fetch returns exactly the limit", async () => {
+    // 10 == limit → server might have more older rows; sentinel SHOULD
+    // mount so the IO observer can fire loadOlder() on scroll-up. We
+    // verify by checking the rendered bubble count — if hasMore stayed
+    // true the sentinel render path doesn't crash and all 10 rows
+    // produced their pair of bubbles.
+ const fullPage = Array.from({ length: 10 }, (_, i) => makeActivityRow(i + 1)); + myChatNextResponse = { ok: true, rows: fullPage }; + render(); + await waitFor(() => expect(myChatActivityCalls.length).toBe(1)); + await waitFor(() => { + expect(screen.queryByText(/Loading chat history/i)).toBeNull(); + }); + expect(screen.getAllByText(/user msg/).length).toBe(10); + expect(screen.getAllByText(/agent reply/).length).toBe(10); + }); + + it("retry-after-failure uses limit=10, not the legacy 50", async () => { + myChatNextResponse = { ok: false, err: new Error("network down") }; + render(); + const retry = await screen.findByText(/Retry/); + myChatNextResponse = { ok: true, rows: [makeActivityRow(1)] }; + fireEvent.click(retry); + await waitFor(() => expect(myChatActivityCalls.length).toBe(2)); + const retryUrl = myChatActivityCalls[1]; + expect(retryUrl).toContain("limit=10"); + expect(retryUrl).not.toContain("limit=50"); + }); +}); From 7cc1c39c49c97d504b62ebaaff982d75c6925bcd Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Mon, 4 May 2026 20:21:59 -0700 Subject: [PATCH 06/43] ci: e2e coverage matrix + branch-protection-as-code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #9. Three pieces, all small: 1. **docs/e2e-coverage.md** — source of truth for which E2E suites guard which surfaces. Today three were running but informational only on staging; that's how the org-import silent-drop bug shipped without a test catching it pre-merge. Now the matrix shows what's required where + a follow-up note for the two suites that need an always-emit refactor before they can be required. 2. **tools/branch-protection/apply.sh** — branch protection as code. Lets `staging` and `main` required-checks live in a reviewable shell script instead of UI clicks that get lost between admins. This PR's net change: add `E2E API Smoke Test` and `Canvas tabs E2E` as required on staging. Both already use the always-emit path-filter pattern (no-op step emits SUCCESS when the workflow's paths weren't touched), so making them required can't deadlock unrelated PRs. 3. **branch-protection-drift.yml** — daily cron + drift_check.sh that compares live protection against apply.sh's desired state. Catches out-of-band UI edits before they drift further. Fails the workflow on mismatch; ops re-runs apply.sh or updates the script. Out of scope (filed as follow-ups): - e2e-staging-saas + e2e-staging-external use plain `paths:` filters and never trigger when paths are unchanged. They need refactoring to the always-emit shape (same as e2e-api / e2e-staging-canvas) before they can be required. - main branch protection mirrors staging here; if main wants the E2E SaaS / External added later, do it in apply.sh and rerun. Operator must apply once after merge: bash tools/branch-protection/apply.sh The drift check picks it up from there. 
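For intuition, the comparison drift_check.sh performs boils down to set
equality between apply.sh's desired check names and the live `contexts`
array from the /branches/:b/protection endpoint. A Go sketch for
illustration only (the shipped implementation is the shell script):

    // drifted reports whether the live required-check contexts differ
    // from the desired list, ignoring order.
    func drifted(live, desired []string) bool {
        if len(live) != len(desired) {
            return true
        }
        want := make(map[string]bool, len(desired))
        for _, c := range desired {
            want[c] = true
        }
        for _, c := range live {
            if !want[c] {
                return true
            }
        }
        return false
    }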
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .github/workflows/branch-protection-drift.yml |  42 ++++++
 docs/e2e-coverage.md                          |  58 ++++++++
 tools/branch-protection/apply.sh              | 133 ++++++++++++++++++
 tools/branch-protection/drift_check.sh        |  44 ++++++
 4 files changed, 277 insertions(+)
 create mode 100644 .github/workflows/branch-protection-drift.yml
 create mode 100644 docs/e2e-coverage.md
 create mode 100755 tools/branch-protection/apply.sh
 create mode 100755 tools/branch-protection/drift_check.sh

diff --git a/.github/workflows/branch-protection-drift.yml b/.github/workflows/branch-protection-drift.yml
new file mode 100644
index 00000000..7b05f09f
--- /dev/null
+++ b/.github/workflows/branch-protection-drift.yml
@@ -0,0 +1,42 @@
+name: branch-protection drift check
+
+# Catches out-of-band edits to branch protection (UI clicks, manual gh
+# api PATCH from a one-off ops session) by comparing live state against
+# tools/branch-protection/apply.sh's desired state every day. Fails the
+# workflow when they drift; the failure is the signal.
+#
+# When it fails: re-run apply.sh to put the live state back to the
+# script's intent, OR update apply.sh to encode the new intent and
+# commit. Either way the script is the source of truth.
+
+on:
+  schedule:
+    # 14:00 UTC daily. Off-hours for most teams; gives a fresh signal
+    # at the start of every working day.
+    - cron: '0 14 * * *'
+  workflow_dispatch:
+  pull_request:
+    branches: [staging, main]
+    paths:
+      - 'tools/branch-protection/**'
+      - '.github/workflows/branch-protection-drift.yml'
+
+permissions:
+  contents: read
+
+jobs:
+  drift:
+    name: Branch protection drift
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - name: Run drift check
+        env:
+          # GH_TOKEN_FOR_ADMIN_API — repo-admin scope, needed for the
+          # /branches/:b/protection endpoint. Falls back to GITHUB_TOKEN
+          # for read-only mode (gh api on the protection endpoint
+          # returns 200 to maintainers, 404 to read-only). Configure at
+          # Settings → Secrets and variables → Actions.
+          GH_TOKEN: ${{ secrets.GH_TOKEN_FOR_ADMIN_API || secrets.GITHUB_TOKEN }}
+        run: bash tools/branch-protection/drift_check.sh
diff --git a/docs/e2e-coverage.md b/docs/e2e-coverage.md
new file mode 100644
index 00000000..48ca0e19
--- /dev/null
+++ b/docs/e2e-coverage.md
@@ -0,0 +1,58 @@
+# E2E coverage matrix
+
+This document is the source of truth for which E2E suites guard which surfaces and which gates are wired up where. Read this before adding a new E2E or moving a check between branches.
+
+## Suites
+
+| Workflow file | Job (= required-check name) | What it covers | Cron |
+|---|---|---|---|
+| `e2e-api.yml` | `E2E API Smoke Test` | A2A handshake, registry/register, /workspaces/:id/a2a forward, structured-event emission. Lightweight enough to run on every PR. | — |
+| `e2e-staging-canvas.yml` | `Canvas tabs E2E` | Canvas-tab Playwright UX checks against staging — config tab, secrets tab, agent-card tab, Activity hydration. | weekly Sun 08:00 UTC |
+| `e2e-staging-saas.yml` | `E2E Staging SaaS` | Full lifecycle: org creation → workspace provision (CP path) → A2A delegation → status/heartbeat → workspace delete → EC2 termination. The integration test that catches the silent-drop bug class (#2486 / #2811 / #2813 / #2814). | daily 07:00 UTC |
+| `e2e-staging-external.yml` | `E2E Staging External Runtime` | External-runtime registration + heartbeat staleness sweep + `/registry/peers` resolution. Validates the OSS-templated workspace path. | daily 07:30 UTC |
+| `e2e-staging-sanity.yml` | `Intentional-failure teardown sanity` | Inverted assertion — the run MUST fail. Validates the leak-detection self-check itself; not for general gating. | weekly Mon 06:00 UTC |
+| `continuous-synth-e2e.yml` | `Synthetic E2E against staging` | Standing background coverage between PR runs. Catches drift in production-like staging that PR-time E2Es miss. | every 15 min |
+
+## Required-check status (branch protection)
+
+| Suite | staging required | main required |
+|---|---|---|
+| `E2E API Smoke Test` | ✅ this PR | ✅ |
+| `Canvas tabs E2E` | ✅ this PR | (see follow-up) |
+| `E2E Staging SaaS` | ❌ — needs always-emit refactor | ❌ |
+| `E2E Staging External Runtime` | ❌ — needs always-emit refactor | ❌ |
+| `Intentional-failure teardown sanity` | ❌ inverted assertion, never required | ❌ |
+| `Synthetic E2E against staging` | ❌ cron-only, not a per-PR gate | ❌ |
+
+## Why the always-emit pattern matters
+
+Branch protection requires a *check name* to land at SUCCESS for every PR. Workflows with `paths:` filters that exclude a PR never run, so the check name never appears, and the PR sits BLOCKED forever.
+
+The pattern that supports being required is:
+
+1. Workflow always triggers on push/PR to the protected branch.
+2. A `detect-changes` job uses `dorny/paths-filter` to decide if real work runs.
+3. The protected job runs unconditionally and either (a) does real work when paths matched, or (b) emits a no-op SUCCESS step when paths skipped.
+
+`e2e-api.yml` and `e2e-staging-canvas.yml` already have this shape. `e2e-staging-saas.yml` and `e2e-staging-external.yml` use plain `paths:` filters and need the refactor before they can be required (filed as follow-up).
+
+## Adding a new E2E suite
+
+1. Pick a verb: smoke test, full lifecycle, fault-injection, drift detection. Pre-existing suites split along these lines.
+2. Use the always-emit shape so the check name can be made required.
+3. Add a row to the matrix above.
+4. Decide cron cadence based on cost + how fast drift would otherwise be caught.
+5. If you want it required, add it to the relevant branch protection via `tools/branch-protection/apply.sh` (this PR adds the script).
+
+## When to break glass — temporarily skip a required E2E
+
+Don't. If an E2E is intermittently flaky, fix the test or move it out of required. The point of a required check is that it's load-bearing; bypassing one with an admin override teaches the next operator that the gate is optional.
+
+If a production incident requires bypassing, document the override in the incident postmortem with a same-week follow-up to either fix the test or rip the check out of required.
+
+## Related issues / PRs
+
+- #2486 — silent-drop bug class that the SaaS E2E now catches
+- PR #2811 — `provisionWorkspaceAuto` consolidation (org-import SaaS gate)
+- PR #2824 — `StopWorkspaceAuto` mirror (closes #2813 + #2814)
+- Follow-up: refactor `e2e-staging-saas` + `e2e-staging-external` to always-emit (so they can be required)
diff --git a/tools/branch-protection/apply.sh b/tools/branch-protection/apply.sh
new file mode 100755
index 00000000..928d2829
--- /dev/null
+++ b/tools/branch-protection/apply.sh
@@ -0,0 +1,133 @@
+#!/usr/bin/env bash
+# tools/branch-protection/apply.sh — idempotently apply branch
+# protection to molecule-core's `staging` and `main` branches.
+#
+# Why a script: GitHub's branch protection lives in repo settings, so
+# changes are usually clicked through the UI and lost between admins.
+# This script makes the config reproducible — diff it against the live
+# state, change the file, run it, done. Single source of truth that
+# shows up in code review.
+#
+# Usage:
+#   tools/branch-protection/apply.sh              # apply both branches
+#   tools/branch-protection/apply.sh --dry-run    # show payload only
+#   tools/branch-protection/apply.sh --branch staging
+#
+# Requires: gh CLI authenticated as a repo admin. The script uses gh's
+# token (no separate PAT needed).
+
+set -euo pipefail
+
+REPO="Molecule-AI/molecule-core"
+DRY_RUN=0
+ONLY_BRANCH=""
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --dry-run) DRY_RUN=1; shift ;;
+    --branch) ONLY_BRANCH="$2"; shift 2 ;;
+    -h|--help)
+      echo "Usage: $0 [--dry-run] [--branch <branch>]"
+      exit 0
+      ;;
+    *) echo "Unknown arg: $1" >&2; exit 1 ;;
+  esac
+done
+
+# Required-check matrices. Each branch's set is the canonical list of
+# check NAMES (from each workflow's job-name). Adding/removing a check
+# here is the place to do it. Match docs/e2e-coverage.md.
+#
+# Why staging gets E2E API + Canvas E2E (this PR's addition): both
+# already use the always-emit pattern (path-filter no-ops emit SUCCESS),
+# so making them required can't deadlock a PR that doesn't touch their
+# paths. The other E2Es (SaaS, External) need a refactor to that
+# pattern before they can be required — tracked as follow-up.
+
+read -r -d '' STAGING_CHECKS <<'EOF' || true
+Analyze (go)
+Analyze (javascript-typescript)
+Analyze (python)
+Canvas (Next.js)
+Canvas tabs E2E
+Detect changes
+E2E API Smoke Test
+Platform (Go)
+Python Lint & Test
+Scan diff for credential-shaped strings
+Shellcheck (E2E scripts)
+EOF
+
+read -r -d '' MAIN_CHECKS <<'EOF' || true
+Analyze (go)
+Analyze (javascript-typescript)
+Analyze (python)
+Canvas (Next.js)
+Canvas tabs E2E
+Detect changes
+E2E API Smoke Test
+PR-built wheel + import smoke
+Platform (Go)
+Python Lint & Test
+Scan diff for credential-shaped strings
+Shellcheck (E2E scripts)
+EOF
+
+build_payload() {
+  local checks="$1"
+  local require_reviews="$2"  # true / false
+  local checks_json
+  checks_json=$(printf '%s\n' "$checks" | jq -Rs '
+    split("\n")
+    | map(select(length > 0))
+    | map({context: ., app_id: -1})
+  ')
+  jq -n \
+    --argjson checks "$checks_json" \
+    --argjson reviews "$require_reviews" \
+    '{
+      required_status_checks: {
+        strict: false,
+        checks: $checks
+      },
+      enforce_admins: false,
+      required_pull_request_reviews: (
+        if $reviews then
+          { required_approving_review_count: 1, dismiss_stale_reviews: true }
+        else null end
+      ),
+      restrictions: null,
+      allow_deletions: false,
+      allow_force_pushes: false,
+      block_creations: false,
+      required_conversation_resolution: true,
+      required_linear_history: false,
+      lock_branch: false,
+      allow_fork_syncing: true
+    }'
+}
+
+apply_branch() {
+  local branch="$1"
+  local checks="$2"
+  local require_reviews="$3"
+  local payload
+  payload=$(build_payload "$checks" "$require_reviews")
+  if [[ "$DRY_RUN" -eq 1 ]]; then
+    echo "=== branch: $branch ==="
+    echo "$payload" | jq .
+    return
+  fi
+  echo "Applying branch protection on $branch..."
+  printf '%s' "$payload" | gh api -X PUT \
+    "repos/$REPO/branches/$branch/protection" \
+    --input -
+  echo "Applied: $branch"
+}
+
+if [[ -z "$ONLY_BRANCH" || "$ONLY_BRANCH" == "staging" ]]; then
+  apply_branch staging "$STAGING_CHECKS" true
+fi
+if [[ -z "$ONLY_BRANCH" || "$ONLY_BRANCH" == "main" ]]; then
+  apply_branch main "$MAIN_CHECKS" true
+fi
diff --git a/tools/branch-protection/drift_check.sh b/tools/branch-protection/drift_check.sh
new file mode 100755
index 00000000..b25e23f0
--- /dev/null
+++ b/tools/branch-protection/drift_check.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# tools/branch-protection/drift_check.sh — compare the live branch
+# protection on staging + main against what apply.sh would set. Used
+# by branch-protection-drift.yml (cron) to catch out-of-band UI edits.
+#
+# Exit codes:
+#   0 — live state matches the script
+#   1 — drift detected (output shows the diff)
+#   2 — gh API call failed
+
+set -euo pipefail
+
+REPO="Molecule-AI/molecule-core"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+EXIT_CODE=0
+
+check_branch() {
+  local branch="$1"
+  local want
+  want=$(bash "$SCRIPT_DIR/apply.sh" --dry-run --branch "$branch" 2>&1 |
+    sed -n '/^{$/,/^}$/p' |
+    jq -S '.required_status_checks.checks | map(.context) | sort')
+  local have
+  if ! have=$(gh api "repos/$REPO/branches/$branch/protection/required_status_checks" 2>/dev/null |
+    jq -S '.checks | map(.context) | sort'); then
+    echo "drift_check: failed to fetch $branch protection (gh API error)"
+    return 2
+  fi
+  if [[ "$want" != "$have" ]]; then
+    echo "=== DRIFT on $branch ==="
+    echo "want:"; echo "$want"
+    echo "have:"; echo "$have"
+    diff <(echo "$want") <(echo "$have") || true
+    return 1
+  fi
+  echo "OK: $branch matches desired state"
+}
+
+for b in staging main; do
+  rc=0
+  check_branch "$b" || rc=$?
+  # Keep the worst failure so the documented exit codes hold
+  # (2 = gh API failure outranks 1 = drift).
+  if (( rc > EXIT_CODE )); then EXIT_CODE=$rc; fi
+done
+exit "$EXIT_CODE"

From 62fc25757cfa3557a1648f5dc1906f07431440f5 Mon Sep 17 00:00:00 2001
From: Hongming Wang
Date: Mon, 4 May 2026 20:25:10 -0700
Subject: [PATCH 07/43] docs(backends): document Auto-dispatcher SoT pattern
 + source-level pins
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #10. The 2026-05-05 hongming silent-drop incident shipped
because the backends.md parity matrix didn't enforce a "go through the
dispatcher" rule — three handlers (TeamHandler.Expand,
OrgHandler.createWorkspaceTree, workspace_crud.go's stopAndRemove)
silently bypassed routing on SaaS for ~6 months across two distinct
verbs.

This doc pass:

- Adds a "How to dispatch" section that's the canonical answer to
  "where do I call Start / Stop / Has from?". Names the three
  dispatchers (provisionWorkspaceAuto, StopWorkspaceAuto,
  HasProvisioner), their fallbacks, and the allowed exceptions.

- Updates the matrix lifecycle rows so every dispatched operation
  points at the dispatcher source, not the per-backend bodies.

- Adds Org-import + Team-collapse rows so the bulk paths are visible
  to anyone scanning for parity gaps.

- Lists the source-level pins (4 of them) under Enforcement so future
  contributors see them as load-bearing tests, not noise.

- Adds a "When you add a NEW dispatch site" section so the next verb
  (Pause / Hibernate / Snapshot) lands as a dispatcher mirror, not as
  another bespoke handler that drifts from the existing two.

- Refreshes Last audit to 2026-05-05.

No code change; doc-only. The SoT abstractions described here landed
in PRs #2811 + #2824.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 docs/architecture/backends.md | 47 ++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 4 deletions(-)

diff --git a/docs/architecture/backends.md b/docs/architecture/backends.md
index ce01b247..0d5ba827 100644
--- a/docs/architecture/backends.md
+++ b/docs/architecture/backends.md
@@ -2,7 +2,7 @@

 **Status:** living document — update when you ship a feature that touches one backend.
 **Owner:** workspace-server + controlplane teams.
-**Last audit:** 2026-05-02 (Claude agent, PR #TBD).
+**Last audit:** 2026-05-05 (Claude agent — `provisionWorkspaceAuto` / `StopWorkspaceAuto` / `HasProvisioner` SoT pattern landed in PRs #2811 + #2824).

 ## Why this exists

@@ -15,16 +15,39 @@ Every user-visible workspace feature should work on both backends unless it is f

 This document is the canonical matrix. If you are landing a workspace-facing feature, update the row before you merge.

+## How to dispatch (the SoT pattern)
+
+When a handler needs to start a workspace, stop one, or check whether any backend can run one, it MUST go through the centralized dispatchers on `WorkspaceHandler`:
+
+| Need | Use | Source |
+|---|---|---|
+| Start a workspace | `provisionWorkspaceAuto(ctx, ...)` | `workspace.go:130` |
+| Stop a workspace | `StopWorkspaceAuto(ctx, wsID)` | `workspace.go:172` |
+| Gate "do we have any backend wired?" | `HasProvisioner()` | `workspace.go:115` |
+
+Each dispatcher routes to `cpProv.X()` when the SaaS backend is wired, then `provisioner.X()` when the Docker backend is wired, then a defined fallback (`provisionWorkspaceAuto` self-marks-failed; `StopWorkspaceAuto` no-ops; `HasProvisioner` returns false).
+
+**Rule: do not call `h.cpProv.Stop`, `h.provisioner.Stop`, `h.cpProv.Start`, or `h.provisioner.Start` directly from a handler.** Source-level pins (`TestNoCallSiteCallsDirectProvisionerExceptAuto`, `TestNoCallSiteCallsBareStop`) gate this at CI; they exist because the same drift class shipped twice — TeamHandler.Expand (#2367) bypassed routing on Start, then `team.go:208` + `workspace_crud.go:432` bypassed it on Stop (#2813, #2814) for ~6 months.
+
+Allowed exceptions (in the source-pin allowlists):
+- `workspace.go` and `workspace_provision.go` — define the per-backend bodies the dispatcher routes between.
+- `workspace_restart.go` — pre-dates the dispatchers and uses manual if-cpProv-else dispatch with retry semantics tuned for the restart hot path. Consolidation tracked in #2799.
+- `container_files.go` — drives the Docker daemon directly for short-lived file-copy containers; no workspace-level Stop semantics involved.
+
+For "do we have any backend?", use `HasProvisioner()`, never bare `h.provisioner == nil && h.cpProv == nil`. Source-level pin `TestNoBareBothNilCheck` enforces this — added 2026-05-05 after the hongming org-import incident showed the bare check shape was a recurring drift target.
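+
+The shape, as a minimal sketch (type and field names here are
+illustrative stand-ins, not the real definitions; those live at the
+`workspace.go` lines cited above):
+
+```go
+package handlers
+
+import "context"
+
+// Illustrative stand-in: the real backend types live in the
+// provisioner package and the control-plane client.
+type backend interface {
+	Stop(ctx context.Context, wsID string) error
+}
+
+type WorkspaceHandler struct {
+	cpProv      backend // SaaS control-plane backend (EC2)
+	provisioner backend // local Docker backend
+}
+
+// HasProvisioner gates "do we have any backend wired?"; handlers call
+// this instead of the bare both-nil check that TestNoBareBothNilCheck
+// pins against.
+func (h *WorkspaceHandler) HasProvisioner() bool {
+	return h.cpProv != nil || h.provisioner != nil
+}
+
+// StopWorkspaceAuto routes one verb: control plane first when wired,
+// then Docker, else the verb's defined fallback (Stop no-ops; Start
+// would mark the workspace failed; Has returns false).
+func (h *WorkspaceHandler) StopWorkspaceAuto(ctx context.Context, wsID string) error {
+	switch {
+	case h.cpProv != nil:
+		return h.cpProv.Stop(ctx, wsID)
+	case h.provisioner != nil:
+		return h.provisioner.Stop(ctx, wsID)
+	default:
+		return nil // no backend wired: Stop is a defined no-op
+	}
+}
+```
+
+The switch ordering encodes the same precedence the table above
+describes: the SaaS control plane wins whenever it is wired.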
+
 ## The matrix

 | Feature | File(s) | Docker | EC2 | Verdict |
 |---|---|---|---|---|
 | **Lifecycle** | | | | |
-| Create | `workspace_provision.go:19-214` | `provisionWorkspace()` → `provisioner.Start()` | `provisionWorkspaceCP()` → `cpProv.Start()` | ✅ parity |
+| Create | `workspace.go:130` `provisionWorkspaceAuto` → `provisionWorkspace()` (Docker) / `provisionWorkspaceCP()` (CP) | dispatched | dispatched | ✅ parity (single source of truth, PR #2811) |
 | Start | `provisioner.go:140-325` | container create + image pull | EC2 `RunInstance` via CP | ✅ parity |
-| Stop | `provisioner.go:772-785` | `ContainerRemove(force=true)` + optional volume rm | `DELETE /cp/workspaces/:id` | ✅ parity |
+| Stop | `workspace.go:172` `StopWorkspaceAuto` → `provisioner.Stop()` (Docker) / `cpProv.Stop()` (CP) | dispatched | dispatched | ✅ parity (single source of truth, PR #2824) |
 | Restart | `workspace_restart.go:45-210` | reads runtime from live container before stop | reads runtime from DB only | ⚠️ divergent — config-change + crash window can boot old runtime on EC2 |
-| Delete | `workspace_crud.go` | stop + volume rm | stop only (stateless) | ✅ parity (expected divergence on volume cleanup) |
+| Delete | `workspace_crud.go` `stopAndRemove` → `StopWorkspaceAuto` + Docker-only `RemoveVolume` | stop + volume rm | stop only (stateless — CP has no volumes) | ✅ parity (PR #2824 closed the SaaS-leak gap) |
+| Org-import (bulk Create) | `org_import.go:178` gates on `h.workspace.HasProvisioner()`; routes through `provisionWorkspaceAuto` per workspace | dispatched | dispatched | ✅ parity (PR #2811 closed the SaaS-skip gate) |
+| Team-collapse (bulk Stop) | `team.go:206` calls `StopWorkspaceAuto` for each child | dispatched | dispatched | ✅ parity (PR #2824 closed the SaaS-leak gap) |
 | **Secrets** | | | | |
 | Create / update | `secrets.go` | DB insert, injected at container start | DB insert, injected via user-data at boot | ✅ parity |
 | Redaction | `workspace_provision.go:251` | applied at memory-seed time | applied at agent runtime | ⚠️ divergent — timing differs |
@@ -76,7 +99,23 @@ This document is the canonical matrix. If you are landing a workspace-facing fea

 - **`tools/check-template-parity.sh`** (this repo) — ensures `install.sh` and `start.sh` in a template repo forward identical sets of provider keys. Wire into each template repo's CI as `bash $MONOREPO/tools/check-template-parity.sh install.sh start.sh`.
 - **Contract tests** (stub) — `workspace-server/internal/provisioner/backend_contract_test.go` defines the behaviors every `provisioner.Provisioner` implementation must satisfy. Fails compile when a method drifts between `Docker` and `CPProvisioner`. Scenario-level runs are `t.Skip`'d today pending drift risk #6 (see above) — compile-time assertions still catch method drift.
+- **Source-level dispatcher pins** — `workspace_provision_auto_test.go` enforces the SoT pattern documented above:
+  - `TestNoCallSiteCallsDirectProvisionerExceptAuto` — no handler calls `.provisionWorkspace(` or `.provisionWorkspaceCP(` directly outside the dispatcher's allowlist.
+  - `TestNoCallSiteCallsBareStop` — no handler calls `.provisioner.Stop(` or `.cpProv.Stop(` directly outside the dispatcher's allowlist (strips Go comments before substring match so archaeology in code comments doesn't trip the gate).
+  - `TestNoBareBothNilCheck` — no production code uses `h.provisioner == nil && h.cpProv == nil`; must use `!h.HasProvisioner()`.
+  - `TestOrgImportGate_UsesHasProvisionerNotBareField` — pins the org-import provisioning gate against the bare-Docker-check shape that caused the 2026-05-05 hongming incident.

 ## How to update this doc

 When you land a feature that touches a handler dispatch on `h.cpProv != nil`, add or update the matching row. If you can't implement both backends in the same PR, mark the row `docker-only` or `ec2-only` and file an issue tracking the gap.
+
+### When you add a NEW dispatch site
+
+If you find yourself writing `if h.cpProv != nil { ... } else if h.provisioner != nil { ... }` for a new operation (Pause, Hibernate, Snapshot, etc.):
+
+1. Add a `<Verb>WorkspaceAuto` method on `WorkspaceHandler` next to the existing dispatchers. Mirror the docstring shape: routing, no-backend fallback, ordering rationale.
+2. Add a source-level pin in `workspace_provision_auto_test.go` — the bare-call shape your dispatcher replaces; fail when a handler reintroduces it.
+3. Add a row to the matrix above with the dispatcher reference.
+4. If your operation has retry semantics specific to a hot path, leave them in the original location for now and file a follow-up under #2799 — don't bake retry into the generic dispatcher unless every caller benefits.
+
+The pattern is "one dispatcher per verb." Don't fold every operation into `provisionWorkspaceAuto` — different verbs have different no-backend fallbacks (mark-failed for Start, no-op for Stop, false for Has).

From ba63f76e10a1309022c0e8def3dcfe1fe21996b6 Mon Sep 17 00:00:00 2001
From: Hongming Wang
Date: Mon, 4 May 2026 20:33:13 -0700
Subject: [PATCH 08/43] feat(canvas/terminal): not-available banner for
 runtimes without a TTY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-fix TerminalTab tried to open /ws/terminal/:id for every workspace
including external ones (which have no shell endpoint on the
workspace-server). The server returned 404, status flipped to "error",
the user saw "Connection failed" with a Reconnect button — reading as
a bug when really the runtime intentionally has no TTY.

Now: when data.runtime is in RUNTIMES_WITHOUT_TERMINAL (currently just
"external"), TerminalTab renders a NotAvailablePanel with a big
terminal-off icon and a one-line explanation including the runtime
name. The xterm + WebSocket dance is skipped entirely — no spurious
404s, no scary error UI, no Reconnect that can't help.

The runtime is determined from the data prop now threaded by
SidePanel.tsx (existing pattern for ChatTab/ConfigTab/etc).

Tests: 4 new in TerminalTab.notAvailable.test.tsx pin: external
renders banner with runtime name, external doesn't open WS,
claude-code mounts normally (regression cover for the early-return
scope), data omitted falls through (back-compat). Build clean.
1258 tests pass.
---
 canvas/src/components/SidePanel.tsx           |   2 +-
 canvas/src/components/tabs/TerminalTab.tsx    |  91 ++++++++++++++-
 .../TerminalTab.notAvailable.test.tsx         | 107 ++++++++++++++++++
 3 files changed, 198 insertions(+), 2 deletions(-)
 create mode 100644 canvas/src/components/tabs/__tests__/TerminalTab.notAvailable.test.tsx

diff --git a/canvas/src/components/SidePanel.tsx b/canvas/src/components/SidePanel.tsx
index 437aff90..2db6c4a1 100644
--- a/canvas/src/components/SidePanel.tsx
+++ b/canvas/src/components/SidePanel.tsx
@@ -283,7 +283,7 @@ export function SidePanel() {
           {panelTab === "skills" && <SkillsTab workspaceId={workspaceId} />}
           {panelTab === "activity" && <ActivityTab workspaceId={workspaceId} />}
           {panelTab === "chat" && <ChatTab workspaceId={workspaceId} data={data} />}
-          {panelTab === "terminal" && <TerminalTab workspaceId={workspaceId} />}
+          {panelTab === "terminal" && <TerminalTab workspaceId={workspaceId} data={data} />}
          {panelTab === "config" && <ConfigTab workspaceId={workspaceId} data={data} />}
          {panelTab === "schedule" && <ScheduleTab workspaceId={workspaceId} />}
          {panelTab === "channels" && <ChannelsTab workspaceId={workspaceId} />}
diff --git a/canvas/src/components/tabs/TerminalTab.tsx b/canvas/src/components/tabs/TerminalTab.tsx
index 1c2c2027..43fc8e04 100644
--- a/canvas/src/components/tabs/TerminalTab.tsx
+++ b/canvas/src/components/tabs/TerminalTab.tsx
@@ -1,16 +1,105 @@
 "use client";

 import { useEffect, useRef, useState, useCallback } from "react";
+import type { WorkspaceNodeData } from "@/store/canvas";

 interface Props {
   workspaceId: string;
+  /** Workspace metadata from the canvas store. Optional for back-compat
+   * with any caller that still mounts <TerminalTab workspaceId={…} />
+   * without threading data through (e.g. tests). When present, the
+   * runtime field gates the early-return below. */
+  data?: WorkspaceNodeData;
 }

 import { deriveWsBaseUrl } from "@/lib/ws-url";

 const WS_URL = deriveWsBaseUrl();

-export function TerminalTab({ workspaceId }: Props) {
+/**
+ * NotAvailablePanel — full-tab placeholder with a big terminal-off icon
+ * for runtimes that don't expose a TTY (e.g. external workspaces, where
+ * the platform doesn't own the process). Pre-fix the tab tried to open
+ * a WebSocket against /ws/terminal/:id for these workspaces, the server
+ * 404'd, and the user saw "Connection failed" — which reads as a bug,
+ * not as "this runtime intentionally has no shell". This banner makes
+ * the absence intentional.
+ */
+function NotAvailablePanel({ runtime }: { runtime: string }) {
+  return (
+    <div className="flex h-full flex-col items-center justify-center gap-3 p-6 text-center">
+      {/* Big terminal-off icon — bracket "[_]" with a slash through it.
+         Custom inline SVG so we don't depend on an icon set being
+         present at canvas build-time. */}
+      <svg
+        width="48"
+        height="48"
+        viewBox="0 0 24 24"
+        fill="none"
+        stroke="currentColor"
+        strokeWidth="1.5"
+        strokeLinecap="round"
+        className="text-zinc-600"
+        aria-hidden="true"
+      >
+        <path d="M8 7H6v10h2" />
+        <path d="M16 7h2v10h-2" />
+        <path d="M10 15h4" />
+        <line x1="4" y1="4" x2="20" y2="20" />
+      </svg>
+      <div className="text-[13px] font-medium text-ink">Terminal not available</div>
+      <div className="max-w-[36ch] text-[12px] text-zinc-500">
+        This workspace runs the{" "}
+        <span className="font-mono text-ink">{runtime}</span> runtime,
+        which doesn't expose a shell. Use the Chat tab to interact with the
+        agent directly.
+      </div>
+    </div>
+  );
+}
+
+/** Runtimes that don't expose a TTY. Keep narrow — only add a runtime
+ * here when its provisioner genuinely has no shell endpoint, otherwise
+ * the user loses access to a real debugging surface. */
+const RUNTIMES_WITHOUT_TERMINAL = new Set(["external"]);
+
+export function TerminalTab({ workspaceId, data }: Props) {
+  // Early-return for runtimes that have no shell. Skips the entire
+  // xterm + WebSocket dance below — without this, mounting the tab
+  // for an external workspace pops the WS, gets a 404 from the
+  // workspace-server (no /ws/terminal/:id route registered for it),
+  // and shows "Connection failed" with a Reconnect button — confusing
+  // because the workspace IS healthy, just doesn't have a TTY.
+  if (data && RUNTIMES_WITHOUT_TERMINAL.has(data.runtime)) {
+    return <NotAvailablePanel runtime={data.runtime} />;
+  }
+
   const containerRef = useRef<HTMLDivElement | null>(null);
   const termRef = useRef<{ dispose: () => void } | null>(null);
   const wsRef = useRef<WebSocket | null>(null);
diff --git a/canvas/src/components/tabs/__tests__/TerminalTab.notAvailable.test.tsx b/canvas/src/components/tabs/__tests__/TerminalTab.notAvailable.test.tsx
new file mode 100644
index 00000000..df955a46
--- /dev/null
+++ b/canvas/src/components/tabs/__tests__/TerminalTab.notAvailable.test.tsx
@@ -0,0 +1,107 @@
+// @vitest-environment jsdom
+//
+// Pins the "Terminal not available" early-return added 2026-05-05.
+//
+// Pre-fix: TerminalTab tried to open /ws/terminal/:id for every
+// workspace including external runtimes (which have no shell endpoint).
+// The server returned 404, status flipped to "error", user saw
+// "Connection failed" with a Reconnect button — reading as a bug
+// when really the runtime intentionally has no TTY. Now: when
+// data.runtime is in RUNTIMES_WITHOUT_TERMINAL, render a banner +
+// big icon instead of mounting xterm/WS.
+//
+// Pinned branches:
+//   1. external runtime → "Terminal not available" banner renders,
+//      runtime name surfaces in the body so the user knows WHY.
+//   2. external runtime → xterm + WebSocket are NOT initialised.
+//      Verified by checking the global WebSocket constructor isn't
+//      called.
+//   3. claude-code (or any other runtime) → no banner, normal mount
+//      proceeds. Pre-fix regression cover.
+//   4. data prop omitted (back-compat with any caller that doesn't
+//      thread it through) → no early-return, falls through to normal
+//      mount. Tested via the absence of the banner.

+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { render, screen, cleanup } from "@testing-library/react";
+import React from "react";
+
+afterEach(cleanup);
+
+// xterm + addon-fit are dynamically imported by TerminalTab. Stub them
+// so the tests don't pull a 200KB+ dependency just to verify the
+// not-available banner. The stubs only matter for the non-banner
+// branches; the banner returns BEFORE the dynamic import.
+vi.mock("xterm", () => ({
+  Terminal: vi.fn().mockImplementation(() => ({
+    loadAddon: vi.fn(),
+    open: vi.fn(),
+    onData: vi.fn(),
+    write: vi.fn(),
+    dispose: vi.fn(),
+    onResize: vi.fn(),
+    cols: 80,
+    rows: 24,
+  })),
+}));
+vi.mock("@xterm/addon-fit", () => ({
+  FitAddon: vi.fn().mockImplementation(() => ({
+    fit: vi.fn(),
+  })),
+}));
+
+// Track WebSocket constructor calls — this is the load-bearing
+// assertion for "external doesn't even try to connect".
+let wsConstructed = 0;
+beforeEach(() => {
+  wsConstructed = 0;
+  (globalThis as unknown as { WebSocket: unknown }).WebSocket = vi
+    .fn()
+    .mockImplementation(() => {
+      wsConstructed++;
+      return {
+        addEventListener: vi.fn(),
+        removeEventListener: vi.fn(),
+        send: vi.fn(),
+        close: vi.fn(),
+        readyState: 0,
+      };
+    });
+});
+
+import { TerminalTab } from "../TerminalTab";
+
+const externalData = { runtime: "external", status: "online" } as unknown as Parameters<
+  typeof TerminalTab
+>[0]["data"];
+
+const claudeData = { runtime: "claude-code", status: "online" } as unknown as Parameters<
+  typeof TerminalTab
+>[0]["data"];
+
+describe("TerminalTab not-available early-return for runtimes without TTY", () => {
+  it("external runtime renders the not-available banner with runtime name", () => {
+    render(<TerminalTab workspaceId="ws-1" data={externalData} />);
+    expect(screen.getByText(/Terminal not available/i)).not.toBeNull();
+    // Runtime name surfaces so user knows WHY there's no terminal.
+    expect(screen.getByText(/external/)).not.toBeNull();
+  });
+
+  it("external runtime does NOT open a WebSocket", async () => {
+    render(<TerminalTab workspaceId="ws-1" data={externalData} />);
+    // Wait a tick for any deferred init (there shouldn't be any, but
+    // tolerate a microtask boundary).
+    await new Promise((r) => setTimeout(r, 0));
+    expect(wsConstructed).toBe(0);
+  });
+
+  it("claude-code runtime does NOT render the banner (normal mount)", () => {
+    render(<TerminalTab workspaceId="ws-1" data={claudeData} />);
+    expect(screen.queryByText(/Terminal not available/i)).toBeNull();
+  });
+
+  it("data prop omitted falls through to normal mount (back-compat)", () => {
+    render(<TerminalTab workspaceId="ws-1" />);
+    expect(screen.queryByText(/Terminal not available/i)).toBeNull();
+  });
+});

From 20f76c4fdfcf99893ed6954344d8158b726121c0 Mon Sep 17 00:00:00 2001
From: Hongming Wang
Date: Mon, 4 May 2026 20:38:37 -0700
Subject: [PATCH 09/43] fix(canvas/chat): stable IntersectionObserver +
 inflight guard for loadOlder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Self-review of the lazy-load PR caught three Important findings:

1. IO observer was re-armed on every messages change. The previous
loadOlder useCallback depended on `messages`, so every live agent push
recreated it → re-ran the IO useEffect → tore down + re-armed the
observer. In a perf PR shipping to chat-heavy users, that's the wrong
direction. Fix: refs for the captured state (oldestMessageRef,
hasMoreRef), narrow loadOlder deps to [workspaceId], and gate the IO
effect on `messages.length > 0` (boolean) instead of `messages` so it
arms exactly once when data first lands and stays armed across appends.

2. loadingOlder setState race. Two IO callbacks dispatched in the same
microtask (fast scroll, layout shift) could both pass the `if
(loadingOlder)` guard before React committed setLoadingOlder. Fix:
synchronous inflightRef set BEFORE any await, cleared in finally;
loadingOlder state stays for the UI label only.

3. Retry-button onClick duplicated the mount-effect body. Single
loadInitial() callback now serves both, eliminating the drift hazard.
Coverage:
- 4 new tests bring the file to 8/8 (was 4):
  - loadOlder fetches with limit=20 and before_ts=oldest.timestamp
  - inflight guard rejects three concurrent IO triggers while a
    deferred fetch is in flight (asserts call count stays at 2, not 5)
  - empty older response unmounts the sentinel (proxy for the
    anchor-clearing branch in loadOlder)
  - IO observer instance survives three subsequent prepends — same
    object reference both before and after, no churn
- Both behavioural tests verified to FAIL on the prior code (stashed
  ChatTab.tsx, ran them alone, confirmed both red), then PASS on this
  commit. Pinning real regressions, not tautologies.
- IntersectionObserver fake captures instances + exposes
  triggerIntersection() so the IO callback can be driven directly from
  jsdom (no real layout / scrolling needed).

Test: vitest run src/components/tabs/__tests__/ → 39 passed.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 canvas/src/components/tabs/ChatTab.tsx          | 129 ++++++++----
 .../__tests__/ChatTab.lazyHistory.test.tsx      | 189 +++++++++++++++++-
 2 files changed, 268 insertions(+), 50 deletions(-)

diff --git a/canvas/src/components/tabs/ChatTab.tsx b/canvas/src/components/tabs/ChatTab.tsx
index af6e8b63..7d7fcf15 100644
--- a/canvas/src/components/tabs/ChatTab.tsx
+++ b/canvas/src/components/tabs/ChatTab.tsx
@@ -291,18 +291,31 @@ function MyChatPanel({ workspaceId, data }: Props) {
   // - topRef = sentinel above the messages list; IO observes it
   //   and triggers loadOlder() when it enters view
   // - hasMore = false once a fetch returns < limit rows; stops IO
-  // - loadingOlder = guards against duplicate loadOlder() calls while
-  //   one is already in flight (fast scroll-flick)
+  // - loadingOlder = drives the "Loading older messages…" UI label
+  // - inflightRef = synchronous guard against double-entry of loadOlder
+  //                 when the IO callback fires twice in the same
+  //                 microtask (state-based guard would be stale until
+  //                 the next React commit)
   // - scrollAnchorRef = saves distance-from-bottom before a prepend
   //                     so the useLayoutEffect below can restore the
   //                     user's exact viewport position. Without this,
   //                     prepending older messages would jump the scroll
   //                     position by the height of the new content.
+  // - oldestMessageRef / hasMoreRef = let the loadOlder closure read
+  //                 the latest values without taking them as deps —
+  //                 every live agent push mutates `messages`, and
+  //                 having loadOlder depend on `messages` would tear
+  //                 down + re-arm the IntersectionObserver on every
+  //                 push. Refs decouple the observer lifecycle from
+  //                 message-list updates.
   const containerRef = useRef<HTMLDivElement | null>(null);
   const topRef = useRef<HTMLDivElement | null>(null);
   const [hasMore, setHasMore] = useState(true);
   const [loadingOlder, setLoadingOlder] = useState(false);
+  const inflightRef = useRef(false);
   const scrollAnchorRef = useRef<{ savedDistanceFromBottom: number } | null>(null);
+  const oldestMessageRef = useRef<ChatMessage | null>(null);
+  const hasMoreRef = useRef(true);
   // Files the user has picked but not yet sent. Cleared on send
   // (upload success) or by the × on each pill.
   const [pendingFiles, setPendingFiles] = useState<File[]>([]);
@@ -341,13 +354,11 @@ function MyChatPanel({ workspaceId, data }: Props) {
     sendInFlightRef.current = false;
   }, []);

-  // Load chat history from database on mount.
-  // Initial load is bounded to INITIAL_HISTORY_LIMIT (newest 10) — the
-  // rest streams in as the user scrolls up via loadOlder() below. Pre-
-  // 2026-05-05 this fetched the newest 50 in one shot; on a long-running
-  // workspace that meant 50× message-bubble paint + DOM cost on every
-  // tab-open even when the user only wanted to read the last few.
-  useEffect(() => {
+  // Initial-load fetch — used by the mount effect and the "Retry"
+  // button below. Single source of truth so the two paths can't drift
+  // (e.g. INITIAL_HISTORY_LIMIT bumped in the effect but not the
+  // retry, leading to inconsistent first-paint sizes).
+  const loadInitial = useCallback(() => {
     setLoading(true);
     setLoadError(null);
     setHasMore(true);
@@ -361,16 +372,41 @@ function MyChatPanel({ workspaceId, data }: Props) {
     );
   }, [workspaceId]);

-  // Fetch the next-older batch and prepend. Caller responsibility:
-  // already check loadingOlder + hasMore (we re-check defensively for
-  // race-safety against the IO callback firing twice).
+  // Load chat history on mount / workspace switch.
+  // Initial load is bounded to INITIAL_HISTORY_LIMIT (newest 10) — the
+  // rest streams in as the user scrolls up via loadOlder() below. Pre-
+  // 2026-05-05 this fetched the newest 50 in one shot; on a long-running
+  // workspace that meant 50× message-bubble paint + DOM cost on every
+  // tab-open even when the user only wanted to read the last few.
+  useEffect(() => {
+    loadInitial();
+  }, [loadInitial]);
+
+  // Mirror the latest oldest-message + hasMore into refs so loadOlder
+  // can read them without taking `messages` as a dep. Every live push
+  // through agentMessages would otherwise recreate loadOlder and tear
+  // down the IO observer.
+  useEffect(() => {
+    oldestMessageRef.current = messages[0] ?? null;
+  }, [messages]);
+  useEffect(() => {
+    hasMoreRef.current = hasMore;
+  }, [hasMore]);
+
+  // Fetch the next-older batch and prepend. Stable identity (deps =
+  // [workspaceId]) so the IntersectionObserver effect below doesn't
+  // re-arm on every messages update.
   const loadOlder = useCallback(async () => {
-    if (loadingOlder || !hasMore) return;
-    if (messages.length === 0) return;
-    const oldest = messages[0];
+    // inflightRef is the load-bearing guard — synchronous, set BEFORE
+    // any await, so two IO callbacks dispatched in the same microtask
+    // can't both pass. The ref-mirrored hasMore check and the null
+    // guards below are defensive secondary gates for the slow-scroll
+    // case.
+    if (inflightRef.current || !hasMoreRef.current) return;
+    const oldest = oldestMessageRef.current;
     if (!oldest) return;
     const container = containerRef.current;
     if (!container) return;
+    inflightRef.current = true;
     // Capture the user's distance-from-bottom BEFORE we prepend so the
     // useLayoutEffect can restore it after the new DOM lands. Without
     // this anchor, the user reading mid-history would get yanked
     // by the height of the prepended content.
     scrollAnchorRef.current = {
       savedDistanceFromBottom: container.scrollHeight - container.scrollTop,
     };
     setLoadingOlder(true);
-    const { messages: older, reachedEnd } = await loadMessagesFromDB(
-      workspaceId,
-      OLDER_HISTORY_BATCH,
-      oldest.timestamp,
-    );
-    if (older.length > 0) {
-      setMessages((prev) => [...older, ...prev]);
-    } else {
-      // Nothing came back — clear the anchor so the next paint doesn't
-      // try to "restore" against a no-op prepend.
- scrollAnchorRef.current = null; + try { + const { messages: older, reachedEnd } = await loadMessagesFromDB( + workspaceId, + OLDER_HISTORY_BATCH, + oldest.timestamp, + ); + if (older.length > 0) { + setMessages((prev) => [...older, ...prev]); + } else { + // Nothing came back — clear the anchor so the next paint doesn't + // try to "restore" against a no-op prepend. + scrollAnchorRef.current = null; + } + setHasMore(!reachedEnd); + } finally { + setLoadingOlder(false); + inflightRef.current = false; } - setHasMore(!reachedEnd); - setLoadingOlder(false); - }, [workspaceId, messages, loadingOlder, hasMore]); + }, [workspaceId]); // IntersectionObserver on the top sentinel. Fires loadOlder() the // moment the user scrolls within 200px of the top. AbortController // unwires cleanly on workspace switch / unmount; root is the // scrollable container so we observe only what's visible inside it. + // + // Dependencies: + // - loadOlder — stable per workspaceId (refs decouple it from + // message updates), so this dep is here for the + // workspace-switch case only + // - hasMore — re-run when older history runs out so we + // disconnect cleanly + // - hasMessages — load-bearing: the sentinel JSX is gated on + // `messages.length > 0`, so topRef.current is null + // on the empty-messages render. We re-arm exactly + // once when messages first land. NOT depending on + // `messages.length` (or `messages`) directly so + // each subsequent message append doesn't tear down + // + re-arm the observer. + const hasMessages = messages.length > 0; useEffect(() => { const top = topRef.current; const container = containerRef.current; @@ -415,7 +470,7 @@ function MyChatPanel({ workspaceId, data }: Props) { io.observe(top); ac.signal.addEventListener("abort", () => io.disconnect()); return () => ac.abort(); - }, [loadOlder, hasMore]); + }, [loadOlder, hasMore, hasMessages]); // Agent reachability useEffect(() => { @@ -873,19 +928,7 @@ function MyChatPanel({ workspaceId, data }: Props) { Failed to load chat history: {loadError}

+ ))} +
+ )} +
+ + {/* Namespace */} +
+ + setNamespace(e.target.value)} + placeholder="general" + className="w-full bg-surface border border-line/60 focus:border-accent/60 rounded px-2 py-1.5 text-[12px] text-ink placeholder-zinc-600 focus:outline-none transition-colors" + /> +
+ + {/* Content */} +
+ +