From da416caeca6511d19e42f6d2a12bd004042d28f6 Mon Sep 17 00:00:00 2001 From: Molecule AI Core-BE Date: Thu, 14 May 2026 21:27:52 +0000 Subject: [PATCH 1/2] fix(staging): restore goAsync tracking in 5 dispatch calls + move config seeding pre-Start Investigation of issue #1058 confirmed 3 regressions on staging (introduced by the OFFSEC-003 promotion PR #1059): 1. workspace_dispatchers.go (4 calls): provisionWorkspaceAuto and RestartWorkspaceAutoOpts used bare `go func()` instead of `h.goAsync(func() { ... })`, losing goroutine WaitGroup tracking. Restored h.goAsync on all 4 dispatch sites. 2. a2a_proxy.go (1 call): resolveAgentURL used bare `go h.RestartByID()` when waking a hibernated workspace. Restored h.goAsync wrapper. 3. provisioner.go: config seeding (CopyTemplateToContainer + WriteFilesToContainer) was placed AFTER ContainerStart with warning-level errors. Moved before ContainerStart with hard error + container cleanup on failure. molecule-runtime reads /configs immediately on start; a post-Start copy races into FileNotFoundError crash loops. All three changes are already present on main (PR #1041 cascade + later main advances). This PR brings staging to parity. Co-Authored-By: Claude Opus 4.7 --- .../internal/handlers/a2a_proxy.go | 2 +- .../handlers/workspace_dispatchers.go | 8 ++--- .../internal/provisioner/provisioner.go | 30 ++++++++++--------- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/workspace-server/internal/handlers/a2a_proxy.go b/workspace-server/internal/handlers/a2a_proxy.go index 5737b156..fd94c8ea 100644 --- a/workspace-server/internal/handlers/a2a_proxy.go +++ b/workspace-server/internal/handlers/a2a_proxy.go @@ -645,7 +645,7 @@ func (h *WorkspaceHandler) resolveAgentURL(ctx context.Context, workspaceID stri // the caller can retry once the workspace is back online (~10s). if status == "hibernated" { log.Printf("ProxyA2A: waking hibernated workspace %s", workspaceID) - go h.RestartByID(workspaceID) + h.goAsync(func() { h.RestartByID(workspaceID) }) return "", &proxyA2AError{ Status: http.StatusServiceUnavailable, Headers: map[string]string{"Retry-After": "15"}, diff --git a/workspace-server/internal/handlers/workspace_dispatchers.go b/workspace-server/internal/handlers/workspace_dispatchers.go index 3df25877..03f8e579 100644 --- a/workspace-server/internal/handlers/workspace_dispatchers.go +++ b/workspace-server/internal/handlers/workspace_dispatchers.go @@ -111,11 +111,11 @@ func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath stri "sync": false, }) if h.cpProv != nil { - go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload) + h.goAsync(func() { h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload) }) return true } if h.provisioner != nil { - go h.provisionWorkspace(workspaceID, templatePath, configFiles, payload) + h.goAsync(func() { h.provisionWorkspace(workspaceID, templatePath, configFiles, payload) }) return true } // No backend wired — mark failed so the workspace doesn't linger in @@ -275,13 +275,13 @@ func (h *WorkspaceHandler) RestartWorkspaceAutoOpts(ctx context.Context, workspa if h.cpProv != nil { h.cpStopWithRetry(ctx, workspaceID, "RestartWorkspaceAuto") // resetClaudeSession is Docker-only — CP has no session state to clear. - go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload) + h.goAsync(func() { h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload) }) return true } if h.provisioner != nil { // Docker.Stop has no retry — see docstring rationale. h.provisioner.Stop(ctx, workspaceID) - go h.provisionWorkspaceOpts(workspaceID, templatePath, configFiles, payload, resetClaudeSession) + h.goAsync(func() { h.provisionWorkspaceOpts(workspaceID, templatePath, configFiles, payload, resetClaudeSession) }) return true } // No backend wired — same shape as provisionWorkspaceAuto's no-backend diff --git a/workspace-server/internal/provisioner/provisioner.go b/workspace-server/internal/provisioner/provisioner.go index d50ad06b..4c19c204 100644 --- a/workspace-server/internal/provisioner/provisioner.go +++ b/workspace-server/internal/provisioner/provisioner.go @@ -481,6 +481,22 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e return "", fmt.Errorf("failed to create container: %w", err) } + // Seed /configs before the entrypoint starts. molecule-runtime reads + // /configs/config.yaml immediately; post-start copy races fast runtimes + // into a FileNotFoundError crash loop. + if cfg.TemplatePath != "" { + if err := p.CopyTemplateToContainer(ctx, resp.ID, cfg.TemplatePath); err != nil { + _ = p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true}) + return "", fmt.Errorf("failed to copy template to container %s before start: %w", name, err) + } + } + if len(cfg.ConfigFiles) > 0 { + if err := p.WriteFilesToContainer(ctx, resp.ID, cfg.ConfigFiles); err != nil { + _ = p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true}) + return "", fmt.Errorf("failed to write config files to container %s before start: %w", name, err) + } + } + if err := p.cli.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil { // Clean up created container on start failure _ = p.cli.ContainerRemove(ctx, resp.ID, container.RemoveOptions{Force: true}) @@ -496,20 +512,6 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e // /configs and /workspace, then drops to agent via gosu). No per-start // chown needed here. - // Copy template files into /configs if TemplatePath is set - if cfg.TemplatePath != "" { - if err := p.CopyTemplateToContainer(ctx, resp.ID, cfg.TemplatePath); err != nil { - log.Printf("Provisioner: warning — failed to copy template to container %s: %v", name, err) - } - } - - // Write generated config files into /configs if ConfigFiles is set - if len(cfg.ConfigFiles) > 0 { - if err := p.WriteFilesToContainer(ctx, resp.ID, cfg.ConfigFiles); err != nil { - log.Printf("Provisioner: warning — failed to write config files to container %s: %v", name, err) - } - } - // Resolve the host-mapped port. Retry inspect up to 3 times if Docker hasn't // bound the ephemeral port yet (rare race under heavy load). hostURL := InternalURL(cfg.WorkspaceID) // fallback to Docker-internal From 2751861b0403212886b6d12a55d41e0ea6b7a45a Mon Sep 17 00:00:00 2001 From: Molecule AI Core-BE Date: Thu, 14 May 2026 22:37:56 +0000 Subject: [PATCH 2/2] fix(staging): add goAsync method + asyncWG field to WorkspaceHandler Cherry-picks the goAsync definition from main commit 1c3b4ff3 so that PR #1076's 5 goAsync(...) call sites compile on staging. core-devops correctly identified that h.goAsync was called at 5 sites but never defined on the staging branch. Without this, the build fails. fixes #1076 review feedback Co-Authored-By: Claude Opus 4.7 --- workspace-server/internal/handlers/workspace.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index b674836b..ff97728b 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -15,6 +15,7 @@ import ( "os" "path/filepath" "strings" + "sync" "time" "github.com/Molecule-AI/molecule-monorepo/platform/internal/crypto" @@ -73,6 +74,22 @@ type WorkspaceHandler struct { // memory plugin). main.go sets this to plugin.DeleteNamespace // when MEMORY_PLUGIN_URL is configured. namespaceCleanupFn func(ctx context.Context, workspaceID string) + // asyncWG tracks goroutines launched by goAsync so tests can wait + // for async DB users (restart, provision) before asserting results. + // Matches the pattern from main commit 1c3b4ff3. + asyncWG sync.WaitGroup +} + +func (h *WorkspaceHandler) goAsync(fn func()) { + h.asyncWG.Add(1) + go func() { + defer h.asyncWG.Done() + fn() + }() +} + +func (h *WorkspaceHandler) waitAsyncForTest() { + h.asyncWG.Wait() } func NewWorkspaceHandler(b events.EventEmitter, p *provisioner.Provisioner, platformURL, configsDir string) *WorkspaceHandler {