diff --git a/workspace-server/internal/handlers/restart_template.go b/workspace-server/internal/handlers/restart_template.go new file mode 100644 index 00000000..57193fad --- /dev/null +++ b/workspace-server/internal/handlers/restart_template.go @@ -0,0 +1,96 @@ +package handlers + +import ( + "log" + "os" + "path/filepath" +) + +// restartTemplateInput is the subset of the /workspaces/:id/restart request +// body that affects which config source the provisioner uses. Extracted as +// a type so `resolveRestartTemplate` has a single pure-function signature +// for unit tests — no gin context, no DB, no filesystem writes. +type restartTemplateInput struct { + // Template is an explicit template dir name from the request body. + // Always honoured when resolvable — caller asked by name, that's + // unambiguous consent to overwrite the config volume. + Template string + // ApplyTemplate opts the caller in to name-based auto-match AND the + // runtime-default fallback. Without this flag a restart MUST NOT + // overwrite the user's config volume — a user who edited their + // model/provider/skills/prompts via the Canvas Config tab and hit + // Save+Restart expects their edits to survive. The previous behaviour + // (name-based auto-match unconditionally) silently reverted edits for + // any workspace whose name matched a template dir (e.g. "Hermes Agent" + // → hermes/), which is the regression this fix closes. + ApplyTemplate bool + // RebuildConfig (#239) is the recovery signal used when the workspace's + // config volume was destroyed out-of-band. Tries org-templates as a + // last-resort source so the workspace can self-heal without admin + // intervention. Orthogonal to ApplyTemplate. + RebuildConfig bool +} + +// resolveRestartTemplate chooses the config source for a restart in the +// documented priority order: +// +// 1. Explicit `Template` from the request body (always honoured). +// 2. `ApplyTemplate=true` → name-based auto-match via findTemplateByName. +// 3. `RebuildConfig=true` → org-templates recovery fallback (#239). +// 4. `ApplyTemplate=true` + non-empty dbRuntime → runtime-default template +// (e.g. `hermes-default/`) for runtime-change workflows. +// 5. Fall through → empty path + "existing-volume" label. Provisioner +// reuses the workspace's existing config volume from the previous run. +// +// Returns (templatePath, configLabel). An empty templatePath is the signal +// to the provisioner that the existing volume is authoritative — the flow +// that preserves user edits. +// +// Pure function: no writes, no DB access, no network. Safe to unit-test +// with just a temp directory. +func resolveRestartTemplate(configsDir, wsName, dbRuntime string, body restartTemplateInput) (templatePath, configLabel string) { + template := body.Template + + // Tier 2: name-based auto-match, gated on ApplyTemplate. + if template == "" && body.ApplyTemplate { + template = findTemplateByName(configsDir, wsName) + } + + // Tier 1 + 2 resolve via the same code path — validate + stat. + if template != "" { + candidatePath, resolveErr := resolveInsideRoot(configsDir, template) + if resolveErr != nil { + log.Printf("Restart: invalid template %q: %v — proceeding without it", template, resolveErr) + template = "" + } else if _, err := os.Stat(candidatePath); err == nil { + return candidatePath, template + } else { + log.Printf("Restart: template %q dir not found — proceeding without it", template) + } + } + + // Tier 3: #239 rebuild_config — org-templates as last-resort recovery. + if body.RebuildConfig { + if p, label := resolveOrgTemplate(configsDir, wsName); p != "" { + log.Printf("Restart: rebuild_config — using org-template %s (%s)", label, wsName) + return p, label + } + } + + // Tier 4: runtime-default — apply_template=true + known runtime. + // Use case: Canvas Config tab changed the runtime; we need the new + // runtime's base files (entry point, Dockerfile, skill scaffolding) + // because the existing volume was written by the old runtime. + if body.ApplyTemplate && dbRuntime != "" { + runtimeTemplate := filepath.Join(configsDir, dbRuntime+"-default") + if _, err := os.Stat(runtimeTemplate); err == nil { + label := dbRuntime + "-default" + log.Printf("Restart: applying template %s (runtime change)", label) + return runtimeTemplate, label + } + } + + // Tier 5: reuse existing volume. This is the default, and the path + // the Canvas Save+Restart flow MUST hit to preserve user edits. + return "", "existing-volume" +} diff --git a/workspace-server/internal/handlers/restart_template_test.go b/workspace-server/internal/handlers/restart_template_test.go new file mode 100644 index 00000000..6c44b856 --- /dev/null +++ b/workspace-server/internal/handlers/restart_template_test.go @@ -0,0 +1,178 @@ +package handlers + +import ( + "os" + "path/filepath" + "testing" +) + +// Tests for resolveRestartTemplate — the pure helper that implements the +// priority chain documented on the function. Each test builds a minimal +// temp configsDir, fabricates the specific precondition it exercises, +// and asserts (templatePath, configLabel). +// +// The regression this suite locks in: a default restart (no flags) must +// never auto-apply a template that happens to match the workspace name. +// That was the "model reverts on Save+Restart" bug from +// fix/restart-preserves-user-config. + +// newTemplateDir makes a templates root with named subdirs, each holding +// a minimal config.yaml so findTemplateByName's dir-scan path has +// something to read. Returns the absolute root. +func newTemplateDir(t *testing.T, names ...string) string { + t.Helper() + root := t.TempDir() + for _, n := range names { + dir := filepath.Join(root, n) + if err := os.MkdirAll(dir, 0o755); err != nil { + t.Fatalf("mkdir %s: %v", dir, err) + } + cfg := filepath.Join(dir, "config.yaml") + if err := os.WriteFile(cfg, []byte("name: "+n+"\n"), 0o644); err != nil { + t.Fatalf("write %s: %v", cfg, err) + } + } + return root +} + +// TestResolveRestartTemplate_DefaultRestart_PreservesVolume is the +// regression test for the Canvas Save+Restart bug. A workspace named +// "Hermes Agent" normalises to "hermes-agent" — no dir match — but the +// findTemplateByName second pass would also scan config.yaml's `name:` +// field. We seed a template whose config.yaml DOES have the matching +// name, exactly the worst case. Without apply_template, the helper +// MUST still return empty templatePath. +func TestResolveRestartTemplate_DefaultRestart_PreservesVolume(t *testing.T) { + root := newTemplateDir(t, "hermes") + // Overwrite config.yaml so the name-scan would hit: + cfg := filepath.Join(root, "hermes", "config.yaml") + if err := os.WriteFile(cfg, []byte("name: Hermes Agent\n"), 0o644); err != nil { + t.Fatal(err) + } + + path, label := resolveRestartTemplate(root, "Hermes Agent", "hermes", restartTemplateInput{ + // ApplyTemplate intentionally omitted — this is the default restart. + }) + if path != "" { + t.Errorf("default restart must NOT resolve a template; got path=%q", path) + } + if label != "existing-volume" { + t.Errorf("expected 'existing-volume' label on default restart; got %q", label) + } +} + +// TestResolveRestartTemplate_ExplicitTemplate_AlwaysHonoured verifies +// that passing Template by name works regardless of ApplyTemplate — +// the caller named a template, that's unambiguous consent. +func TestResolveRestartTemplate_ExplicitTemplate_AlwaysHonoured(t *testing.T) { + root := newTemplateDir(t, "langgraph") + + path, label := resolveRestartTemplate(root, "Some Agent", "", restartTemplateInput{ + Template: "langgraph", + }) + if path == "" || label != "langgraph" { + t.Errorf("explicit template must resolve; got path=%q label=%q", path, label) + } +} + +// TestResolveRestartTemplate_ApplyTemplate_NameMatch verifies that +// setting ApplyTemplate re-enables the name-based auto-match for +// operators who actually want "reset this workspace to its template". +func TestResolveRestartTemplate_ApplyTemplate_NameMatch(t *testing.T) { + root := newTemplateDir(t, "hermes") + + path, label := resolveRestartTemplate(root, "Hermes", "", restartTemplateInput{ + ApplyTemplate: true, + }) + if path == "" || label != "hermes" { + t.Errorf("apply_template should name-match; got path=%q label=%q", path, label) + } +} + +// TestResolveRestartTemplate_ApplyTemplate_RuntimeDefault verifies the +// runtime-change flow: when the Canvas Config tab changes the runtime, +// the restart handler needs to lay down the new runtime's base files +// via `-default/`. Matches the existing behaviour comment. +func TestResolveRestartTemplate_ApplyTemplate_RuntimeDefault(t *testing.T) { + root := newTemplateDir(t, "langgraph-default") + + path, label := resolveRestartTemplate(root, "Some Workspace", "langgraph", restartTemplateInput{ + ApplyTemplate: true, + }) + if path == "" || label != "langgraph-default" { + t.Errorf("apply_template + dbRuntime should resolve runtime-default; got path=%q label=%q", path, label) + } +} + +// TestResolveRestartTemplate_ApplyTemplate_NoMatch_NoRuntime falls all +// the way through to the reuse-volume path when neither name nor +// runtime-default resolves. +func TestResolveRestartTemplate_ApplyTemplate_NoMatch_NoRuntime(t *testing.T) { + root := newTemplateDir(t) // empty templates dir + + path, label := resolveRestartTemplate(root, "Orphan", "", restartTemplateInput{ + ApplyTemplate: true, + }) + if path != "" { + t.Errorf("nothing to apply → expected empty path; got %q", path) + } + if label != "existing-volume" { + t.Errorf("expected 'existing-volume' fallback; got %q", label) + } +} + +// TestResolveRestartTemplate_InvalidExplicitTemplate_ProceedsWithout +// covers the defensive path where an explicit Template doesn't resolve +// to a valid dir (e.g. traversal attempt, deleted template). The helper +// must log + fall through, not crash or escape the root. +func TestResolveRestartTemplate_InvalidExplicitTemplate_ProceedsWithout(t *testing.T) { + root := newTemplateDir(t, "langgraph") + + path, label := resolveRestartTemplate(root, "Some Agent", "", restartTemplateInput{ + Template: "../../etc/passwd", + }) + if path != "" { + t.Errorf("traversal attempt must not resolve; got %q", path) + } + if label != "existing-volume" { + t.Errorf("expected 'existing-volume' fallback on invalid template; got %q", label) + } +} + +// TestResolveRestartTemplate_NonExistentExplicitTemplate mirrors the +// above but for a syntactically-valid name that simply doesn't exist +// on disk (e.g. template was manually deleted). Must fall through. +func TestResolveRestartTemplate_NonExistentExplicitTemplate(t *testing.T) { + root := newTemplateDir(t, "langgraph") + + path, label := resolveRestartTemplate(root, "Some Agent", "", restartTemplateInput{ + Template: "deleted-template", + }) + if path != "" { + t.Errorf("missing template must not resolve; got %q", path) + } + if label != "existing-volume" { + t.Errorf("expected 'existing-volume' fallback on missing template; got %q", label) + } +} + +// TestResolveRestartTemplate_Priority_ExplicitBeatsApplyTemplate proves +// that an explicit Template takes precedence over a name-based match. +// Scenario: workspace "Hermes" with ApplyTemplate=true + explicit +// Template="langgraph" — caller wants langgraph, not hermes. +func TestResolveRestartTemplate_Priority_ExplicitBeatsApplyTemplate(t *testing.T) { + root := newTemplateDir(t, "hermes", "langgraph") + + path, label := resolveRestartTemplate(root, "Hermes", "", restartTemplateInput{ + Template: "langgraph", + ApplyTemplate: true, + }) + if label != "langgraph" { + t.Errorf("explicit Template must win; got label=%q", label) + } + // Verify the path is actually inside the langgraph template dir + expected := filepath.Join(root, "langgraph") + if path != expected { + t.Errorf("expected path %q, got %q", expected, path) + } +} diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go index 3228122d..9b3b2bfa 100644 --- a/workspace-server/internal/handlers/workspace_restart.go +++ b/workspace-server/internal/handlers/workspace_restart.go @@ -5,8 +5,6 @@ import ( "database/sql" "log" "net/http" - "os" - "path/filepath" "strings" "sync" "time" @@ -127,53 +125,11 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) { } c.ShouldBindJSON(&body) - // Resolve template path in priority order: - // 1. Explicit template from request body - // 2. Runtime-specific default template (e.g. claude-code-default/) - // 3. Name-based match in templates directory - // 4. No template — the volume already has configs from previous run - var templatePath string - var configFiles map[string][]byte - configLabel := "existing-volume" - - template := body.Template - if template == "" { - template = findTemplateByName(h.configsDir, wsName) - } - if template != "" { - candidatePath, resolveErr := resolveInsideRoot(h.configsDir, template) - if resolveErr != nil { - log.Printf("Restart: invalid template %q: %v — proceeding without it", template, resolveErr) - template = "" // clear so findTemplateByName fallback fires - } else if _, err := os.Stat(candidatePath); err == nil { - templatePath = candidatePath - configLabel = template - } else { - log.Printf("Restart: template %q dir not found — proceeding without it", template) - } - } - - // #239: rebuild_config=true — try org-templates as last-resort source so a - // workspace with a destroyed config volume can self-recover without admin - // intervention. Only fires when no other template was resolved above. - if templatePath == "" && body.RebuildConfig { - if p, label := resolveOrgTemplate(h.configsDir, wsName); p != "" { - templatePath = p - configLabel = label - log.Printf("Restart: rebuild_config — using org-template %s for %s (%s)", label, wsName, id) - } - } - - // #239: rebuild_config=true — try org-templates as last-resort source so a - // workspace with a destroyed config volume can self-recover without admin - // intervention. Only fires when no other template was resolved above. - if templatePath == "" && body.RebuildConfig { - if p, label := resolveOrgTemplate(h.configsDir, wsName); p != "" { - templatePath = p - configLabel = label - log.Printf("Restart: rebuild_config — using org-template %s for %s (%s)", label, wsName, id) - } - } + templatePath, configLabel := resolveRestartTemplate(h.configsDir, wsName, dbRuntime, restartTemplateInput{ + Template: body.Template, + ApplyTemplate: body.ApplyTemplate, + RebuildConfig: body.RebuildConfig, + }) if templatePath == "" { log.Printf("Restart: reusing existing config volume for %s (%s)", wsName, id) @@ -181,21 +137,10 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) { log.Printf("Restart: using template %s for %s (%s)", templatePath, wsName, id) } + var configFiles map[string][]byte payload := models.CreateWorkspacePayload{Name: wsName, Tier: tier, Runtime: containerRuntime} log.Printf("Restart: workspace %s (%s) runtime=%q", wsName, id, containerRuntime) - // Apply runtime-default template ONLY when explicitly requested via "apply_template": true. - // Use case: runtime was changed via Config tab — need new runtime's base files. - // Normal restarts preserve existing config volume (user's model, skills, prompts). - if templatePath == "" && body.ApplyTemplate && dbRuntime != "" { - runtimeTemplate := filepath.Join(h.configsDir, dbRuntime+"-default") - if _, err := os.Stat(runtimeTemplate); err == nil { - templatePath = runtimeTemplate - configLabel = dbRuntime + "-default" - log.Printf("Restart: applying template %s (runtime change)", configLabel) - } - } - // #12: ?reset=true (or body.Reset) discards the claude-sessions volume // before restart, giving the agent a clean /root/.claude/sessions dir. resetClaudeSession := c.Query("reset") == "true" || body.Reset