forked from molecule-ai/molecule-core
Merge pull request #1888 from Molecule-AI/fix/restart-preserves-user-config
fix(restart): preserve user config volume on default restart (#1822 drift-risk-3)
This commit is contained in:
commit
8ef0b653bd
96
workspace-server/internal/handlers/restart_template.go
Normal file
96
workspace-server/internal/handlers/restart_template.go
Normal file
@ -0,0 +1,96 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// restartTemplateInput is the subset of the /workspaces/:id/restart request
|
||||
// body that affects which config source the provisioner uses. Extracted as
|
||||
// a type so `resolveRestartTemplate` has a single pure-function signature
|
||||
// for unit tests — no gin context, no DB, no filesystem writes.
|
||||
type restartTemplateInput struct {
|
||||
// Template is an explicit template dir name from the request body.
|
||||
// Always honoured when resolvable — caller asked by name, that's
|
||||
// unambiguous consent to overwrite the config volume.
|
||||
Template string
|
||||
// ApplyTemplate opts the caller in to name-based auto-match AND the
|
||||
// runtime-default fallback. Without this flag a restart MUST NOT
|
||||
// overwrite the user's config volume — a user who edited their
|
||||
// model/provider/skills/prompts via the Canvas Config tab and hit
|
||||
// Save+Restart expects their edits to survive. The previous behaviour
|
||||
// (name-based auto-match unconditionally) silently reverted edits for
|
||||
// any workspace whose name matched a template dir (e.g. "Hermes Agent"
|
||||
// → hermes/), which is the regression this fix closes.
|
||||
ApplyTemplate bool
|
||||
// RebuildConfig (#239) is the recovery signal used when the workspace's
|
||||
// config volume was destroyed out-of-band. Tries org-templates as a
|
||||
// last-resort source so the workspace can self-heal without admin
|
||||
// intervention. Orthogonal to ApplyTemplate.
|
||||
RebuildConfig bool
|
||||
}
|
||||
|
||||
// resolveRestartTemplate chooses the config source for a restart in the
|
||||
// documented priority order:
|
||||
//
|
||||
// 1. Explicit `Template` from the request body (always honoured).
|
||||
// 2. `ApplyTemplate=true` → name-based auto-match via findTemplateByName.
|
||||
// 3. `RebuildConfig=true` → org-templates recovery fallback (#239).
|
||||
// 4. `ApplyTemplate=true` + non-empty dbRuntime → runtime-default template
|
||||
// (e.g. `hermes-default/`) for runtime-change workflows.
|
||||
// 5. Fall through → empty path + "existing-volume" label. Provisioner
|
||||
// reuses the workspace's existing config volume from the previous run.
|
||||
//
|
||||
// Returns (templatePath, configLabel). An empty templatePath is the signal
|
||||
// to the provisioner that the existing volume is authoritative — the flow
|
||||
// that preserves user edits.
|
||||
//
|
||||
// Pure function: no writes, no DB access, no network. Safe to unit-test
|
||||
// with just a temp directory.
|
||||
func resolveRestartTemplate(configsDir, wsName, dbRuntime string, body restartTemplateInput) (templatePath, configLabel string) {
|
||||
template := body.Template
|
||||
|
||||
// Tier 2: name-based auto-match, gated on ApplyTemplate.
|
||||
if template == "" && body.ApplyTemplate {
|
||||
template = findTemplateByName(configsDir, wsName)
|
||||
}
|
||||
|
||||
// Tier 1 + 2 resolve via the same code path — validate + stat.
|
||||
if template != "" {
|
||||
candidatePath, resolveErr := resolveInsideRoot(configsDir, template)
|
||||
if resolveErr != nil {
|
||||
log.Printf("Restart: invalid template %q: %v — proceeding without it", template, resolveErr)
|
||||
template = ""
|
||||
} else if _, err := os.Stat(candidatePath); err == nil {
|
||||
return candidatePath, template
|
||||
} else {
|
||||
log.Printf("Restart: template %q dir not found — proceeding without it", template)
|
||||
}
|
||||
}
|
||||
|
||||
// Tier 3: #239 rebuild_config — org-templates as last-resort recovery.
|
||||
if body.RebuildConfig {
|
||||
if p, label := resolveOrgTemplate(configsDir, wsName); p != "" {
|
||||
log.Printf("Restart: rebuild_config — using org-template %s (%s)", label, wsName)
|
||||
return p, label
|
||||
}
|
||||
}
|
||||
|
||||
// Tier 4: runtime-default — apply_template=true + known runtime.
|
||||
// Use case: Canvas Config tab changed the runtime; we need the new
|
||||
// runtime's base files (entry point, Dockerfile, skill scaffolding)
|
||||
// because the existing volume was written by the old runtime.
|
||||
if body.ApplyTemplate && dbRuntime != "" {
|
||||
runtimeTemplate := filepath.Join(configsDir, dbRuntime+"-default")
|
||||
if _, err := os.Stat(runtimeTemplate); err == nil {
|
||||
label := dbRuntime + "-default"
|
||||
log.Printf("Restart: applying template %s (runtime change)", label)
|
||||
return runtimeTemplate, label
|
||||
}
|
||||
}
|
||||
|
||||
// Tier 5: reuse existing volume. This is the default, and the path
|
||||
// the Canvas Save+Restart flow MUST hit to preserve user edits.
|
||||
return "", "existing-volume"
|
||||
}
|
||||
178
workspace-server/internal/handlers/restart_template_test.go
Normal file
178
workspace-server/internal/handlers/restart_template_test.go
Normal file
@ -0,0 +1,178 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Tests for resolveRestartTemplate — the pure helper that implements the
|
||||
// priority chain documented on the function. Each test builds a minimal
|
||||
// temp configsDir, fabricates the specific precondition it exercises,
|
||||
// and asserts (templatePath, configLabel).
|
||||
//
|
||||
// The regression this suite locks in: a default restart (no flags) must
|
||||
// never auto-apply a template that happens to match the workspace name.
|
||||
// That was the "model reverts on Save+Restart" bug from
|
||||
// fix/restart-preserves-user-config.
|
||||
|
||||
// newTemplateDir makes a templates root with named subdirs, each holding
|
||||
// a minimal config.yaml so findTemplateByName's dir-scan path has
|
||||
// something to read. Returns the absolute root.
|
||||
func newTemplateDir(t *testing.T, names ...string) string {
|
||||
t.Helper()
|
||||
root := t.TempDir()
|
||||
for _, n := range names {
|
||||
dir := filepath.Join(root, n)
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
t.Fatalf("mkdir %s: %v", dir, err)
|
||||
}
|
||||
cfg := filepath.Join(dir, "config.yaml")
|
||||
if err := os.WriteFile(cfg, []byte("name: "+n+"\n"), 0o644); err != nil {
|
||||
t.Fatalf("write %s: %v", cfg, err)
|
||||
}
|
||||
}
|
||||
return root
|
||||
}
|
||||
|
||||
// TestResolveRestartTemplate_DefaultRestart_PreservesVolume is the
|
||||
// regression test for the Canvas Save+Restart bug. A workspace named
|
||||
// "Hermes Agent" normalises to "hermes-agent" — no dir match — but the
|
||||
// findTemplateByName second pass would also scan config.yaml's `name:`
|
||||
// field. We seed a template whose config.yaml DOES have the matching
|
||||
// name, exactly the worst case. Without apply_template, the helper
|
||||
// MUST still return empty templatePath.
|
||||
func TestResolveRestartTemplate_DefaultRestart_PreservesVolume(t *testing.T) {
|
||||
root := newTemplateDir(t, "hermes")
|
||||
// Overwrite config.yaml so the name-scan would hit:
|
||||
cfg := filepath.Join(root, "hermes", "config.yaml")
|
||||
if err := os.WriteFile(cfg, []byte("name: Hermes Agent\n"), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
path, label := resolveRestartTemplate(root, "Hermes Agent", "hermes", restartTemplateInput{
|
||||
// ApplyTemplate intentionally omitted — this is the default restart.
|
||||
})
|
||||
if path != "" {
|
||||
t.Errorf("default restart must NOT resolve a template; got path=%q", path)
|
||||
}
|
||||
if label != "existing-volume" {
|
||||
t.Errorf("expected 'existing-volume' label on default restart; got %q", label)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveRestartTemplate_ExplicitTemplate_AlwaysHonoured verifies
|
||||
// that passing Template by name works regardless of ApplyTemplate —
|
||||
// the caller named a template, that's unambiguous consent.
|
||||
func TestResolveRestartTemplate_ExplicitTemplate_AlwaysHonoured(t *testing.T) {
|
||||
root := newTemplateDir(t, "langgraph")
|
||||
|
||||
path, label := resolveRestartTemplate(root, "Some Agent", "", restartTemplateInput{
|
||||
Template: "langgraph",
|
||||
})
|
||||
if path == "" || label != "langgraph" {
|
||||
t.Errorf("explicit template must resolve; got path=%q label=%q", path, label)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveRestartTemplate_ApplyTemplate_NameMatch verifies that
|
||||
// setting ApplyTemplate re-enables the name-based auto-match for
|
||||
// operators who actually want "reset this workspace to its template".
|
||||
func TestResolveRestartTemplate_ApplyTemplate_NameMatch(t *testing.T) {
|
||||
root := newTemplateDir(t, "hermes")
|
||||
|
||||
path, label := resolveRestartTemplate(root, "Hermes", "", restartTemplateInput{
|
||||
ApplyTemplate: true,
|
||||
})
|
||||
if path == "" || label != "hermes" {
|
||||
t.Errorf("apply_template should name-match; got path=%q label=%q", path, label)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveRestartTemplate_ApplyTemplate_RuntimeDefault verifies the
|
||||
// runtime-change flow: when the Canvas Config tab changes the runtime,
|
||||
// the restart handler needs to lay down the new runtime's base files
|
||||
// via `<runtime>-default/`. Matches the existing behaviour comment.
|
||||
func TestResolveRestartTemplate_ApplyTemplate_RuntimeDefault(t *testing.T) {
|
||||
root := newTemplateDir(t, "langgraph-default")
|
||||
|
||||
path, label := resolveRestartTemplate(root, "Some Workspace", "langgraph", restartTemplateInput{
|
||||
ApplyTemplate: true,
|
||||
})
|
||||
if path == "" || label != "langgraph-default" {
|
||||
t.Errorf("apply_template + dbRuntime should resolve runtime-default; got path=%q label=%q", path, label)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveRestartTemplate_ApplyTemplate_NoMatch_NoRuntime falls all
|
||||
// the way through to the reuse-volume path when neither name nor
|
||||
// runtime-default resolves.
|
||||
func TestResolveRestartTemplate_ApplyTemplate_NoMatch_NoRuntime(t *testing.T) {
|
||||
root := newTemplateDir(t) // empty templates dir
|
||||
|
||||
path, label := resolveRestartTemplate(root, "Orphan", "", restartTemplateInput{
|
||||
ApplyTemplate: true,
|
||||
})
|
||||
if path != "" {
|
||||
t.Errorf("nothing to apply → expected empty path; got %q", path)
|
||||
}
|
||||
if label != "existing-volume" {
|
||||
t.Errorf("expected 'existing-volume' fallback; got %q", label)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveRestartTemplate_InvalidExplicitTemplate_ProceedsWithout
|
||||
// covers the defensive path where an explicit Template doesn't resolve
|
||||
// to a valid dir (e.g. traversal attempt, deleted template). The helper
|
||||
// must log + fall through, not crash or escape the root.
|
||||
func TestResolveRestartTemplate_InvalidExplicitTemplate_ProceedsWithout(t *testing.T) {
|
||||
root := newTemplateDir(t, "langgraph")
|
||||
|
||||
path, label := resolveRestartTemplate(root, "Some Agent", "", restartTemplateInput{
|
||||
Template: "../../etc/passwd",
|
||||
})
|
||||
if path != "" {
|
||||
t.Errorf("traversal attempt must not resolve; got %q", path)
|
||||
}
|
||||
if label != "existing-volume" {
|
||||
t.Errorf("expected 'existing-volume' fallback on invalid template; got %q", label)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveRestartTemplate_NonExistentExplicitTemplate mirrors the
|
||||
// above but for a syntactically-valid name that simply doesn't exist
|
||||
// on disk (e.g. template was manually deleted). Must fall through.
|
||||
func TestResolveRestartTemplate_NonExistentExplicitTemplate(t *testing.T) {
|
||||
root := newTemplateDir(t, "langgraph")
|
||||
|
||||
path, label := resolveRestartTemplate(root, "Some Agent", "", restartTemplateInput{
|
||||
Template: "deleted-template",
|
||||
})
|
||||
if path != "" {
|
||||
t.Errorf("missing template must not resolve; got %q", path)
|
||||
}
|
||||
if label != "existing-volume" {
|
||||
t.Errorf("expected 'existing-volume' fallback on missing template; got %q", label)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveRestartTemplate_Priority_ExplicitBeatsApplyTemplate proves
|
||||
// that an explicit Template takes precedence over a name-based match.
|
||||
// Scenario: workspace "Hermes" with ApplyTemplate=true + explicit
|
||||
// Template="langgraph" — caller wants langgraph, not hermes.
|
||||
func TestResolveRestartTemplate_Priority_ExplicitBeatsApplyTemplate(t *testing.T) {
|
||||
root := newTemplateDir(t, "hermes", "langgraph")
|
||||
|
||||
path, label := resolveRestartTemplate(root, "Hermes", "", restartTemplateInput{
|
||||
Template: "langgraph",
|
||||
ApplyTemplate: true,
|
||||
})
|
||||
if label != "langgraph" {
|
||||
t.Errorf("explicit Template must win; got label=%q", label)
|
||||
}
|
||||
// Verify the path is actually inside the langgraph template dir
|
||||
expected := filepath.Join(root, "langgraph")
|
||||
if path != expected {
|
||||
t.Errorf("expected path %q, got %q", expected, path)
|
||||
}
|
||||
}
|
||||
@ -5,8 +5,6 @@ import (
|
||||
"database/sql"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@ -127,53 +125,11 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) {
|
||||
}
|
||||
c.ShouldBindJSON(&body)
|
||||
|
||||
// Resolve template path in priority order:
|
||||
// 1. Explicit template from request body
|
||||
// 2. Runtime-specific default template (e.g. claude-code-default/)
|
||||
// 3. Name-based match in templates directory
|
||||
// 4. No template — the volume already has configs from previous run
|
||||
var templatePath string
|
||||
var configFiles map[string][]byte
|
||||
configLabel := "existing-volume"
|
||||
|
||||
template := body.Template
|
||||
if template == "" {
|
||||
template = findTemplateByName(h.configsDir, wsName)
|
||||
}
|
||||
if template != "" {
|
||||
candidatePath, resolveErr := resolveInsideRoot(h.configsDir, template)
|
||||
if resolveErr != nil {
|
||||
log.Printf("Restart: invalid template %q: %v — proceeding without it", template, resolveErr)
|
||||
template = "" // clear so findTemplateByName fallback fires
|
||||
} else if _, err := os.Stat(candidatePath); err == nil {
|
||||
templatePath = candidatePath
|
||||
configLabel = template
|
||||
} else {
|
||||
log.Printf("Restart: template %q dir not found — proceeding without it", template)
|
||||
}
|
||||
}
|
||||
|
||||
// #239: rebuild_config=true — try org-templates as last-resort source so a
|
||||
// workspace with a destroyed config volume can self-recover without admin
|
||||
// intervention. Only fires when no other template was resolved above.
|
||||
if templatePath == "" && body.RebuildConfig {
|
||||
if p, label := resolveOrgTemplate(h.configsDir, wsName); p != "" {
|
||||
templatePath = p
|
||||
configLabel = label
|
||||
log.Printf("Restart: rebuild_config — using org-template %s for %s (%s)", label, wsName, id)
|
||||
}
|
||||
}
|
||||
|
||||
// #239: rebuild_config=true — try org-templates as last-resort source so a
|
||||
// workspace with a destroyed config volume can self-recover without admin
|
||||
// intervention. Only fires when no other template was resolved above.
|
||||
if templatePath == "" && body.RebuildConfig {
|
||||
if p, label := resolveOrgTemplate(h.configsDir, wsName); p != "" {
|
||||
templatePath = p
|
||||
configLabel = label
|
||||
log.Printf("Restart: rebuild_config — using org-template %s for %s (%s)", label, wsName, id)
|
||||
}
|
||||
}
|
||||
templatePath, configLabel := resolveRestartTemplate(h.configsDir, wsName, dbRuntime, restartTemplateInput{
|
||||
Template: body.Template,
|
||||
ApplyTemplate: body.ApplyTemplate,
|
||||
RebuildConfig: body.RebuildConfig,
|
||||
})
|
||||
|
||||
if templatePath == "" {
|
||||
log.Printf("Restart: reusing existing config volume for %s (%s)", wsName, id)
|
||||
@ -181,21 +137,10 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) {
|
||||
log.Printf("Restart: using template %s for %s (%s)", templatePath, wsName, id)
|
||||
}
|
||||
|
||||
var configFiles map[string][]byte
|
||||
payload := models.CreateWorkspacePayload{Name: wsName, Tier: tier, Runtime: containerRuntime}
|
||||
log.Printf("Restart: workspace %s (%s) runtime=%q", wsName, id, containerRuntime)
|
||||
|
||||
// Apply runtime-default template ONLY when explicitly requested via "apply_template": true.
|
||||
// Use case: runtime was changed via Config tab — need new runtime's base files.
|
||||
// Normal restarts preserve existing config volume (user's model, skills, prompts).
|
||||
if templatePath == "" && body.ApplyTemplate && dbRuntime != "" {
|
||||
runtimeTemplate := filepath.Join(h.configsDir, dbRuntime+"-default")
|
||||
if _, err := os.Stat(runtimeTemplate); err == nil {
|
||||
templatePath = runtimeTemplate
|
||||
configLabel = dbRuntime + "-default"
|
||||
log.Printf("Restart: applying template %s (runtime change)", configLabel)
|
||||
}
|
||||
}
|
||||
|
||||
// #12: ?reset=true (or body.Reset) discards the claude-sessions volume
|
||||
// before restart, giving the agent a clean /root/.claude/sessions dir.
|
||||
resetClaudeSession := c.Query("reset") == "true" || body.Reset
|
||||
|
||||
Loading…
Reference in New Issue
Block a user