fix(restart): preserve workspace template on SaaS re-provision (#33, #32 keystone) #3010

Merged
core-devops merged 1 commits from fix/rfc2843-33-restart-preserves-template into main 2026-06-17 09:22:47 +00:00
6 changed files with 110 additions and 0 deletions
@@ -734,6 +734,21 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
}
}
// RFC#2843 #33: persist the template name so the RESTART / re-provision path
// can re-deliver the SAME template's config.yaml + prompts (and re-run the
// declared-plugin reconcile). Without this, the auto-restart cycle rebuilds
// the provision payload with template="" → the SaaS re-provision has no
// TemplateIdentity → config degrades to a 218-byte stub and skills drop on
// every restart. Non-fatal: a write failure must not abort an otherwise-good
// create (the row is live; restart just falls back to the old stub behavior).
if t := strings.TrimSpace(payload.Template); t != "" {
if _, dbErr := tx.ExecContext(ctx,
`UPDATE workspaces SET template = $2, updated_at = now() WHERE id = $1`,
id, t); dbErr != nil {
log.Printf("Create workspace %s: failed to persist template %q: %v (continuing — restart will fall back to stub config)", id, t, dbErr)
}
}
// Persist initial secrets from the create payload (inside same transaction).
// nil/empty map is a no-op. Any failure rolls back the workspace insert
// so we never have a workspace row without its intended secrets.
@@ -7,6 +7,7 @@ import (
"fmt"
"log"
"net/http"
"strings"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
@@ -526,6 +527,30 @@ func withStoredCompute(ctx context.Context, workspaceID string, payload models.C
return payload
}
// storedWorkspaceTemplate returns the template a workspace was created from
// (workspaces.template), or "" if none / unavailable. RFC#2843 #33: the
// auto-restart cycle uses this to restore payload.Template on the SaaS
// re-provision so config.yaml + prompts (and the declared-plugin reconcile)
// are re-delivered from the SAME template — instead of re-provisioning with
// template="" which degraded the box to a 218-byte stub and dropped skills.
// Fail-soft: any error (missing column on an un-migrated DB, no row) → "".
func storedWorkspaceTemplate(ctx context.Context, workspaceID string) string {
if db.DB == nil {
return ""
}
var tmpl string
if err := db.DB.QueryRowContext(ctx,
`SELECT COALESCE(template, '') FROM workspaces WHERE id = $1`,
workspaceID,
).Scan(&tmpl); err != nil {
if err != sql.ErrNoRows {
log.Printf("storedWorkspaceTemplate: load template for %s failed: %v", workspaceID, err)
}
return ""
}
return strings.TrimSpace(tmpl)
}
// workspaceComputeOptionsResponse is the SSOT payload the canvas Container-Config
// tab consumes to populate its provider + instance-type dropdowns (core#2489).
// It is derived entirely from the allowlist + defaults in this file, so the UI
@@ -984,6 +984,18 @@ func (h *WorkspaceHandler) runRestartCycle(workspaceID string) {
// Runtime from DB — no more config file parsing
payload := withStoredCompute(ctx, workspaceID, models.CreateWorkspacePayload{Name: wsName, Tier: tier, Runtime: dbRuntime})
// RFC#2843 #33: on SaaS (cpProv), restore the persisted template so the
// re-provision re-delivers config.yaml + prompts — TemplateIdentity is
// derived from payload.Template (workspace_provision.go). Without this the
// SaaS re-provision ran with template="" → 218-byte stub config + dropped
// skills on every restart. Docker keeps its persistent config volume, so it
// retains the "do not re-apply templates" behavior (template left empty).
if h.cpProv != nil {
if storedTmpl := storedWorkspaceTemplate(ctx, workspaceID); storedTmpl != "" {
payload.Template = storedTmpl
}
}
// Snapshot restart-context data before the new session overwrites
// last_heartbeat_at. Issue #19 Layer 1.
restartData := loadRestartContextData(ctx, workspaceID)
@@ -1185,6 +1197,13 @@ func (h *WorkspaceHandler) Resume(c *gin.Context) {
"name": ws.name, "tier": ws.tier, "runtime": ws.runtime,
})
payload := withStoredCompute(ctx, ws.id, models.CreateWorkspacePayload{Name: ws.name, Tier: ws.tier, Runtime: ws.runtime})
// RFC#2843 #33: restore the persisted template on SaaS resume so config +
// prompts re-deliver (see runRestartCycle for the full rationale).
if h.cpProv != nil {
if storedTmpl := storedWorkspaceTemplate(ctx, ws.id); storedTmpl != "" {
payload.Template = storedTmpl
}
}
// Resume is provision-only (workspace is paused, no live container
// to stop). provisionWorkspaceAuto handles backend routing and the
// no-backend mark-failed fallback identically to Create. Pre-
@@ -0,0 +1,35 @@
package handlers
import (
"context"
"testing"
"github.com/DATA-DOG/go-sqlmock"
)
// TestStoredWorkspaceTemplate pins the RFC#2843 #33 restart-restore reader:
// the auto-restart cycle reads the persisted workspaces.template so the SaaS
// re-provision re-delivers config.yaml + prompts (TemplateIdentity is derived
// from payload.Template) instead of degrading to a 218-byte stub.
func TestStoredWorkspaceTemplate(t *testing.T) {
mock := setupTestDB(t)
const wsID = "ws-tmpl-1"
t.Run("returns persisted template", func(t *testing.T) {
mock.ExpectQuery(`SELECT COALESCE\(template, ''\) FROM workspaces WHERE id`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"template"}).AddRow("seo-agent"))
if got := storedWorkspaceTemplate(context.Background(), wsID); got != "seo-agent" {
t.Fatalf("storedWorkspaceTemplate = %q, want seo-agent", got)
}
})
t.Run("empty template → empty string (default/blank workspace)", func(t *testing.T) {
mock.ExpectQuery(`SELECT COALESCE\(template, ''\) FROM workspaces WHERE id`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"template"}).AddRow(""))
if got := storedWorkspaceTemplate(context.Background(), wsID); got != "" {
t.Fatalf("storedWorkspaceTemplate = %q, want empty", got)
}
})
}
@@ -0,0 +1 @@
ALTER TABLE workspaces DROP COLUMN IF EXISTS template;
@@ -0,0 +1,15 @@
-- workspaces.template: the template a workspace was created from (the
-- workspace-configs-templates folder name, e.g. "seo-agent"). Persisted at
-- create time so the RESTART / re-provision path can re-deliver the SAME
-- template's config.yaml + prompts (and re-run the declared-plugin reconcile)
-- instead of re-provisioning with template="" — which silently degraded the
-- box to a 218-byte stub config and dropped its skills on EVERY restart
-- (RFC#2843 #32 keystone / #33; root-caused 2026-06-17 from live tenant-box
-- logs: the auto-restart cycle rebuilt the provision payload with Name/Tier/
-- Runtime only, never the template, so the SaaS re-provision had no
-- TemplateIdentity to fetch assets from).
--
-- Empty string = no template (default/blank workspaces). NOT NULL DEFAULT ''
-- keeps existing rows valid and the restart-side read trivial (no NULL handling).
-- Idempotent + additive.
ALTER TABLE workspaces ADD COLUMN IF NOT EXISTS template TEXT NOT NULL DEFAULT '';