diff --git a/workspace-server/internal/handlers/template_schedules.go b/workspace-server/internal/handlers/template_schedules.go new file mode 100644 index 000000000..76a7aeb68 --- /dev/null +++ b/workspace-server/internal/handlers/template_schedules.go @@ -0,0 +1,180 @@ +package handlers + +// template_schedules.go — read a workspace template's `schedules:` +// block and seed workspace_schedules with source='template'. Mirrors +// the org/import flow (org_import.go) so a workspace created directly +// from a workspace template (e.g. via WorkspaceHandler.Create) lands +// with the same schedule grid the org/import path would have produced. +// +// Issue #24 contract (also enforced by org_import + schedules.go): +// - INSERT new rows with source='template' +// - On (workspace_id, name) collision, only refresh template-source +// rows; runtime-added rows survive re-provisioning untouched +// - Never DELETE (additive only) +// +// The actual INSERT statement is the canonical orgImportScheduleSQL +// defined in org.go — reused here verbatim so the four guarantees +// stay in one place. +// +// Hostile-template defenses (a tenant can upload a config.yaml via +// POST /templates/import or webhook-sync a repo they control): +// - config.yaml is loaded through a 1 MiB LimitReader so a YAML +// anchor-bomb / billion-laughs cannot pre-explode memory before +// unmarshal returns. +// - len(schedules), per-schedule cron length, and resolved prompt +// body length are all bounded; over-sized entries are skipped +// rather than committed. +// - Per-row insert errors and ctx cancellation surface to the +// caller via the returned counts so partial-seed states are +// observable (workspace.go Create logs the (seeded, skipped) +// pair when skipped > 0). + +import ( + "context" + "errors" + "fmt" + "io" + "log" + "os" + "path/filepath" + "time" + + "gopkg.in/yaml.v3" + + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db" + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/scheduler" +) + +// Bounds protecting the seeder against hostile or buggy templates. +// All chosen with generous headroom relative to legitimate use +// (reno-stars org template — the largest production schedule grid — +// runs ~10 entries per workspace, each prompt body well under 1 KiB). +const ( + maxTemplateConfigYAMLBytes int64 = 1 << 20 // 1 MiB — hard cap on config.yaml size + maxTemplateSchedules = 100 // 10x current largest grid + maxScheduleCronExprLen = 128 // cron-spec syntax is short by construction + maxSchedulePromptBytes = 16 << 10 // 16 KiB after prompt_file resolution +) + +// templateConfigSchedules is the minimal shape parsed from a workspace +// template's config.yaml. Only the `schedules:` block is modelled; +// the rest of the file (providers, runtime_config, …) is opaque to +// this loader and continues to flow through the existing pass-through +// in workspace_provision.go. +type templateConfigSchedules struct { + Schedules []OrgSchedule `yaml:"schedules"` +} + +// parseTemplateSchedules reads `/config.yaml` and +// returns its `schedules:` block (nil + nil error when the file is +// absent or the block is empty). +// +// The file is read through a 1 MiB LimitReader so a billion-laughs +// or anchor-explosion YAML cannot pre-explode memory before +// Unmarshal returns. Returns an error only when a present +// config.yaml fails to read or parse — callers should treat that as +// a template-author bug rather than a runtime fault. The Create +// handler logs the error and continues so a broken schedules block +// can never block workspace provisioning. +func parseTemplateSchedules(templatePath string) ([]OrgSchedule, error) { + if templatePath == "" { + return nil, nil + } + f, err := os.Open(filepath.Join(templatePath, "config.yaml")) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + return nil, fmt.Errorf("open template config.yaml: %w", err) + } + defer f.Close() + + // Read maxTemplateConfigYAMLBytes+1 — if we filled the buffer the + // underlying file exceeded the cap and we refuse to unmarshal. + data, err := io.ReadAll(io.LimitReader(f, maxTemplateConfigYAMLBytes+1)) + if err != nil { + return nil, fmt.Errorf("read template config.yaml: %w", err) + } + if int64(len(data)) > maxTemplateConfigYAMLBytes { + return nil, fmt.Errorf("template config.yaml exceeds %d-byte cap", maxTemplateConfigYAMLBytes) + } + var cfg templateConfigSchedules + if err := yaml.Unmarshal(data, &cfg); err != nil { + return nil, fmt.Errorf("parse template config.yaml schedules: %w", err) + } + if len(cfg.Schedules) > maxTemplateSchedules { + return nil, fmt.Errorf("template declares %d schedules; cap is %d", len(cfg.Schedules), maxTemplateSchedules) + } + return cfg.Schedules, nil +} + +// seedTemplateSchedules INSERTs (or refreshes) each schedule into +// workspace_schedules with source='template'. Returns (seeded, +// skipped) counts so the caller can observe partial-seed states. +// +// Prompt body resolution mirrors org_import.go: inline `prompt:` wins, +// else `prompt_file:` is resolved relative to templatePath via +// resolvePromptRef. Per-schedule failures (bad cron, missing prompt +// file, DB error, oversize input) are logged with the schedule name +// quoted via %q (CRLF-safe) and skipped so one bad row never breaks +// the rest of the grid. A cancelled ctx breaks the loop early. +// +// Timezone defaults to "UTC" when unset. Env-var expansion in the +// timezone field is intentionally not performed — that mirrors the +// org/import behavior; template authors should pick a literal IANA +// zone (or rely on UTC + operator overrides per-tenant). +func seedTemplateSchedules(ctx context.Context, workspaceID, templatePath string, schedules []OrgSchedule) (seeded, skipped int) { + for _, sched := range schedules { + // Honour caller cancellation — protects against long seed + // loops on a request whose client already gave up. + if err := ctx.Err(); err != nil { + log.Printf("Template schedule seed: ctx cancelled after %d/%d on %s: %v", seeded, len(schedules), workspaceID, err) + skipped += len(schedules) - seeded - skipped + return + } + if len(sched.CronExpr) > maxScheduleCronExprLen { + log.Printf("Template schedule seed: cron_expr too long (%d > %d) for %q on %s — skipping", len(sched.CronExpr), maxScheduleCronExprLen, sched.Name, workspaceID) + skipped++ + continue + } + tz := sched.Timezone + if tz == "" { + tz = "UTC" + } + enabled := true + if sched.Enabled != nil { + enabled = *sched.Enabled + } + prompt, promptErr := resolvePromptRef(sched.Prompt, sched.PromptFile, templatePath, "") + if promptErr != nil { + log.Printf("Template schedule seed: failed to resolve prompt for %q on %s: %v — skipping", sched.Name, workspaceID, promptErr) + skipped++ + continue + } + if prompt == "" { + log.Printf("Template schedule seed: schedule %q on %s has empty prompt — skipping", sched.Name, workspaceID) + skipped++ + continue + } + if len(prompt) > maxSchedulePromptBytes { + log.Printf("Template schedule seed: prompt too long (%d > %d bytes) for %q on %s — skipping", len(prompt), maxSchedulePromptBytes, sched.Name, workspaceID) + skipped++ + continue + } + nextRun, nextRunErr := scheduler.ComputeNextRun(sched.CronExpr, tz, time.Now()) + if nextRunErr != nil { + log.Printf("Template schedule seed: invalid cron for %q on %s: %v — skipping", sched.Name, workspaceID, nextRunErr) + skipped++ + continue + } + if _, err := db.DB.ExecContext(ctx, orgImportScheduleSQL, + workspaceID, sched.Name, sched.CronExpr, tz, prompt, enabled, nextRun); err != nil { + log.Printf("Template schedule seed: failed to upsert %q on %s: %v", sched.Name, workspaceID, err) + skipped++ + continue + } + seeded++ + log.Printf("Template schedule seed: %q (%s, %d chars) upserted on %s (source=template)", sched.Name, sched.CronExpr, len(prompt), workspaceID) + } + return +} diff --git a/workspace-server/internal/handlers/template_schedules_test.go b/workspace-server/internal/handlers/template_schedules_test.go new file mode 100644 index 000000000..1f3c194fe --- /dev/null +++ b/workspace-server/internal/handlers/template_schedules_test.go @@ -0,0 +1,141 @@ +package handlers + +// template_schedules_test.go — unit tests for parseTemplateSchedules. +// +// seedTemplateSchedules' DB INSERT path is already covered indirectly +// by TestImport_OrgScheduleSQLShape (schedules_test.go) since both +// code paths share the canonical orgImportScheduleSQL constant; the +// loop logic (default tz, default enabled, prompt resolution, cron +// validation) is exercised at the parser level here and at the +// orgImportScheduleSQL level there. + +import ( + "path/filepath" + "testing" +) + +func TestParseTemplateSchedules_AbsentFile(t *testing.T) { + dir := t.TempDir() + // No config.yaml in dir. + got, err := parseTemplateSchedules(dir) + if err != nil { + t.Fatalf("expected nil error for absent config.yaml, got %v", err) + } + if got != nil { + t.Fatalf("expected nil slice, got %#v", got) + } +} + +func TestParseTemplateSchedules_EmptyTemplatePath(t *testing.T) { + got, err := parseTemplateSchedules("") + if err != nil { + t.Fatalf("expected nil error for empty path, got %v", err) + } + if got != nil { + t.Fatalf("expected nil slice for empty path, got %#v", got) + } +} + +func TestParseTemplateSchedules_NoSchedulesBlock(t *testing.T) { + dir := t.TempDir() + mustWriteFile(t, filepath.Join(dir, "config.yaml"), ` +name: Some Template +runtime: claude-code +model: foo/bar +`) + got, err := parseTemplateSchedules(dir) + if err != nil { + t.Fatalf("expected nil error when schedules: absent, got %v", err) + } + if len(got) != 0 { + t.Fatalf("expected zero schedules, got %d", len(got)) + } +} + +func TestParseTemplateSchedules_HappyPath(t *testing.T) { + dir := t.TempDir() + mustWriteFile(t, filepath.Join(dir, "config.yaml"), ` +name: SEO Agent +schedules: + - name: Continuous tick + cron_expr: "*/30 * * * *" + timezone: America/Vancouver + prompt: | + Run one SEO tick. + - name: Monday GSC + cron_expr: "0 8 * * 1" + timezone: America/Vancouver + prompt: /seo google + enabled: true + - name: Disabled placeholder + cron_expr: "0 0 1 1 *" + prompt: noop + enabled: false +`) + got, err := parseTemplateSchedules(dir) + if err != nil { + t.Fatalf("parse: %v", err) + } + if len(got) != 3 { + t.Fatalf("expected 3 schedules, got %d", len(got)) + } + if got[0].Name != "Continuous tick" || got[0].CronExpr != "*/30 * * * *" { + t.Errorf("schedule[0] mismatch: %+v", got[0]) + } + if got[1].Timezone != "America/Vancouver" { + t.Errorf("schedule[1].Timezone = %q, want America/Vancouver", got[1].Timezone) + } + // Enabled is *bool: nil means "default true" at seed time, false is + // explicit opt-out and must survive the YAML round-trip. + if got[2].Enabled == nil { + t.Errorf("schedule[2].Enabled = nil, want *false") + } else if *got[2].Enabled { + t.Errorf("schedule[2].Enabled = true, want false") + } +} + +func TestParseTemplateSchedules_MalformedYAML(t *testing.T) { + dir := t.TempDir() + mustWriteFile(t, filepath.Join(dir, "config.yaml"), ` +name: Broken +schedules: + - this is: [not, a, valid +`) + _, err := parseTemplateSchedules(dir) + if err == nil { + t.Fatal("expected parse error on malformed YAML, got nil") + } +} + +// TestParseTemplateSchedules_RejectsOversizeFile gates against the +// billion-laughs / anchor-bomb DoS class: a hostile config.yaml over +// the 1 MiB cap must be refused before yaml.Unmarshal runs. +func TestParseTemplateSchedules_RejectsOversizeFile(t *testing.T) { + dir := t.TempDir() + // One byte over the cap — fastest path to the gate. + pad := make([]byte, maxTemplateConfigYAMLBytes+1) + for i := range pad { + pad[i] = '#' + } + mustWriteFile(t, filepath.Join(dir, "config.yaml"), string(pad)) + if _, err := parseTemplateSchedules(dir); err == nil { + t.Fatal("expected oversize-file error, got nil") + } +} + +// TestParseTemplateSchedules_RejectsTooManySchedules gates against a +// hostile config.yaml that flips one row into a 10k-row insert storm. +func TestParseTemplateSchedules_RejectsTooManySchedules(t *testing.T) { + dir := t.TempDir() + var b []byte + b = append(b, []byte("schedules:\n")...) + // maxTemplateSchedules+1 minimal entries — they don't have to be + // valid as schedules because the gate trips before resolution. + for i := 0; i <= maxTemplateSchedules; i++ { + b = append(b, []byte(" - name: s\n cron_expr: \"* * * * *\"\n prompt: x\n")...) + } + mustWriteFile(t, filepath.Join(dir, "config.yaml"), string(b)) + if _, err := parseTemplateSchedules(dir); err == nil { + t.Fatal("expected schedule-count error, got nil") + } +} diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index 7af6c779d..b0947d756 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -769,7 +769,8 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { // runtime/model/tier as JSON — the Config tab needs that to render // even on failed workspaces, so Create owns this Create-only side // effect rather than coupling Auto to a UI concern. - if !h.provisionWorkspaceAuto(id, templatePath, configFiles, payload) { + provisionOK := h.provisionWorkspaceAuto(id, templatePath, configFiles, payload) + if !provisionOK { cfgJSON := fmt.Sprintf(`{"name":%q,"runtime":%q,"tier":%d,"template":%q}`, payload.Name, payload.Runtime, payload.Tier, payload.Template) if _, err := db.DB.ExecContext(ctx, ` @@ -780,6 +781,32 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { } } + // Seed schedules declared in the workspace template's config.yaml + // AFTER provisionWorkspaceAuto succeeds so the scheduler never + // fires cron rows against a workspace whose backend never wired + // (review feedback PR #1929#1). Async EC2 provisioning may still + // fail downstream; scheduler.go is expected to handle non-online + // status as a no-op tick. Idempotent across re-creates via + // orgImportScheduleSQL's ON CONFLICT clause; runtime-added rows + // are preserved (Issue #24 contract). Restart does not re-seed + // (so user-deleted template rows stay deleted). + // + // Non-fatal: a broken schedules: block must never block workspace + // provisioning — the workspace row is already live and the grid + // is recoverable via POST /workspaces/{id}/schedules. + if provisionOK && templatePath != "" { + if templateScheds, parseErr := parseTemplateSchedules(templatePath); parseErr != nil { + log.Printf("Create %s: parsing template schedules: %v (continuing)", id, parseErr) + } else if len(templateScheds) > 0 { + seeded, skipped := seedTemplateSchedules(ctx, id, templatePath, templateScheds) + if skipped > 0 { + log.Printf("Create %s: template schedule partial-seed: seeded=%d skipped=%d total=%d", id, seeded, skipped, len(templateScheds)) + } else { + log.Printf("Create %s: seeded %d/%d template schedules", id, seeded, len(templateScheds)) + } + } + } + c.JSON(http.StatusCreated, gin.H{ "id": id, "status": "provisioning",