From a44a110d60c6bbde9aa8a6b05b8cb5f3179b4bda Mon Sep 17 00:00:00 2001 From: hongming Date: Fri, 29 May 2026 09:03:27 +0000 Subject: [PATCH] fix(org-import): migrate runtime schedules from removed predecessor on recreate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit internal#2006 — recreating an agent orphans its schedules. Root cause: createWorkspaceTree's INSERT … ON CONFLICT (parent_id,name) WHERE status != 'removed' only matches NON-removed rows, so when an agent is recreated after its prior workspace was marked removed, a brand-new workspace id is minted. Reconcile then re-derives template-sourced state (MODEL, template schedules via the upsert loop), but schedules a user added at runtime (source='runtime', via the canvas/API) bind to the ephemeral workspace_id and are abandoned on the removed row — they silently stop firing (the 2026-05-29 agents-team incident: all 5 *-autonomous-tick schedules, source=runtime, orphaned on removed ids; canvas showed "missing schedulers"). Fix: after a fresh insert, migrate runtime-created schedules from the most-recent removed predecessor of the same agent onto the new workspace. The predecessor is matched by the stable `role` (survives the name auto-suffixing that yields "Agent (2)"), falling back to name+parent. Template-sourced schedules are NOT migrated (reconcile re-derives those); runs before the template upsert loop so a same-named template schedule still wins; skips names already present on the new workspace; best-effort (logs, never errors the import). Tests: predecessor-found re-points; no-predecessor (first create) does NOT run the UPDATE; name-fallback branch. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../internal/handlers/org_import.go | 68 +++++++++++++++++ .../org_import_schedule_migration_test.go | 75 +++++++++++++++++++ 2 files changed, 143 insertions(+) create mode 100644 workspace-server/internal/handlers/org_import_schedule_migration_test.go diff --git a/workspace-server/internal/handlers/org_import.go b/workspace-server/internal/handlers/org_import.go index 49dc64fbc..970aa702b 100644 --- a/workspace-server/internal/handlers/org_import.go +++ b/workspace-server/internal/handlers/org_import.go @@ -548,6 +548,16 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX }) } + // internal#2006: migrate runtime-created schedules from a removed + // predecessor of the same agent (role+parent) onto this freshly-created + // workspace. Reconcile re-derives template-sourced state below, but + // schedules a user added at runtime (source='runtime', via the canvas/API) + // bind to the ephemeral workspace_id and would otherwise be abandoned on + // the removed row when an agent is recreated with a new id. Runs before the + // template upsert loop so a same-named template schedule still wins. + // Best-effort: never fails the import. + h.migrateRuntimeSchedulesFromRemovedPredecessor(ctx, id, role, ws.Name, parentID) + // Insert schedules if defined. Resolve each schedule's prompt body from // either inline `prompt:` or `prompt_file:` (file ref relative to the // workspace's files_dir). Inline wins; empty prompt after resolution is @@ -687,6 +697,64 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX return h.recurseChildrenForImport(ws, id, absX, absY, defaults, orgBaseDir, results, provisionSem) } +// migrateRuntimeSchedulesFromRemovedPredecessor re-points runtime-created +// schedules (source='runtime') from the most-recent removed predecessor of the +// same agent onto newID. Recreating an agent mints a NEW workspace id (the +// ON CONFLICT in createWorkspaceTree only matches non-removed rows), so a +// schedule a user added at runtime would otherwise be abandoned on the removed +// row. Template-sourced schedules are NOT migrated — reconcile re-derives those +// from the org template (the upsert loop). The predecessor is matched by the +// stable `role` when present (survives the name auto-suffixing that yields +// "Agent (2)"), falling back to name+parent. Idempotent (skips names already on +// newID) and best-effort (logs, never errors the import). See internal#2006. +func (h *OrgHandler) migrateRuntimeSchedulesFromRemovedPredecessor(ctx context.Context, newID string, role interface{}, name string, parentID *string) { + var predID string + var err error + if role != nil { + err = db.DB.QueryRowContext(ctx, ` + SELECT id FROM workspaces + WHERE status = 'removed' AND role = $1 + AND parent_id IS NOT DISTINCT FROM $2 + AND id <> $3 + ORDER BY updated_at DESC NULLS LAST + LIMIT 1 + `, role, parentID, newID).Scan(&predID) + } else { + err = db.DB.QueryRowContext(ctx, ` + SELECT id FROM workspaces + WHERE status = 'removed' AND name = $1 + AND parent_id IS NOT DISTINCT FROM $2 + AND id <> $3 + ORDER BY updated_at DESC NULLS LAST + LIMIT 1 + `, name, parentID, newID).Scan(&predID) + } + if errors.Is(err, sql.ErrNoRows) { + return // first-time create — no predecessor to migrate from + } + if err != nil { + log.Printf("Org import: predecessor lookup for %q (new=%s) failed: %v — skipping schedule migration", name, newID, err) + return + } + res, err := db.DB.ExecContext(ctx, ` + UPDATE workspace_schedules s + SET workspace_id = $1, updated_at = now() + WHERE s.workspace_id = $2 + AND s.source = 'runtime' + AND NOT EXISTS ( + SELECT 1 FROM workspace_schedules t + WHERE t.workspace_id = $1 AND t.name = s.name + ) + `, newID, predID) + if err != nil { + log.Printf("Org import: schedule migration %s -> %s (%q) failed: %v", predID, newID, name, err) + return + } + if n, _ := res.RowsAffected(); n > 0 { + log.Printf("Org import: migrated %d runtime schedule(s) from removed predecessor %s to new workspace %s (%q)", n, predID, newID, name) + } +} + // lookupExistingChild returns the id of an existing workspace under // (parent_id, name) if any, with idempotency-friendly semantics: // - parent_id IS NOT DISTINCT FROM matches NULL too (root workspaces) diff --git a/workspace-server/internal/handlers/org_import_schedule_migration_test.go b/workspace-server/internal/handlers/org_import_schedule_migration_test.go new file mode 100644 index 000000000..72e6a5f87 --- /dev/null +++ b/workspace-server/internal/handlers/org_import_schedule_migration_test.go @@ -0,0 +1,75 @@ +package handlers + +import ( + "context" + "database/sql" + "testing" + + sqlmock "github.com/DATA-DOG/go-sqlmock" +) + +// TestMigrateRuntimeSchedulesFromRemovedPredecessor verifies the happy path: +// a removed predecessor exists for the agent (matched by role), and its +// runtime-created schedules are re-pointed onto the freshly-created workspace. +// internal#2006 (recreate-orphans-schedules regression). +func TestMigrateRuntimeSchedulesFromRemovedPredecessor(t *testing.T) { + mock := setupTestDB(t) + h := &OrgHandler{} + + // Predecessor lookup (role branch) returns the removed prior workspace. + mock.ExpectQuery(`SELECT id FROM workspaces`). + WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("old-removed-ws")) + // Re-point UPDATE migrates 2 runtime schedules. + mock.ExpectExec(`UPDATE workspace_schedules`). + WillReturnResult(sqlmock.NewResult(0, 2)) + + parent := "parent-1" + h.migrateRuntimeSchedulesFromRemovedPredecessor( + context.Background(), "new-ws", interface{}("code-reviewer"), "Code Reviewer (2)", &parent, + ) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Fatalf("unmet sqlmock expectations: %v", err) + } +} + +// TestMigrateRuntimeSchedules_NoPredecessor verifies the first-time-create path: +// no removed predecessor → the function returns after the lookup and MUST NOT +// run the re-point UPDATE (sqlmock errors on an unexpected query if it does). +func TestMigrateRuntimeSchedules_NoPredecessor(t *testing.T) { + mock := setupTestDB(t) + h := &OrgHandler{} + + mock.ExpectQuery(`SELECT id FROM workspaces`). + WillReturnError(sql.ErrNoRows) + // No ExpectExec — an UPDATE here would be an unexpected query → test fails. + + h.migrateRuntimeSchedulesFromRemovedPredecessor( + context.Background(), "new-ws", interface{}("researcher"), "Root-Cause Researcher", nil, + ) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Fatalf("unmet sqlmock expectations: %v", err) + } +} + +// TestMigrateRuntimeSchedules_NameFallback verifies the name-branch lookup is +// used when the agent has no stable role (role == nil), still followed by the +// re-point UPDATE. +func TestMigrateRuntimeSchedules_NameFallback(t *testing.T) { + mock := setupTestDB(t) + h := &OrgHandler{} + + mock.ExpectQuery(`SELECT id FROM workspaces`). + WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("old-removed-ws")) + mock.ExpectExec(`UPDATE workspace_schedules`). + WillReturnResult(sqlmock.NewResult(0, 1)) + + h.migrateRuntimeSchedulesFromRemovedPredecessor( + context.Background(), "new-ws", nil, "Some Agent", nil, + ) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Fatalf("unmet sqlmock expectations: %v", err) + } +} -- 2.52.0