fix(reconcile): dedupe boot-installed plugins — no EIC re-deliver+restart (#38) #3018

Merged
core-devops merged 1 commits from fix/rfc2843-38-reconcile-dedupe-boot-install into main 2026-06-17 20:17:42 +00:00
2 changed files with 71 additions and 3 deletions
@@ -110,13 +110,26 @@ func (h *PluginsHandler) ReconcileWorkspacePlugins(ctx context.Context, workspac
continue
}
deliverErr := h.deliver(ctx, workspaceID, stage)
stage.cleanup()
if deliverErr != nil {
// RFC#2843 #38: the runtime-image entrypoint boot-installs declared
// plugins to the box on EVERY boot, BEFORE this online-transition
// reconcile runs — so by now the plugin is typically already on the box.
// If present, record the tracking row WITHOUT re-delivering via EIC +
// restarting: the EIC push + restartFunc was the redundant churn (one
// wasted full re-provision per fresh workspace — the observed ~12-min
// reprovision). Only deliver when the plugin is NOT already present
// (boot-install disabled/failed, or a non-boot-install path) — the
// safety net. pluginPresentOnBox is conservative (false on any
// uncertainty) so a genuinely-missing install is never silently skipped.
if h.pluginPresentOnBox(ctx, workspaceID, stage.PluginName) {
log.Printf("Plugin reconcile: workspace=%s plugin=%s already on box (boot-installed) — recording tracking row only, no re-deliver/restart",
workspaceID, stage.PluginName)
} else if deliverErr := h.deliver(ctx, workspaceID, stage); deliverErr != nil {
stage.cleanup()
log.Printf("Plugin reconcile: workspace=%s plugin=%s deliver failed: %v",
workspaceID, d.PluginName, deliverErr)
continue
}
stage.cleanup()
if recErr := recordWorkspacePluginInstall(
ctx, workspaceID, stage.PluginName, stage.Source.Raw(), track, stage.InstalledSHA,
@@ -141,6 +154,26 @@ func (h *PluginsHandler) ReconcileWorkspacePlugins(ctx context.Context, workspac
}
}
// pluginPresentOnBox reports whether the plugin is already installed on the
// workspace box — e.g. delivered by the runtime-image boot-install
// (RFC#2843 #32) before this online-transition reconcile runs. Used to skip the
// redundant EIC re-deliver + restart that caused the per-fresh-workspace
// reprovision churn (#38). CONSERVATIVE by design: returns false on any
// uncertainty (not SaaS / no instance / read error / empty manifest), so the
// caller falls back to delivering — a genuinely-missing install is never
// silently skipped, only a confirmed-present one is deduped.
func (h *PluginsHandler) pluginPresentOnBox(ctx context.Context, workspaceID, pluginName string) bool {
instanceID, runtime := h.lookupSaaSDispatch(workspaceID)
if instanceID == "" {
return false // not a SaaS box we can probe — deliver as before
}
data, err := readPluginManifestViaEIC(ctx, instanceID, runtime, pluginName)
if err != nil || len(data) == 0 {
return false // can't confirm presence — fall back to deliver
}
return true
}
// cleanup removes the staged tempdir. Mirrors the defer the interactive
// install path uses; the reconcile must clean up explicitly because it stages
// many plugins in a loop rather than one-per-request.
@@ -228,3 +228,38 @@ func TestProvisioningChannelCarriesNoPlugins(t *testing.T) {
"so the post-online reconcile can install them")
}
}
// TestReconcile_BootInstalled_RecordsWithoutDeliver pins the RFC#2843 #38 fix:
// when the plugin is already on the box (boot-installed by the runtime-image
// entrypoint before this online-transition reconcile runs), the reconcile must
// record the tracking row but NOT re-deliver via EIC + restart (the churn that
// caused one wasted re-provision per fresh workspace).
func TestReconcile_BootInstalled_RecordsWithoutDeliver(t *testing.T) {
mock, cleanup := withMockDB(t)
defer cleanup()
h, delivered := newReconcileHandler(t)
// SaaS box present + manifest read returns non-empty → pluginPresentOnBox=true.
h.instanceIDLookup = func(string) (string, error) { return "i-boot", nil }
orig := readPluginManifestViaEIC
readPluginManifestViaEIC = func(ctx context.Context, instanceID, runtime, pluginName string) ([]byte, error) {
return []byte("name: seo-all\n"), nil
}
defer func() { readPluginManifestViaEIC = orig }()
expectDeclared(mock,
[]DeclaredPlugin{{PluginName: "seo-all", SourceRaw: "local://seo-all"}},
nil,
)
// Tracking row IS still recorded (so drift/UI work) — just no deliver/restart.
mock.ExpectExec(`INSERT INTO workspace_plugins`).
WillReturnResult(sqlmock.NewResult(1, 1))
h.ReconcileWorkspacePlugins(context.Background(), "ws-1")
if len(*delivered) != 0 {
t.Fatalf("boot-installed plugin must NOT be re-delivered (no EIC push/restart churn), but delivered %v", *delivered)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet DB expectations (tracking row must still be recorded): %v", err)
}
}