diff --git a/.github/workflows/block-internal-paths.yml b/.github/workflows/block-internal-paths.yml
index a24e613a..7629a669 100644
--- a/.github/workflows/block-internal-paths.yml
+++ b/.github/workflows/block-internal-paths.yml
@@ -1,7 +1,7 @@
 name: Block internal-flavored paths
 # Hard CI gate. Internal content (positioning, competitive briefs, sales
-# playbooks, PMM/press drip, draft campaigns) lives in Molecule-AI/internal —
+# playbooks, PMM/press drip, draft campaigns) lives in molecule-ai/internal —
 # this public monorepo must never re-acquire those paths. CEO directive
 # 2026-04-23 after a fleet-wide audit found 79 internal files leaked here.
 #
@@ -135,7 +135,7 @@ jobs:
           echo "::error::Forbidden internal-flavored paths detected:"
           printf "$OFFENDING"
           echo ""
-          echo "These paths belong in Molecule-AI/internal, not this public repo."
+          echo "These paths belong in molecule-ai/internal, not this public repo."
           echo "See docs/internal-content-policy.md for canonical locations."
           echo ""
           echo "If your file is genuinely public-facing (e.g. a blog post"
diff --git a/.github/workflows/canary-verify.yml b/.github/workflows/canary-verify.yml
index 6972194e..c26958ae 100644
--- a/.github/workflows/canary-verify.yml
+++ b/.github/workflows/canary-verify.yml
@@ -108,7 +108,7 @@ jobs:
           echo
           echo "One or more canary secrets are unset (\`CANARY_TENANT_URLS\`, \`CANARY_ADMIN_TOKENS\`, \`CANARY_CP_SHARED_SECRET\`)."
           echo "Phase 2 canary fleet has not been stood up yet —"
-          echo "see [canary-tenants.md](https://github.com/Molecule-AI/molecule-controlplane/blob/main/docs/canary-tenants.md)."
+          echo "see [canary-tenants.md](https://github.com/molecule-ai/molecule-controlplane/blob/main/docs/canary-tenants.md)."
           echo
           echo "**Skipped — promote-to-latest will NOT auto-fire.** Dispatch \`promote-latest.yml\` manually when ready."
         } >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fffce798..a9ff776e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -87,7 +87,7 @@ jobs:
         run: go mod download
       - if: needs.changes.outputs.platform == 'true'
         run: go build ./cmd/server
-      # CLI (molecli) moved to standalone repo: github.com/Molecule-AI/molecule-cli
+      # CLI (molecli) moved to standalone repo: github.com/molecule-ai/molecule-cli
       - if: needs.changes.outputs.platform == 'true'
         run: go vet ./... || true
       - if: needs.changes.outputs.platform == 'true'
@@ -165,7 +165,7 @@ jobs:
          # Strip the package-import prefix so we can match .coverage-allowlist.txt
          # entries written as paths relative to workspace-server/.
          # Handle both module paths: platform/workspace-server/... and platform/...
-          rel=$(echo "$file" | sed 's|^github.com/Molecule-AI/molecule-monorepo/platform/workspace-server/||; s|^github.com/Molecule-AI/molecule-monorepo/platform/||')
+          rel=$(echo "$file" | sed 's|^github.com/molecule-ai/molecule-monorepo/platform/workspace-server/||; s|^github.com/molecule-ai/molecule-monorepo/platform/||')
           if echo "$ALLOWLIST" | grep -qxF "$rel"; then
             echo "::warning file=workspace-server/$rel::Critical file at ${pct}% coverage (allowlisted, #1823) — fix before expiry."
@@ -243,8 +243,8 @@ jobs:
         if-no-files-found: warn

   # MCP Server + SDK removed from CI — now in standalone repos:
-  #   - github.com/Molecule-AI/molecule-mcp-server (npm CI)
-  #   - github.com/Molecule-AI/molecule-sdk-python (PyPI CI)
+  #   - github.com/molecule-ai/molecule-mcp-server (npm CI)
+  #   - github.com/molecule-ai/molecule-sdk-python (PyPI CI)

   # e2e-api job moved to .github/workflows/e2e-api.yml (issue #458).
   # It now has workflow-level concurrency (cancel-in-progress: false) so
@@ -434,5 +434,5 @@ jobs:
         fi

   # SDK + plugin validation moved to standalone repo:
-  #   github.com/Molecule-AI/molecule-sdk-python
+  #   github.com/molecule-ai/molecule-sdk-python
diff --git a/.github/workflows/pr-guards.yml b/.github/workflows/pr-guards.yml
index 29645b58..151757fe 100644
--- a/.github/workflows/pr-guards.yml
+++ b/.github/workflows/pr-guards.yml
@@ -19,4 +19,4 @@ permissions:

 jobs:
   disable-auto-merge-on-push:
-    uses: Molecule-AI/molecule-ci/.github/workflows/disable-auto-merge-on-push.yml@main
+    uses: molecule-ai/molecule-ci/.github/workflows/disable-auto-merge-on-push.yml@main
diff --git a/.github/workflows/redeploy-tenants-on-main.yml b/.github/workflows/redeploy-tenants-on-main.yml
index f862c487..0625fc3f 100644
--- a/.github/workflows/redeploy-tenants-on-main.yml
+++ b/.github/workflows/redeploy-tenants-on-main.yml
@@ -9,7 +9,7 @@ name: redeploy-tenants-on-main
 #
 # This workflow closes the gap by calling the control-plane admin
 # endpoint that performs a canary-first, batched, health-gated rolling
-# redeploy across every live tenant. Implemented in Molecule-AI/
+# redeploy across every live tenant. Implemented in molecule-ai/
 # molecule-controlplane as POST /cp/admin/tenants/redeploy-fleet
 # (feat/tenant-auto-redeploy, landing alongside this workflow).
 #
@@ -146,7 +146,7 @@ jobs:
       - name: Call CP redeploy-fleet
         # CP_ADMIN_API_TOKEN must be set as a repo/org secret on
-        # Molecule-AI/molecule-core, matching the staging/prod CP's
+        # molecule-ai/molecule-core, matching the staging/prod CP's
         # CP_ADMIN_API_TOKEN env. Stored in Railway, mirrored to this
         # repo's secrets for CI.
         env:
diff --git a/.github/workflows/redeploy-tenants-on-staging.yml b/.github/workflows/redeploy-tenants-on-staging.yml
index e0d69544..2726db9e 100644
--- a/.github/workflows/redeploy-tenants-on-staging.yml
+++ b/.github/workflows/redeploy-tenants-on-staging.yml
@@ -97,7 +97,7 @@ jobs:
       - name: Call staging-CP redeploy-fleet
         # CP_STAGING_ADMIN_API_TOKEN must be set as a repo/org secret
-        # on Molecule-AI/molecule-core, matching staging-CP's
+        # on molecule-ai/molecule-core, matching staging-CP's
         # CP_ADMIN_API_TOKEN env var (visible in Railway controlplane
         # / staging environment). Stored separately from the prod
         # CP_ADMIN_API_TOKEN so a leak of one doesn't auth the other.
diff --git a/.github/workflows/retarget-main-to-staging.yml b/.github/workflows/retarget-main-to-staging.yml
index 5e1ff8bc..1958a4b9 100644
--- a/.github/workflows/retarget-main-to-staging.yml
+++ b/.github/workflows/retarget-main-to-staging.yml
@@ -96,7 +96,7 @@ jobs:
             --body "$(cat <<'BODY'
           [retarget-bot] This PR was opened against `main` and has been retargeted to `staging` automatically.

-          **Why:** per [SHARED_RULES rule 8](https://github.com/Molecule-AI/molecule-ai-org-template-molecule-dev/blob/main/SHARED_RULES.md), all feature work targets `staging` first; the CEO promotes `staging → main` separately.
+          **Why:** per [SHARED_RULES rule 8](https://github.com/molecule-ai/molecule-ai-org-template-molecule-dev/blob/main/SHARED_RULES.md), all feature work targets `staging` first; the CEO promotes `staging → main` separately.

           **What changed:** just the base branch — no code change. CI will re-run against `staging`. If you get merge conflicts, rebase on `staging`.
diff --git a/.github/workflows/secret-scan.yml b/.github/workflows/secret-scan.yml
index 2a38d1e4..edea6bf9 100644
--- a/.github/workflows/secret-scan.yml
+++ b/.github/workflows/secret-scan.yml
@@ -12,7 +12,7 @@ name: Secret scan
 #
 #   jobs:
 #     secret-scan:
-#       uses: Molecule-AI/molecule-core/.github/workflows/secret-scan.yml@staging
+#       uses: molecule-ai/molecule-core/.github/workflows/secret-scan.yml@staging
 #
 # Pin to @staging not @main — staging is the active default branch,
 # main lags via the staging-promotion workflow. Updates ride along
diff --git a/workspace-server/cmd/server/bind_test.go b/workspace-server/cmd/server/bind_test.go
new file mode 100644
index 00000000..cb3dbe28
--- /dev/null
+++ b/workspace-server/cmd/server/bind_test.go
@@ -0,0 +1,89 @@
+package main
+
+import "testing"
+
+// TestResolveBindHost pins the precedence: BIND_ADDR explicit > dev-mode
+// fail-open default of 127.0.0.1 > production-shape empty (all interfaces).
+//
+// Mutation-test invariant: removing the IsDevModeFailOpen() branch makes
+// "no_bindaddr_devmode_unset_admin" fail (returns "" instead of "127.0.0.1").
+// Removing the BIND_ADDR branch makes "explicit_bindaddr_*" cases fail.
+func TestResolveBindHost(t *testing.T) {
+	cases := []struct {
+		name       string
+		bindAddr   string
+		adminToken string
+		molEnv     string
+		want       string
+	}{
+		{
+			name:       "no_bindaddr_devmode_unset_admin",
+			bindAddr:   "",
+			adminToken: "",
+			molEnv:     "dev",
+			want:       "127.0.0.1",
+		},
+		{
+			name:       "no_bindaddr_devmode_unset_admin_full_word",
+			bindAddr:   "",
+			adminToken: "",
+			molEnv:     "development",
+			want:       "127.0.0.1",
+		},
+		{
+			name:       "no_bindaddr_admin_set_in_dev_env",
+			bindAddr:   "",
+			adminToken: "secret",
+			molEnv:     "dev",
+			want:       "", // ADMIN_TOKEN flips IsDevModeFailOpen to false → all interfaces
+		},
+		{
+			name:       "no_bindaddr_production_env",
+			bindAddr:   "",
+			adminToken: "",
+			molEnv:     "production",
+			want:       "", // production is not a dev value → all interfaces
+		},
+		{
+			name:       "no_bindaddr_unset_env",
+			bindAddr:   "",
+			adminToken: "",
+			molEnv:     "",
+			want:       "", // unset MOLECULE_ENV → not dev → all interfaces
+		},
+		{
+			name:       "explicit_bindaddr_loopback_overrides_devmode",
+			bindAddr:   "127.0.0.1",
+			adminToken: "",
+			molEnv:     "dev",
+			want:       "127.0.0.1",
+		},
+		{
+			name:       "explicit_bindaddr_wildcard_overrides_devmode_default",
+			bindAddr:   "0.0.0.0",
+			adminToken: "",
+			molEnv:     "dev",
+			want:       "0.0.0.0",
+		},
+		{
+			name:       "explicit_bindaddr_in_production",
+			bindAddr:   "10.0.5.7",
+			adminToken: "secret",
+			molEnv:     "production",
+			want:       "10.0.5.7",
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Setenv("BIND_ADDR", tc.bindAddr)
+			t.Setenv("ADMIN_TOKEN", tc.adminToken)
+			t.Setenv("MOLECULE_ENV", tc.molEnv)
+			got := resolveBindHost()
+			if got != tc.want {
+				t.Errorf("resolveBindHost() = %q, want %q (BIND_ADDR=%q ADMIN_TOKEN=%q MOLECULE_ENV=%q)",
+					got, tc.want, tc.bindAddr, tc.adminToken, tc.molEnv)
+			}
+		})
+	}
+}
"github.com/Molecule-AI/molecule-monorepo/platform/internal/handlers" "github.com/Molecule-AI/molecule-monorepo/platform/internal/imagewatch" memwiring "github.com/Molecule-AI/molecule-monorepo/platform/internal/memory/wiring" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/middleware" "github.com/Molecule-AI/molecule-monorepo/platform/internal/pendinguploads" "github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner" "github.com/Molecule-AI/molecule-monorepo/platform/internal/registry" @@ -332,15 +333,23 @@ func main() { // Router r := router.Setup(hub, broadcaster, prov, platformURL, configsDir, wh, channelMgr, memBundle) - // HTTP server with graceful shutdown + // HTTP server with graceful shutdown. + // + // Bind host: in dev-mode (no ADMIN_TOKEN, MOLECULE_ENV=dev|development) + // the AdminAuth chain fails open by design; pairing that with a wildcard + // bind would expose unauth /workspaces to any same-LAN peer. Default to + // loopback when fail-open is active. Operators who need LAN exposure set + // BIND_ADDR=0.0.0.0 explicitly. Production (ADMIN_TOKEN set) is unchanged. + // See molecule-core#7. + bindHost := resolveBindHost() srv := &http.Server{ - Addr: fmt.Sprintf(":%s", port), + Addr: fmt.Sprintf("%s:%s", bindHost, port), Handler: r, } // Start server in goroutine go func() { - log.Printf("Platform starting on :%s", port) + log.Printf("Platform starting on %s:%s (dev-mode-fail-open=%v)", bindHost, port, middleware.IsDevModeFailOpen()) if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { log.Fatalf("Server failed: %v", err) } @@ -375,6 +384,29 @@ func envOr(key, fallback string) string { return fallback } +// resolveBindHost picks the listener interface for the HTTP server. +// +// Precedence: +// 1. BIND_ADDR — explicit operator override (any value, including "0.0.0.0"). +// 2. dev-mode fail-open active → "127.0.0.1" (loopback only). +// 3. otherwise → "" (Go binds every interface; existing prod/self-host shape). +// +// Coupling the loopback default to middleware.IsDevModeFailOpen() means the +// two safety levers — bind narrowness and auth strength — move together. A +// production deploy (ADMIN_TOKEN set) keeps binding to all interfaces because +// the auth chain is doing its job; a dev Mac (no ADMIN_TOKEN, MOLECULE_ENV=dev) +// is reachable only via loopback because the auth chain is fail-open. See +// molecule-core#7 for the original LAN exposure finding. +func resolveBindHost() string { + if v := os.Getenv("BIND_ADDR"); v != "" { + return v + } + if middleware.IsDevModeFailOpen() { + return "127.0.0.1" + } + return "" +} + func findConfigsDir() string { candidates := []string{ "workspace-configs-templates", diff --git a/workspace-server/internal/handlers/plugins.go b/workspace-server/internal/handlers/plugins.go index b25aa544..4befcfe3 100644 --- a/workspace-server/internal/handlers/plugins.go +++ b/workspace-server/internal/handlers/plugins.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "io" + "log" "os" "path/filepath" "strings" @@ -177,16 +178,42 @@ func strDefault(m map[string]interface{}, key, fallback string) string { return fallback } +// findRunningContainer returns the live container name for workspaceID, or "" +// when the container is genuinely not running OR the daemon errored +// transiently. Routed through provisioner.RunningContainerName as the SSOT +// (molecule-core#10) so this handler agrees with healthsweep on the same +// inputs. 
diff --git a/workspace-server/internal/handlers/plugins.go b/workspace-server/internal/handlers/plugins.go
index b25aa544..4befcfe3 100644
--- a/workspace-server/internal/handlers/plugins.go
+++ b/workspace-server/internal/handlers/plugins.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"context"
 	"io"
+	"log"
 	"os"
 	"path/filepath"
 	"strings"
@@ -177,16 +178,42 @@ func strDefault(m map[string]interface{}, key, fallback string) string {
 	return fallback
 }

+// findRunningContainer returns the live container name for workspaceID, or ""
+// when the container is genuinely not running OR the daemon errored
+// transiently. Routed through provisioner.RunningContainerName as the SSOT
+// (molecule-core#10) so this handler agrees with healthsweep on the same
+// inputs. Transient daemon errors are logged distinctly so triage doesn't
+// confuse a flaky daemon with a stopped container.
 func (h *PluginsHandler) findRunningContainer(ctx context.Context, workspaceID string) string {
-	if h.docker == nil {
+	name, err := provisioner.RunningContainerName(ctx, h.docker, workspaceID)
+	if err != nil {
+		log.Printf("plugins: docker inspect transient error for %s: %v (treating as not-running for this request)", workspaceID, err)
 		return ""
 	}
-	name := provisioner.ContainerName(workspaceID)
-	info, err := h.docker.ContainerInspect(ctx, name)
-	if err == nil && info.State.Running {
-		return name
+	return name
+}
+
+// isExternalRuntime reports whether the workspace's runtime is the
+// `external` (remote-pull) shape introduced in Phase 30. External
+// workspaces have no local container — `POST /plugins` (push-install via
+// docker exec) doesn't apply to them; they pull via the download endpoint
+// instead. Returns false (allow-install) if the lookup is unwired or
+// errors — failing open here is safe because the downstream
+// findRunningContainer step still gates on a real container being there.
+//
+// Background — molecule-core#10: without this check, external workspaces
+// fall through to findRunningContainer's NotFound path and return a
+// misleading 503 "container not running" instead of a clear "use the
+// pull endpoint" message.
+func (h *PluginsHandler) isExternalRuntime(workspaceID string) bool {
+	if h.runtimeLookup == nil {
+		return false
 	}
-	return ""
+	runtime, err := h.runtimeLookup(workspaceID)
+	if err != nil {
+		return false
+	}
+	return runtime == "external"
 }

 func (h *PluginsHandler) execAsRoot(ctx context.Context, containerName string, cmd []string) (string, error) {
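The diff doesn't show how h.runtimeLookup gets populated; the tests further down wire it through a WithRuntimeLookup option. A sketch of what production wiring might look like, where the workspace store, its GetWorkspace method, and the Runtime field are all assumptions for illustration:

```go
// Wiring fragment (constructor arguments inferred from the test fixtures;
// the store shape is hypothetical):
handler := NewPluginsHandler(pluginsDir, dockerCli, prov).
	WithRuntimeLookup(func(workspaceID string) (string, error) {
		ws, err := workspaceStore.GetWorkspace(workspaceID) // hypothetical store
		if err != nil {
			return "", err // the guard fails open on lookup errors (see tests)
		}
		return ws.Runtime, nil // e.g. "external", "claude-code"
	})
```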
diff --git a/workspace-server/internal/handlers/plugins_findrunning_ssot_test.go b/workspace-server/internal/handlers/plugins_findrunning_ssot_test.go
new file mode 100644
index 00000000..01c5ec06
--- /dev/null
+++ b/workspace-server/internal/handlers/plugins_findrunning_ssot_test.go
@@ -0,0 +1,176 @@
+package handlers
+
+import (
+	"go/ast"
+	"go/parser"
+	"go/token"
+	"strings"
+	"testing"
+)
+
+// TestFindRunningContainer_RoutesThroughProvisionerSSOT is a structural
+// AST gate: it pins the invariant that PluginsHandler.findRunningContainer
+// MUST go through provisioner.RunningContainerName for its is-running check,
+// instead of carrying its own copy of cli.ContainerInspect logic.
+//
+// Background — molecule-core#10: a parallel impl of "is the workspace's
+// container running" used to live in plugins.go. It drifted from the
+// canonical impl in healthsweep (which goes through Provisioner.IsRunning
+// → RunningContainerName) on edge cases like "transient daemon error" —
+// the duplicate would 503 with a misleading message while healthsweep
+// correctly stayed defensive. Consolidating onto RunningContainerName as
+// the SSOT prevents any future copy from re-introducing that drift.
+//
+// Mutation invariant: if a future PR replaces the provisioner call with
+// `h.docker.ContainerInspect(...)` directly, this test fails. That's the
+// signal to either (a) extend RunningContainerName's contract OR (b)
+// document why this call site needs to differ. Either way: the drift
+// gets a reviewer's attention instead of shipping silently.
+func TestFindRunningContainer_RoutesThroughProvisionerSSOT(t *testing.T) {
+	fset := token.NewFileSet()
+	file, err := parser.ParseFile(fset, "plugins.go", nil, parser.ParseComments)
+	if err != nil {
+		t.Fatalf("parse plugins.go: %v", err)
+	}
+
+	var fn *ast.FuncDecl
+	ast.Inspect(file, func(n ast.Node) bool {
+		f, ok := n.(*ast.FuncDecl)
+		if !ok || f.Name.Name != "findRunningContainer" {
+			return true
+		}
+		// Confirm the receiver is *PluginsHandler so we don't pick up an
+		// unrelated helper of the same name. FuncDecl.Recv is a
+		// *ast.FieldList; receivers carry at most one field.
+		if !receiverIs(f, "PluginsHandler") {
+			return true
+		}
+		fn = f
+		return false
+	})
+
+	if fn == nil {
+		t.Fatal("findRunningContainer not found in plugins.go — was it renamed? update this test or the SSOT routing assumption")
+	}
+
+	var (
+		callsRunningContainerName bool
+		callsContainerInspectRaw  bool
+	)
+	ast.Inspect(fn.Body, func(n ast.Node) bool {
+		call, ok := n.(*ast.CallExpr)
+		if !ok {
+			return true
+		}
+		sel, ok := call.Fun.(*ast.SelectorExpr)
+		if !ok {
+			return true
+		}
+		// Pkg.Func form: provisioner.RunningContainerName(...)
+		if pkgIdent, ok := sel.X.(*ast.Ident); ok {
+			if pkgIdent.Name == "provisioner" && sel.Sel.Name == "RunningContainerName" {
+				callsRunningContainerName = true
+			}
+		}
+		// Receiver-then-method form: h.docker.ContainerInspect(...) /
+		// p.cli.ContainerInspect(...) — anything ending in
+		// .ContainerInspect that's NOT routed through provisioner.
+		if sel.Sel.Name == "ContainerInspect" {
+			callsContainerInspectRaw = true
+		}
+		return true
+	})
+
+	if !callsRunningContainerName {
+		t.Errorf("findRunningContainer must call provisioner.RunningContainerName for the SSOT inspect — see molecule-core#10. Found no such call.")
+	}
+	if callsContainerInspectRaw {
+		t.Errorf(
+			"findRunningContainer carries a direct ContainerInspect call. This is the parallel-impl drift molecule-core#10 fixed. " +
+				"Either route through provisioner.RunningContainerName OR — if a new use case truly needs a different inspect — extend RunningContainerName's contract first and update this gate to allow the specific delta.",
+		)
+	}
+}
+
+// TestProvisionerIsRunning_RoutesThroughRunningContainerName mirrors the
+// gate above but for the OTHER consumer of the SSOT — Provisioner.IsRunning
+// (called by healthsweep). If a future refactor makes IsRunning carry its
+// own ContainerInspect again, the two consumers' edge-case behaviors will
+// silently drift. Keep them yoked.
+func TestProvisionerIsRunning_RoutesThroughRunningContainerName(t *testing.T) {
+	fset := token.NewFileSet()
+	file, err := parser.ParseFile(fset, "../provisioner/provisioner.go", nil, parser.ParseComments)
+	if err != nil {
+		t.Fatalf("parse provisioner.go: %v", err)
+	}
+
+	var fn *ast.FuncDecl
+	ast.Inspect(file, func(n ast.Node) bool {
+		f, ok := n.(*ast.FuncDecl)
+		if !ok || f.Name.Name != "IsRunning" || f.Recv == nil {
+			return true
+		}
+		// The receiver type must be *Provisioner specifically. CPProvisioner
+		// has its own IsRunning that talks HTTP to the controlplane and is
+		// out of scope for this gate.
+		if !receiverIs(f, "Provisioner") {
+			return true
+		}
+		fn = f
+		return false
+	})
+	if fn == nil {
+		t.Fatal("Provisioner.IsRunning not found — was it renamed? update this test")
+	}
+
+	var (
+		callsRunningContainerName bool
+		callsContainerInspectRaw  bool
+	)
+	ast.Inspect(fn.Body, func(n ast.Node) bool {
+		call, ok := n.(*ast.CallExpr)
+		if !ok {
+			return true
+		}
+		// Same-package call: bare identifier (e.g. RunningContainerName(...)).
+		if id, ok := call.Fun.(*ast.Ident); ok && id.Name == "RunningContainerName" {
+			callsRunningContainerName = true
+			return true
+		}
+		// Selector call: pkg.Func (e.g. provisioner.RunningContainerName)
+		// OR recv.Method (e.g. p.cli.ContainerInspect).
+		sel, ok := call.Fun.(*ast.SelectorExpr)
+		if !ok {
+			return true
+		}
+		switch sel.Sel.Name {
+		case "RunningContainerName":
+			callsRunningContainerName = true
+		case "ContainerInspect":
+			callsContainerInspectRaw = true
+		}
+		return true
+	})
+
+	if !callsRunningContainerName {
+		t.Errorf("Provisioner.IsRunning must call RunningContainerName for the SSOT inspect — see molecule-core#10")
+	}
+	if callsContainerInspectRaw {
+		t.Errorf("Provisioner.IsRunning carries a direct ContainerInspect call; route through RunningContainerName instead")
+	}
+}
+
+// receiverIs reports whether fn's receiver type is typeName or *typeName.
+func receiverIs(fn *ast.FuncDecl, typeName string) bool {
+	if fn.Recv == nil || len(fn.Recv.List) == 0 {
+		return false
+	}
+	expr := fn.Recv.List[0].Type
+	if star, ok := expr.(*ast.StarExpr); ok {
+		expr = star.X
+	}
+	id, ok := expr.(*ast.Ident)
+	return ok && strings.EqualFold(id.Name, typeName)
+}
diff --git a/workspace-server/internal/handlers/plugins_install.go b/workspace-server/internal/handlers/plugins_install.go
index 23ee1913..1665a54e 100644
--- a/workspace-server/internal/handlers/plugins_install.go
+++ b/workspace-server/internal/handlers/plugins_install.go
@@ -32,6 +32,18 @@ import (
 // inside the workspace at startup.
 func (h *PluginsHandler) Install(c *gin.Context) {
 	workspaceID := c.Param("id")
+	// External-runtime guard (molecule-core#10): push-install via docker
+	// exec is meaningless for `runtime='external'` workspaces — they have
+	// no local container. Reject early with a hint pointing at the
+	// pull-mode endpoint, instead of falling through to a misleading
+	// "container not running" 503 from findRunningContainer.
+	if h.isExternalRuntime(workspaceID) {
+		c.JSON(http.StatusUnprocessableEntity, gin.H{
+			"error": "plugin install via push is not supported for external runtimes",
+			"hint":  "external workspaces pull plugins via GET /workspaces/:id/plugins/:name/download",
+		})
+		return
+	}
 	// Cap the JSON body so a pathological POST can't exhaust parser memory.
 	bodyMax := envx.Int64("PLUGIN_INSTALL_BODY_MAX_BYTES", defaultInstallBodyMaxBytes)
 	c.Request.Body = http.MaxBytesReader(c.Writer, c.Request.Body, bodyMax)
@@ -93,6 +105,16 @@ func (h *PluginsHandler) Uninstall(c *gin.Context) {
 	pluginName := c.Param("name")
 	ctx := c.Request.Context()

+	// Mirror Install's external-runtime guard (molecule-core#10) so the
+	// two endpoints reject the same shape with the same message.
+	if h.isExternalRuntime(workspaceID) {
+		c.JSON(http.StatusUnprocessableEntity, gin.H{
+			"error": "plugin uninstall via docker exec is not supported for external runtimes",
+			"hint":  "external workspaces manage their own plugin directory; remove it locally",
+		})
+		return
+	}
+
 	if err := validatePluginName(pluginName); err != nil {
 		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid plugin name"})
 		return
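From the client's side, the guard splits what used to be one ambiguous failure into two distinct signals. A sketch of the caller-side decision; the endpoint paths and the 422 come from this diff and its tests, while the client plumbing and success handling are illustrative:

```go
resp, err := http.Post(
	base+"/workspaces/"+workspaceID+"/plugins",
	"application/json",
	strings.NewReader(`{"source":"local://my-plugin"}`),
)
if err != nil {
	return err
}
defer resp.Body.Close()

switch resp.StatusCode {
case http.StatusUnprocessableEntity:
	// runtime='external': push-install doesn't apply. Per the response
	// hint, pull instead via GET /workspaces/:id/plugins/:name/download.
case http.StatusServiceUnavailable:
	// Container-backed workspace whose container isn't running (or a
	// daemon flake; see the transient-error log in plugins.go).
default:
	// Normal success / other-error handling.
}
```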
With +// `runtime='claude-code'` the install proceeds past the guard; without a +// real plugin source it'll fail downstream (here: 404 from local resolver +// because no plugin staged), which is the correct error to surface. +// +// This is the mutation-test partner: deleting the `runtime == "external"` +// check would still pass TestPluginInstall_ExternalRuntime (because Install +// would 404 instead of 422 — but the test asserts 422), and would still +// pass this test (because both pre-fix and post-fix produce 404 here). +// What this case pins is "non-external still falls through," catching +// any over-eager guard that rejects all runtimes. +func TestPluginInstall_ContainerBackedRuntime_FallsThroughGuard(t *testing.T) { + h := NewPluginsHandler(t.TempDir(), nil, nil). + WithRuntimeLookup(func(workspaceID string) (string, error) { + return "claude-code", nil + }) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "c7c28c0b-4ea5-4e75-9728-3ba860081708"}} + c.Request = httptest.NewRequest( + "POST", + "/workspaces/c7c28c0b-4ea5-4e75-9728-3ba860081708/plugins", + bytes.NewBufferString(`{"source":"local://nonexistent-plugin"}`), + ) + c.Request.Header.Set("Content-Type", "application/json") + + h.Install(c) + + if w.Code == http.StatusUnprocessableEntity { + t.Errorf("runtime='claude-code' must fall through the external guard; got 422: %s", w.Body.String()) + } + // The local resolver will fail to find the plugin → 404. Anything + // other than 422 (which would mean we mis-classified) is fine. + if w.Code != http.StatusNotFound { + t.Errorf("expected 404 (plugin not found in registry), got %d: %s", w.Code, w.Body.String()) + } +} + +// TestPluginInstall_NoRuntimeLookup_FailsOpen — when the runtime lookup +// is unwired (test fixtures, niche deploy shapes) the guard MUST default +// to allowing the install attempt. The downstream findRunningContainer +// step still gates on a real container, so failing open here doesn't +// expose a bypass — it just preserves backwards-compat with deployments +// that haven't wired the lookup. +func TestPluginInstall_NoRuntimeLookup_FailsOpen(t *testing.T) { + h := NewPluginsHandler(t.TempDir(), nil, nil) // NO WithRuntimeLookup + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-no-lookup"}} + c.Request = httptest.NewRequest( + "POST", + "/workspaces/ws-no-lookup/plugins", + bytes.NewBufferString(`{"source":"local://nonexistent"}`), + ) + c.Request.Header.Set("Content-Type", "application/json") + + h.Install(c) + + if w.Code == http.StatusUnprocessableEntity { + t.Errorf("nil runtimeLookup must fall through (fail-open); got 422: %s", w.Body.String()) + } +} + +// TestPluginInstall_RuntimeLookupErrors_FailsOpen — same fail-open story +// for transient DB errors in the lookup. We don't want a momentary +// Postgres hiccup to flip every plugin install into a 422. +func TestPluginInstall_RuntimeLookupErrors_FailsOpen(t *testing.T) { + h := NewPluginsHandler(t.TempDir(), nil, nil). 
+		WithRuntimeLookup(func(workspaceID string) (string, error) {
+			return "", errFakeDB
+		})
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: "ws-db-flake"}}
+	c.Request = httptest.NewRequest(
+		"POST",
+		"/workspaces/ws-db-flake/plugins",
+		bytes.NewBufferString(`{"source":"local://nonexistent"}`),
+	)
+	c.Request.Header.Set("Content-Type", "application/json")
+
+	h.Install(c)
+
+	if w.Code == http.StatusUnprocessableEntity {
+		t.Errorf("runtimeLookup error must fall through (fail-open); got 422: %s", w.Body.String())
+	}
+}
+
+// errFakeDB is a sentinel for the fail-open lookup-error case.
+var errFakeDB = &fakeError{msg: "synthetic db error"}
+
+type fakeError struct{ msg string }
+
+func (e *fakeError) Error() string { return e.msg }
diff --git a/workspace-server/internal/provisioner/provisioner.go b/workspace-server/internal/provisioner/provisioner.go
index 0a9797ad..ca399199 100644
--- a/workspace-server/internal/provisioner/provisioner.go
+++ b/workspace-server/internal/provisioner/provisioner.go
@@ -1073,18 +1073,53 @@ func (p *Provisioner) IsRunning(ctx context.Context, workspaceID string) (bool,
 	if p == nil || p.cli == nil {
 		return false, ErrNoBackend
 	}
-	name := ContainerName(workspaceID)
-	info, err := p.cli.ContainerInspect(ctx, name)
+	name, err := RunningContainerName(ctx, p.cli, workspaceID)
 	if err != nil {
-		if isContainerNotFound(err) {
-			return false, nil
-		}
 		// Transient daemon error: caller treats !running as dead + restarts.
 		// Returning true + the underlying error preserves the error for
 		// metrics/logging without triggering the destructive path.
 		return true, err
 	}
-	return info.State.Running, nil
+	return name != "", nil
+}
+
+// RunningContainerName returns the container name for workspaceID iff the
+// container exists AND is in the Running state. Single source of truth for
+// "what live container should I exec into for this workspace?" — used by
+// both Provisioner.IsRunning (healthsweep) and the plugins handler.
+//
+// Distinguishes three outcomes so callers can pick their own policy:
+//
+//   - ("ws-<id>", nil): container is running. Caller can exec into it.
+//   - ("", nil):        container does not exist OR exists but is stopped
+//                       (NotFound, Exited, Created, Restarting…). Caller
+//                       should treat as a definitive "not running."
+//   - ("", err):        transient daemon error (timeout, socket EOF, ctx
+//                       cancel). Caller should NOT infer "not running" —
+//                       this could be a flaky daemon under load. Decide
+//                       per-callsite whether to fail soft or hard.
+//
+// Background — molecule-core#10: the plugins handler used to carry its own
+// copy of this inspect logic (`findRunningContainer`) which collapsed
+// transient errors into the same "" return as a genuinely-stopped container.
+// That hid daemon flakes as misleading 503 "container not running" responses
+// AND let the two impls drift on edge-case behavior. This is the SSOT.
+func RunningContainerName(ctx context.Context, cli *client.Client, workspaceID string) (string, error) {
+	if cli == nil {
+		return "", ErrNoBackend
+	}
+	name := ContainerName(workspaceID)
+	info, err := cli.ContainerInspect(ctx, name)
+	if err != nil {
+		if isContainerNotFound(err) {
+			return "", nil
+		}
+		return "", err
+	}
+	if info.State.Running {
+		return name, nil
+	}
+	return "", nil
 }

 // isContainerNotFound returns true when the Docker client indicates the
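A sketch of how a future third consumer would honor the three-outcome contract documented on RunningContainerName; the function and its policy are illustrative, only the provisioner call is real:

```go
// execTarget resolves which container to exec into. Three outcomes:
// (name, nil) running; ("", nil) definitively not running; ("", err)
// transient daemon error that must NOT be read as "not running".
func execTarget(ctx context.Context, cli *client.Client, workspaceID string) (string, error) {
	name, err := provisioner.RunningContainerName(ctx, cli, workspaceID)
	if err != nil {
		// Daemon flake: surface it so the caller can retry; never take a
		// destructive "it's dead" path on this signal.
		return "", fmt.Errorf("inspect workspace %s: %w", workspaceID, err)
	}
	if name == "" {
		return "", fmt.Errorf("workspace %s has no running container", workspaceID)
	}
	return name, nil // safe to docker-exec into
}
```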