diff --git a/workspace-server/internal/handlers/registry.go b/workspace-server/internal/handlers/registry.go index 2cb37b67..1d9d5746 100644 --- a/workspace-server/internal/handlers/registry.go +++ b/workspace-server/internal/handlers/registry.go @@ -10,6 +10,7 @@ import ( "net/url" "os" "strings" + "sync" "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" "github.com/Molecule-AI/molecule-monorepo/platform/internal/events" @@ -35,26 +36,38 @@ type blockedRange struct { // trusted by construction. // // Resolution order: -// 1. MOLECULE_DEPLOY_MODE — explicit operator-set flag ("saas" | "self-hosted"). -// Prefer this for new deployments: the signal is unambiguous and can't -// collide with some future non-SaaS path that legitimately needs -// MOLECULE_ORG_ID. -// 2. MOLECULE_ORG_ID presence — implicit legacy signal, kept so deployments -// that haven't yet adopted MOLECULE_DEPLOY_MODE keep working. +// 1. MOLECULE_DEPLOY_MODE set — explicit operator flag is authoritative. +// Recognised values: "saas" → true. "self-hosted" / "selfhosted" / +// "standalone" → false. Any other non-empty value logs a warning and +// falls closed (false) so a typo like MOLECULE_DEPLOY_MODE=prod can't +// silently flip a self-hosted deployment into the relaxed SSRF posture. +// 2. MOLECULE_DEPLOY_MODE unset — fall back to the MOLECULE_ORG_ID presence +// signal for deployments that predate the explicit flag. // // Self-hosted / single-container deployments set neither and keep the strict // blocklist. func saasMode() bool { - mode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_DEPLOY_MODE"))) - switch mode { - case "saas": - return true - case "self-hosted", "selfhosted", "standalone": - return false + raw := os.Getenv("MOLECULE_DEPLOY_MODE") + trimmed := strings.TrimSpace(raw) + if trimmed != "" { + switch strings.ToLower(trimmed) { + case "saas": + return true + case "self-hosted", "selfhosted", "standalone": + return false + default: + // Warn-once so operators notice the typo without spamming logs. + saasModeWarnUnknownOnce.Do(func() { + log.Printf("saasMode: MOLECULE_DEPLOY_MODE=%q not recognised; falling back to strict (non-SaaS) mode. Valid values: saas | self-hosted.", raw) + }) + return false + } } return strings.TrimSpace(os.Getenv("MOLECULE_ORG_ID")) != "" } +var saasModeWarnUnknownOnce sync.Once + type RegistryHandler struct { broadcaster *events.Broadcaster } diff --git a/workspace-server/internal/handlers/ssrf_test.go b/workspace-server/internal/handlers/ssrf_test.go index 38359329..c58dbbe3 100644 --- a/workspace-server/internal/handlers/ssrf_test.go +++ b/workspace-server/internal/handlers/ssrf_test.go @@ -29,6 +29,13 @@ func TestSaasMode(t *testing.T) { {"explicit standalone wins over legacy org id", "standalone", "some-org", false}, {"whitespace-only deploy mode falls through to legacy", " ", "some-org", true}, {"whitespace-only org id falls through to false", "", " ", false}, + // Typo / unknown values: must fall closed (strict / self-hosted) + // instead of falling through to the MOLECULE_ORG_ID legacy signal. + // Any tenant deployment has MOLECULE_ORG_ID set, so a typo like + // MOLECULE_DEPLOY_MODE=prod used to silently flip into SaaS mode. + {"typo prod falls closed even with org id set", "prod", "some-org", false}, + {"typo SaaS-mode falls closed even with org id set", "SaaS-mode", "some-org", false}, + {"typo production falls closed", "production", "", false}, } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { diff --git a/workspace-server/internal/handlers/workspace_provision.go b/workspace-server/internal/handlers/workspace_provision.go index 45d4a3bb..256c36d5 100644 --- a/workspace-server/internal/handlers/workspace_provision.go +++ b/workspace-server/internal/handlers/workspace_provision.go @@ -330,24 +330,26 @@ func (h *WorkspaceHandler) buildProvisionerConfig( // provisioning continues — the workspace will get 401 on its first heartbeat // and can recover on the next restart. func (h *WorkspaceHandler) issueAndInjectToken(ctx context.Context, workspaceID string, cfg *provisioner.WorkspaceConfig) { - // SaaS mode skip: the CP provisioner ships workspaces to a remote EC2 - // via user-data and does not carry cfg.ConfigFiles across the wire, so - // any token we mint here never reaches the workspace. Minting it anyway - // would leave a live token in the DB with no plaintext on disk — - // RegistryHandler.requireWorkspaceToken then 401s every /registry/register - // attempt because the workspace has a live token on file (which trips - // bootstrap-allowed) but no way to present it. Defer to the register - // handler's own bootstrap-issuance path, which mints a token only after - // a successful first register and returns the plaintext in the response - // for the runtime to persist locally. - if saasMode() { + // Revoke any existing live tokens FIRST — this must run in both modes. + // In SaaS mode the revoke is load-bearing on re-provision: without it, + // the previous workspace instance's live token sits in the DB, and + // RegistryHandler.requireWorkspaceToken on the fresh instance's first + // /registry/register would reject it (live token exists → no + // bootstrap allowance, but the new workspace has no plaintext because + // the CP provisioner doesn't carry cfg.ConfigFiles across user-data). + // Revoking clears the gate so the register handler's bootstrap path + // can mint a fresh token and return the plaintext in the response. + if err := wsauth.RevokeAllForWorkspace(ctx, db.DB, workspaceID); err != nil { + log.Printf("Provisioner: failed to revoke existing tokens for %s: %v — skipping auth-token injection", workspaceID, err) return } - // Revoke any existing live tokens. If this fails we bail out rather than - // issuing a second live token whose plaintext we can't also deliver. - if err := wsauth.RevokeAllForWorkspace(ctx, db.DB, workspaceID); err != nil { - log.Printf("Provisioner: failed to revoke existing tokens for %s: %v — skipping auth-token injection", workspaceID, err) + // SaaS mode skips the IssueToken + ConfigFiles write because both + // only make sense on the Docker provisioner's volume-mount delivery + // path. The register handler mints a fresh token on first successful + // register and returns the plaintext in the response body for the + // runtime to persist locally. + if saasMode() { return }