forked from molecule-ai/molecule-core
fix(platform): address code review — saasMode fallthrough, revoke in SaaS, warn-once on typo
Three Critical issues from the independent review pass: 1. saasMode() typo fallthrough. MOLECULE_DEPLOY_MODE=prod (typo) used to fall through to the MOLECULE_ORG_ID legacy signal, which is set in every tenant. A self-hosted deployment that happened to have MOLECULE_ORG_ID set would silently flip into SaaS mode with the relaxed SSRF posture. Now: non-empty MOLECULE_DEPLOY_MODE that doesn't match the recognised vocabulary falls closed (strict, non- SaaS) and logs a one-shot warning so operators notice the typo. 2. issueAndInjectToken early-return dropped RevokeAllForWorkspace. On re-provision in SaaS mode, the old workspace's live token stayed in the DB. The new workspace's first /registry/register then 401'd because requireWorkspaceToken saw live tokens and skipped the bootstrap-allowed path — and the new workspace had no plaintext to present. Swap the order so revoke runs first in both modes; only the IssueToken + ConfigFiles write is SaaS-skipped. 3. Extended TestSaasMode to cover the typo-fallthrough regression. Three new cases (prod / SaaS-mode / production) pin the fall-closed behaviour. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
1125a029b8
commit
cf107337b6
@ -10,6 +10,7 @@ import (
|
|||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||||
@ -35,26 +36,38 @@ type blockedRange struct {
|
|||||||
// trusted by construction.
|
// trusted by construction.
|
||||||
//
|
//
|
||||||
// Resolution order:
|
// Resolution order:
|
||||||
// 1. MOLECULE_DEPLOY_MODE — explicit operator-set flag ("saas" | "self-hosted").
|
// 1. MOLECULE_DEPLOY_MODE set — explicit operator flag is authoritative.
|
||||||
// Prefer this for new deployments: the signal is unambiguous and can't
|
// Recognised values: "saas" → true. "self-hosted" / "selfhosted" /
|
||||||
// collide with some future non-SaaS path that legitimately needs
|
// "standalone" → false. Any other non-empty value logs a warning and
|
||||||
// MOLECULE_ORG_ID.
|
// falls closed (false) so a typo like MOLECULE_DEPLOY_MODE=prod can't
|
||||||
// 2. MOLECULE_ORG_ID presence — implicit legacy signal, kept so deployments
|
// silently flip a self-hosted deployment into the relaxed SSRF posture.
|
||||||
// that haven't yet adopted MOLECULE_DEPLOY_MODE keep working.
|
// 2. MOLECULE_DEPLOY_MODE unset — fall back to the MOLECULE_ORG_ID presence
|
||||||
|
// signal for deployments that predate the explicit flag.
|
||||||
//
|
//
|
||||||
// Self-hosted / single-container deployments set neither and keep the strict
|
// Self-hosted / single-container deployments set neither and keep the strict
|
||||||
// blocklist.
|
// blocklist.
|
||||||
func saasMode() bool {
|
func saasMode() bool {
|
||||||
mode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_DEPLOY_MODE")))
|
raw := os.Getenv("MOLECULE_DEPLOY_MODE")
|
||||||
switch mode {
|
trimmed := strings.TrimSpace(raw)
|
||||||
case "saas":
|
if trimmed != "" {
|
||||||
return true
|
switch strings.ToLower(trimmed) {
|
||||||
case "self-hosted", "selfhosted", "standalone":
|
case "saas":
|
||||||
return false
|
return true
|
||||||
|
case "self-hosted", "selfhosted", "standalone":
|
||||||
|
return false
|
||||||
|
default:
|
||||||
|
// Warn-once so operators notice the typo without spamming logs.
|
||||||
|
saasModeWarnUnknownOnce.Do(func() {
|
||||||
|
log.Printf("saasMode: MOLECULE_DEPLOY_MODE=%q not recognised; falling back to strict (non-SaaS) mode. Valid values: saas | self-hosted.", raw)
|
||||||
|
})
|
||||||
|
return false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return strings.TrimSpace(os.Getenv("MOLECULE_ORG_ID")) != ""
|
return strings.TrimSpace(os.Getenv("MOLECULE_ORG_ID")) != ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var saasModeWarnUnknownOnce sync.Once
|
||||||
|
|
||||||
type RegistryHandler struct {
|
type RegistryHandler struct {
|
||||||
broadcaster *events.Broadcaster
|
broadcaster *events.Broadcaster
|
||||||
}
|
}
|
||||||
|
|||||||
@ -29,6 +29,13 @@ func TestSaasMode(t *testing.T) {
|
|||||||
{"explicit standalone wins over legacy org id", "standalone", "some-org", false},
|
{"explicit standalone wins over legacy org id", "standalone", "some-org", false},
|
||||||
{"whitespace-only deploy mode falls through to legacy", " ", "some-org", true},
|
{"whitespace-only deploy mode falls through to legacy", " ", "some-org", true},
|
||||||
{"whitespace-only org id falls through to false", "", " ", false},
|
{"whitespace-only org id falls through to false", "", " ", false},
|
||||||
|
// Typo / unknown values: must fall closed (strict / self-hosted)
|
||||||
|
// instead of falling through to the MOLECULE_ORG_ID legacy signal.
|
||||||
|
// Any tenant deployment has MOLECULE_ORG_ID set, so a typo like
|
||||||
|
// MOLECULE_DEPLOY_MODE=prod used to silently flip into SaaS mode.
|
||||||
|
{"typo prod falls closed even with org id set", "prod", "some-org", false},
|
||||||
|
{"typo SaaS-mode falls closed even with org id set", "SaaS-mode", "some-org", false},
|
||||||
|
{"typo production falls closed", "production", "", false},
|
||||||
}
|
}
|
||||||
for _, tc := range cases {
|
for _, tc := range cases {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
|||||||
@ -330,24 +330,26 @@ func (h *WorkspaceHandler) buildProvisionerConfig(
|
|||||||
// provisioning continues — the workspace will get 401 on its first heartbeat
|
// provisioning continues — the workspace will get 401 on its first heartbeat
|
||||||
// and can recover on the next restart.
|
// and can recover on the next restart.
|
||||||
func (h *WorkspaceHandler) issueAndInjectToken(ctx context.Context, workspaceID string, cfg *provisioner.WorkspaceConfig) {
|
func (h *WorkspaceHandler) issueAndInjectToken(ctx context.Context, workspaceID string, cfg *provisioner.WorkspaceConfig) {
|
||||||
// SaaS mode skip: the CP provisioner ships workspaces to a remote EC2
|
// Revoke any existing live tokens FIRST — this must run in both modes.
|
||||||
// via user-data and does not carry cfg.ConfigFiles across the wire, so
|
// In SaaS mode the revoke is load-bearing on re-provision: without it,
|
||||||
// any token we mint here never reaches the workspace. Minting it anyway
|
// the previous workspace instance's live token sits in the DB, and
|
||||||
// would leave a live token in the DB with no plaintext on disk —
|
// RegistryHandler.requireWorkspaceToken on the fresh instance's first
|
||||||
// RegistryHandler.requireWorkspaceToken then 401s every /registry/register
|
// /registry/register would reject it (live token exists → no
|
||||||
// attempt because the workspace has a live token on file (which trips
|
// bootstrap allowance, but the new workspace has no plaintext because
|
||||||
// bootstrap-allowed) but no way to present it. Defer to the register
|
// the CP provisioner doesn't carry cfg.ConfigFiles across user-data).
|
||||||
// handler's own bootstrap-issuance path, which mints a token only after
|
// Revoking clears the gate so the register handler's bootstrap path
|
||||||
// a successful first register and returns the plaintext in the response
|
// can mint a fresh token and return the plaintext in the response.
|
||||||
// for the runtime to persist locally.
|
if err := wsauth.RevokeAllForWorkspace(ctx, db.DB, workspaceID); err != nil {
|
||||||
if saasMode() {
|
log.Printf("Provisioner: failed to revoke existing tokens for %s: %v — skipping auth-token injection", workspaceID, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Revoke any existing live tokens. If this fails we bail out rather than
|
// SaaS mode skips the IssueToken + ConfigFiles write because both
|
||||||
// issuing a second live token whose plaintext we can't also deliver.
|
// only make sense on the Docker provisioner's volume-mount delivery
|
||||||
if err := wsauth.RevokeAllForWorkspace(ctx, db.DB, workspaceID); err != nil {
|
// path. The register handler mints a fresh token on first successful
|
||||||
log.Printf("Provisioner: failed to revoke existing tokens for %s: %v — skipping auth-token injection", workspaceID, err)
|
// register and returns the plaintext in the response body for the
|
||||||
|
// runtime to persist locally.
|
||||||
|
if saasMode() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user