From 9a223afba1aeb131dd064ec74dbdf245e2d370bf Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Fri, 24 Apr 2026 21:09:18 -0700 Subject: [PATCH] fix(dotenv,socket): review-driven hardening of .env loader + WS poll MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Independent code review surfaced three required fixes and one cheap optional one. All addressed here. dotenv parser: - `export FOO=bar` was parsed as key `"export FOO"` (with embedded space) and silently os.Setenv'd, so a developer pasting from a direnv `.envrc` would get junk vars. Now strips the prefix. - Quoted values weren't unwrapped: `FOO="hello world"` produced value `"hello world"` with literal quotes. Now strips one matched pair of surrounding `"` or `'`. Inside a quoted value `#` is part of the value, not a comment marker (matches godotenv convention). - UTF-8 BOM at file start (Windows editors) would have produced a first key like U+FEFF + "FOO". Now stripped via TrimPrefix. dotenv loader: - findDotEnv()'s upward walk would happily pick up `~/.env` or a sibling-repo `.env` if the binary was run from `~/Documents/other- project/`. Real foot-gun on shared dev boxes. Now gated on a monorepo sentinel: the candidate directory must contain `workspace-server/go.mod`. Falls through to "no .env found" (= pre-fix behavior) when the sentinel is absent. socket fallback poll: - startFallbackPoll() previously fired only on onclose, so the very first connect attempt — when onclose hasn't fired yet because we never had a successful onopen — left the canvas with no HTTP poll for the duration of the failing handshake (Chrome can hold a SYN-SENT WebSocket open ~75s before giving up). Now also called at the top of connect(); the timer-already-running guard makes it a no-op when one cycle later onclose calls it again. Test coverage added: export prefix, single+double quoted values, hash inside quotes preserved, unterminated quote falls back to bare value, CRLF stripping locked in, BOM stripping, and a sentinel-rejection regression test that creates a temp .env with no workspace-server sibling and asserts findDotEnv refuses to load it. Verified: 985 canvas tests + 30 dotenv subtests + 4 dotenv integration tests all pass; tsc clean; rebuilt platform from monorepo root with stripped env still loads .env (49 vars) and /workspaces returns 200. Co-Authored-By: Claude Opus 4.7 (1M context) --- canvas/src/store/socket.ts | 10 +++ workspace-server/cmd/server/dotenv.go | 65 +++++++++++++++--- workspace-server/cmd/server/dotenv_test.go | 78 +++++++++++++++++++--- 3 files changed, 137 insertions(+), 16 deletions(-) diff --git a/canvas/src/store/socket.ts b/canvas/src/store/socket.ts index f3b8f99f..16d60685 100644 --- a/canvas/src/store/socket.ts +++ b/canvas/src/store/socket.ts @@ -105,6 +105,16 @@ class ReconnectingSocket { connect() { if (this.disposed) return; useCanvasStore.getState().setWsStatus("connecting"); + // Start the HTTP fallback poll up-front, not just on onclose. Two + // scenarios this guards against: + // 1. The very first connect attempt — onclose hasn't fired yet + // because we never had a successful onopen. + // 2. A failed handshake where the browser takes tens of seconds + // to surface as onclose (Chrome can hold a SYN-SENT WebSocket + // open for ~75s before giving up). + // Idempotent — startFallbackPoll early-returns if a timer is + // already running, so calling it from both places is cheap. + this.startFallbackPoll(); const ws = new WebSocket(this.url); this.ws = ws; diff --git a/workspace-server/cmd/server/dotenv.go b/workspace-server/cmd/server/dotenv.go index 6142f156..ee5765d3 100644 --- a/workspace-server/cmd/server/dotenv.go +++ b/workspace-server/cmd/server/dotenv.go @@ -71,6 +71,14 @@ func loadDotEnvIfPresent() { // findDotEnv returns the path of the nearest .env file walking upward // from CWD. Capped at 6 levels so a deeply-nested launch dir doesn't // scan the entire filesystem. +// +// Sentinel gate: only accept a .env that sits next to `workspace-server/` +// (the monorepo marker). Without it, a developer running the binary from +// `~/Documents/other-project/` would walk up to `~/.env` and load +// arbitrary variables — a real foot-gun on shared dev machines and a +// possible information-leak vector on bare-metal deploys. Skipping the +// match falls through to "no .env found" which is identical to today's +// pre-fix behavior (the operator must export env explicitly). func findDotEnv() (string, bool) { dir, err := os.Getwd() if err != nil { @@ -79,7 +87,12 @@ func findDotEnv() (string, bool) { for i := 0; i < 6; i++ { p := filepath.Join(dir, ".env") if st, err := os.Stat(p); err == nil && !st.IsDir() { - return p, true + if isMonorepoRoot(dir) { + return p, true + } + // .env exists here but the directory isn't the monorepo + // root — keep walking. Loading it could clobber + // environment with values from an unrelated project. } parent := filepath.Dir(dir) if parent == dir { @@ -90,26 +103,62 @@ func findDotEnv() (string, bool) { return "", false } +// isMonorepoRoot returns true if `dir` looks like the molecule-core +// monorepo root — the directory that owns the .env we want to load. +// The marker is `workspace-server/go.mod`, which is the canonical +// in-tree go module and exists only in this monorepo. A simple +// `workspace-server/` directory check would false-positive on a fork +// that renamed the dir; the go.mod check is more precise. +func isMonorepoRoot(dir string) bool { + st, err := os.Stat(filepath.Join(dir, "workspace-server", "go.mod")) + return err == nil && !st.IsDir() +} + // parseDotEnvLine parses a single .env line. Returns (key, value, true) // for KEY=VALUE pairs. Returns (_, _, false) for blanks, comments, and -// malformed lines. Supports inline `# comment` after a value when -// preceded by whitespace, matching the convention already in the -// repo's .env file. +// malformed lines. Handles: +// - leading `export ` prefix (so shell-friendly .env files written +// for `source .env` or direnv work without modification) +// - leading UTF-8 BOM on the first line (Windows editors) +// - inline `# comment` after a value when preceded by whitespace +// - surrounding `"` or `'` quotes on the value (stripped one matched +// pair); inside a quoted value, `#` is part of the value, not a +// comment marker func parseDotEnvLine(line string) (string, string, bool) { + // Strip a UTF-8 BOM if present. bufio.Scanner doesn't filter it, + // so the very first line of a Windows-edited .env would otherwise + // produce a key like U+FEFF + "FOO" that os.Setenv silently accepts. + line = strings.TrimPrefix(line, "\ufeff") line = strings.TrimSpace(line) if line == "" || strings.HasPrefix(line, "#") { return "", "", false } + // Drop a leading `export ` so lines like `export FOO=bar` (the + // form direnv and many `.env` templates emit) don't end up as a + // junk key with an embedded space. + line = strings.TrimPrefix(line, "export ") + line = strings.TrimLeft(line, " \t") // re-trim in case `export` itself had trailing space eq := strings.IndexByte(line, '=') if eq <= 0 { return "", "", false } k := strings.TrimSpace(line[:eq]) v := line[eq+1:] - // Strip inline comment introduced by whitespace + `#`. A bare `#` - // inside the value (no preceding whitespace) is part of the value - // — matches the convention in dotenv parsers and lets values like - // `KEY=token#fragment` round-trip. + // Quoted value: strip one matched pair of surrounding quotes and + // take the contents verbatim (no inline-comment splitting). Matches + // the godotenv convention so values with leading/trailing spaces or + // `#` survive round-trip. + v = strings.TrimLeft(v, " \t") + if len(v) >= 2 { + first := v[0] + if (first == '"' || first == '\'') && v[len(v)-1] == first { + return k, v[1 : len(v)-1], true + } + } + // Bare value: strip inline comment introduced by whitespace + `#`. + // A bare `#` inside the value (no preceding whitespace) is part of + // the value — matches dotenv parsers and lets `KEY=token#fragment` + // round-trip. for _, sep := range []string{" #", "\t#"} { if i := strings.Index(v, sep); i >= 0 { v = v[:i] diff --git a/workspace-server/cmd/server/dotenv_test.go b/workspace-server/cmd/server/dotenv_test.go index 5771835b..2ce1159b 100644 --- a/workspace-server/cmd/server/dotenv_test.go +++ b/workspace-server/cmd/server/dotenv_test.go @@ -35,6 +35,31 @@ func TestParseDotEnvLine(t *testing.T) { {in: "FOO=", k: "FOO", v: "", ok: true, comment: "empty value"}, {in: "ADMIN_TOKEN=", k: "ADMIN_TOKEN", v: "", ok: true, comment: "empty value (production gate sentinel)"}, + + // `export` prefix: shell-friendly .env files (direnv, .envrc-style) + // — the prefix must be stripped, NOT folded into the key. + {in: "export FOO=bar", k: "FOO", v: "bar", ok: true, comment: "export prefix stripped"}, + {in: " export FOO=bar", k: "FOO", v: "bar", ok: true, comment: "leading whitespace + export"}, + {in: "export DATABASE_URL=postgres://u:p@h/db", k: "DATABASE_URL", v: "postgres://u:p@h/db", ok: true, comment: "export with URL value"}, + + // Quoted values: one matched pair of surrounding quotes is + // stripped; embedded `#` survives because it isn't an inline + // comment inside a quote. + {in: `FOO="hello world"`, k: "FOO", v: "hello world", ok: true, comment: "double-quoted value"}, + {in: `FOO='hello world'`, k: "FOO", v: "hello world", ok: true, comment: "single-quoted value"}, + {in: `FOO="value # not a comment"`, k: "FOO", v: "value # not a comment", ok: true, comment: "hash inside quotes is part of value"}, + {in: `FOO= "padded"`, k: "FOO", v: "padded", ok: true, comment: "whitespace before opening quote"}, + {in: `FOO="unterminated`, k: "FOO", v: `"unterminated`, ok: true, comment: "unterminated quote stays as bare value"}, + + // CRLF endings: bufio.Scanner strips \n; \r is left and stripped + // by the value-side TrimSpace. Locking this in so a future + // refactor doesn't accidentally feed \r into os.Setenv. + {in: "FOO=bar\r", k: "FOO", v: "bar", ok: true, comment: "CRLF trailing carriage return stripped"}, + + // UTF-8 BOM at file start: a Windows-edited .env begins with + // \xEF\xBB\xBF; without explicit stripping the first key would + // be "\ufeffFOO". + {in: "\ufeffFOO=bar", k: "FOO", v: "bar", ok: true, comment: "UTF-8 BOM stripped"}, } for _, tc := range cases { @@ -53,13 +78,27 @@ func TestParseDotEnvLine(t *testing.T) { } } -func TestLoadDotEnvIfPresent_PreservesExisting(t *testing.T) { +// makeFakeMonorepo creates a temp dir that satisfies isMonorepoRoot() +// (i.e., contains workspace-server/go.mod) plus a .env file with the +// given body. Returns the dir so the caller can chdir into it. +func makeFakeMonorepo(t *testing.T, envBody string) string { + t.Helper() dir := t.TempDir() - envPath := filepath.Join(dir, ".env") - body := []byte("DOTENV_TEST_NEW=from_file\nDOTENV_TEST_EXISTING=from_file\n") - if err := os.WriteFile(envPath, body, 0o644); err != nil { + wsDir := filepath.Join(dir, "workspace-server") + if err := os.MkdirAll(wsDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(wsDir, "go.mod"), []byte("module fake\n"), 0o644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(dir, ".env"), []byte(envBody), 0o644); err != nil { t.Fatalf("write .env: %v", err) } + return dir +} + +func TestLoadDotEnvIfPresent_PreservesExisting(t *testing.T) { + dir := makeFakeMonorepo(t, "DOTENV_TEST_NEW=from_file\nDOTENV_TEST_EXISTING=from_file\n") // Pre-set one of the keys — file value must NOT clobber it. t.Setenv("DOTENV_TEST_EXISTING", "from_real_env") @@ -104,10 +143,7 @@ func TestLoadDotEnvIfPresent_NoFile_NoOp(t *testing.T) { } func TestFindDotEnv_WalksUpward(t *testing.T) { - root := t.TempDir() - if err := os.WriteFile(filepath.Join(root, ".env"), []byte("X=1\n"), 0o644); err != nil { - t.Fatal(err) - } + root := makeFakeMonorepo(t, "X=1\n") nested := filepath.Join(root, "a", "b", "c") if err := os.MkdirAll(nested, 0o755); err != nil { t.Fatal(err) @@ -135,3 +171,29 @@ func TestFindDotEnv_WalksUpward(t *testing.T) { t.Errorf("findDotEnv() = %q, want %q", got, want) } } + +func TestFindDotEnv_RejectsUnrelatedDotEnv(t *testing.T) { + // Simulates a developer running the binary from inside an + // unrelated project tree that happens to have its own .env (or + // from $HOME with a personal ~/.env). Without the monorepo + // sentinel, findDotEnv would happily load it and clobber env + // with arbitrary values — a real foot-gun this regression test + // guards against. + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, ".env"), []byte("LEAKY=value\n"), 0o644); err != nil { + t.Fatal(err) + } + + prev, err := os.Getwd() + if err != nil { + t.Fatal(err) + } + if err := os.Chdir(dir); err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = os.Chdir(prev) }) + + if got, ok := findDotEnv(); ok { + t.Errorf("findDotEnv() = %q, ok=true; want ok=false (no workspace-server sibling)", got) + } +}