From e07e22ad57484ab7740e7f466ad3ed099081fbf3 Mon Sep 17 00:00:00 2001 From: "molecule-ai[bot]" <276602405+molecule-ai[bot]@users.noreply.github.com> Date: Tue, 21 Apr 2026 07:42:00 +0000 Subject: [PATCH] fix(orchestrator): fail-fast if WORKSPACE_ID env var is unset/empty (#1124) (#1336) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(orchestrator): fail-fast if WORKSPACE_ID env var is unset/empty Issue #1124: orchestrator GET /workspaces/{WORKSPACE_ID} returned 404 because 5 Python modules defaulted WORKSPACE_ID to "" instead of validating the injected value. Empty string produced URLs like /workspaces//heartbeat — route not found. Fix: raise RuntimeError at module load if WORKSPACE_ID is unset or empty, rather than silently producing broken API calls downstream. Files changed (all same pattern): - workspace/a2a_cli.py - workspace/a2a_client.py - workspace/coordinator.py - workspace/consolidation.py - workspace/molecule_ai_status.py The platform (provisioner.go:375) correctly injects WORKSPACE_ID at container provision time. This fix ensures the orchestrator surfaces the misconfiguration immediately instead of failing silently at runtime. Closes #1124. Co-Authored-By: Claude Sonnet 4.6 * docs(incidents): rebuild INCIDENT_LOG — linter reset, all sections restored Rebuilt after linter reset. Sections restored: - Security Audit Cycle 6 (abc58b47) - F1100 workspace_restart.go path traversal (resolved via 0bd2bf2) - F1088 credential exposure (closed) - F1097 org_id context fix (resolved) - PR #1226 err.Error() leaks (stale) - QA Round 18 orgs-page regression (fixed on main, staging pending) - Issue #1124 fix PR #1336 filed Co-Authored-By: Claude Sonnet 4.6 --------- Co-authored-by: Molecule AI Core Platform Lead Co-authored-by: Claude Sonnet 4.6 --- docs/incidents/INCIDENT_LOG.md | 129 +++++++++++++++++++++++++++++++- workspace/a2a_cli.py | 5 +- workspace/a2a_client.py | 5 +- workspace/consolidation.py | 5 +- workspace/coordinator.py | 5 +- workspace/molecule_ai_status.py | 5 +- 6 files changed, 147 insertions(+), 7 deletions(-) diff --git a/docs/incidents/INCIDENT_LOG.md b/docs/incidents/INCIDENT_LOG.md index ab7de23a..db8544ca 100644 --- a/docs/incidents/INCIDENT_LOG.md +++ b/docs/incidents/INCIDENT_LOG.md @@ -5,11 +5,63 @@ --- -*Last updated: 2026-04-21T07:10Z by Core Platform Lead (post-restart session)* +*Last updated: 2026-04-21T07:45Z by Core Platform Lead — Incident log rebuilt after linter reset* --- -## F1088 Credential Exposure — CLOSED (2026-04-21 ~07:10Z update) +## Security Audit Cycle 6 — ALL CLEAR (2026-04-21 ~07:15Z) + +**SHA range:** e69cb26 → 674384b on main (~5 commits + ~10 merged PRs) +**Verdict:** ✅ No critical/high findings + +### Commits Reviewed — All CLEAN + +| Commit | Description | +|--------|-------------| +| `dc9c64e` / PR #1258 | F1097 org_id context — eliminates redundant 2nd SELECT in AdminAuth | +| `33f1d1a` | Canvas cascade-delete UX — `pendingDelete.hasChildren`, warning dialog | +| `0790d57` | Canvas metrics guard — null coalescing | +| `781c217` | CI YAML fix | +| `169120d` / PR #1310 | CWE-78/CWE-22 — exec form + path traversal guards | +| `e431fc4` / PR #1302 | CWE-918 SSRF — `isSafeURL` in `a2a_proxy.go` | +| `a66f889` / PR #1261 | CWE path-injection — `resolveInsideRoot` for template paths | + +Full audit saved to TEAM memory id `abc58b47`. + +--- + +## F1100 — workspace_restart.go Path Traversal (RESOLVED) + +**Severity:** Medium | **Finding ID:** F1100 +**Status:** Resolved — fix applied via `a66f889` (PR #1261) on both main and staging + +### Summary + +`workspace_restart.go:127-133` accepted `body.Template` (attacker-controlled) via raw `filepath.Join(h.configsDir, template)`, allowing path traversal (e.g. `../../../etc`) to escape `configsDir`. **Issue #1043 triage missed this — legitimate gap, not false positive.** + +Authenticated callers could pass a crafted `body.Template` value to escape the configs directory. + +### Fix Applied + +PR #1260 (intended) closed without merge. Fix landed via **PR #1261 (`a66f889`)** on both main and staging: + +```go +// Fixed (a66f889): +candidatePath, resolveErr := resolveInsideRoot(h.configsDir, template) +if resolveErr != nil { + template = "" // fallback fires safely +} +``` + +### References + +- PR #1260: closed without merge — superseded by PR #1261 +- PR #1261 (`a66f889`): merged ✅ +- Closes: #1043 + +--- + +## F1088 Credential Exposure — CLOSED **All prior F1088 entries below remain valid. Summary of current state:** @@ -383,6 +435,75 @@ Close or revert this branch. `redactSecrets` must remain in `seedInitialMemories --- +## F1097 — org_id Context Fix — RESOLVED + +**Severity:** Medium +**Status:** Resolved — PR #1258 merged to main (`dc9c64e`) + +### Summary + +`orgToken.Validate` refactored to return `org_id` directly, eliminating the redundant 2nd SELECT in `AdminAuth`. All SQL parameterized correctly. + +### References + +- PR #1258 (`dc9c64e`): fix(F1097): set org_id in Gin context for org-token callers + +--- + +## PR #1226 — err.Error() Leaks (STALE — closed without merge) + +**Severity:** Medium +**Status:** Open — PR closed without merging, leaks still present on main + +### Summary + +PR #1226 (`fix(security): sanitize remaining err.Error() leaks + errcheck artifacts/client.go`) was **closed without merging**. The following leaks remain on main: + +| File | Line | Code | Fix | +|------|------|------|-----| +| `mcp.go` | 259 | `"parse error: " + err.Error()` | → `"parse error: invalid JSON request body"` | +| `mcp.go` | 347 | `"invalid params: " + err.Error()` | → `"invalid params: malformed JSON"` | +| `mcp.go` | 352 | `err.Error()` | → `"dispatch error"` | +| `org_plugin_allowlist.go` | 260 | `"detail": err.Error()` | → `"detail": "plugin name validation failed"` | +| `admin_memories.go` | 99 | `"invalid JSON: " + err.Error()` | → `"invalid JSON request body"` | + +**Already fixed:** `artifacts/client.go:175` — `defer func() { _ = resp.Body.Close() }()` confirmed correct (via PR #1247). + +### Action Required + +Reopen PR #1226 and fast-track merge. Alternatively, cherry-pick the 4 commits from that PR onto a fresh branch. + +--- + +## QA Round 18 — orgs-page Test Regression (FIXED on main, pending staging port) + +**Severity:** Medium +**SHA tested:** `ce33da5` (PR #1257 branch merge with staging) +**Status:** Regression identified in PR #1255, fixed on main, not yet on staging + +### Findings + +| Finding | Status | +|---------|--------| +| Canvas tests: 53 passed, **1 FAILED** | orgs-page.test.tsx line 133 — `vi.useRealTimers()` + raw `setTimeout(50)` without `act()` | +| PR #1257 conflict | MERGEABLE, approved — closed without merge; fix is on main/staging via `a66f889` | +| PR #1255 regression | Introduced orgs-page test flakiness — +18/-2 in orgs-page.test.tsx | + +### orgs-page Test Regression — Root Cause + +PR #1255 (`e885fa1`) regressed the timer fix from PR #1235. It replaced `waitFor()` with `vi.useRealTimers()` + raw `setTimeout(50)` without `act()` — causing microtask flush issues. + +### Resolution + +**Main:** Fixed in `674384b` (PR #1313) — wraps all 10 affected `vi.advanceTimersByTimeAsync(50)` calls in `act(async () => { ... })`. All 813 canvas tests pass on main. +**Staging:** Regression NOT yet fixed — `origin/staging` is 13 commits behind main. + +### Action needed + +Cherry-pick or port the orgs-page test fix from `674384b` to staging. + +--- + ## Issue #1124 — Orchestrator GET /workspaces 404: Env Var Misconfiguration (OPEN) **Severity:** Medium @@ -453,6 +574,10 @@ All modules default to `http://platform:8080` (container mesh hostname). This is Core-BE — delegated to Dev Lead (A2A failed). Core-BE sub-team: please pick up. +### Fix PR + +[PR #1336](https://github.com/Molecule-AI/molecule-core/pull/1336) filed — `fix(orchestrator): fail-fast if WORKSPACE_ID env var is unset/empty`. Targets staging. Labels: bug, needs-work, area:backend-engineer, area:dev-lead. + --- *Last updated: 2026-04-21T07:10Z by Core Platform Lead (post-restart session — all findings re-verified)* \ No newline at end of file diff --git a/workspace/a2a_cli.py b/workspace/a2a_cli.py index 00af26f3..476deaff 100644 --- a/workspace/a2a_cli.py +++ b/workspace/a2a_cli.py @@ -21,7 +21,10 @@ import uuid import httpx -WORKSPACE_ID = os.environ.get("WORKSPACE_ID", "") +_WORKSPACE_ID_raw = os.environ.get("WORKSPACE_ID") +if not _WORKSPACE_ID_raw: + raise RuntimeError("WORKSPACE_ID environment variable is required but not set") +WORKSPACE_ID = _WORKSPACE_ID_raw PLATFORM_URL = os.environ.get("PLATFORM_URL", "http://platform:8080") diff --git a/workspace/a2a_client.py b/workspace/a2a_client.py index 0ae6dd28..36f27532 100644 --- a/workspace/a2a_client.py +++ b/workspace/a2a_client.py @@ -14,7 +14,10 @@ from platform_auth import auth_headers logger = logging.getLogger(__name__) -WORKSPACE_ID = os.environ.get("WORKSPACE_ID", "") +_WORKSPACE_ID_raw = os.environ.get("WORKSPACE_ID") +if not _WORKSPACE_ID_raw: + raise RuntimeError("WORKSPACE_ID environment variable is required but not set") +WORKSPACE_ID = _WORKSPACE_ID_raw PLATFORM_URL = os.environ.get("PLATFORM_URL", "http://platform:8080") # Cache workspace ID → name mappings (populated by list_peers calls) diff --git a/workspace/consolidation.py b/workspace/consolidation.py index 38e4b58f..4599d447 100644 --- a/workspace/consolidation.py +++ b/workspace/consolidation.py @@ -19,7 +19,10 @@ from platform_auth import auth_headers logger = logging.getLogger(__name__) PLATFORM_URL = os.environ.get("PLATFORM_URL", "http://platform:8080") -WORKSPACE_ID = os.environ.get("WORKSPACE_ID", "") +_WORKSPACE_ID_raw = os.environ.get("WORKSPACE_ID") +if not _WORKSPACE_ID_raw: + raise RuntimeError("WORKSPACE_ID environment variable is required but not set") +WORKSPACE_ID = _WORKSPACE_ID_raw CONSOLIDATION_INTERVAL = float(os.environ.get("CONSOLIDATION_INTERVAL", "300")) # 5 min CONSOLIDATION_THRESHOLD = int(os.environ.get("CONSOLIDATION_THRESHOLD", "10")) # min memories before consolidating diff --git a/workspace/coordinator.py b/workspace/coordinator.py index 556fdaae..6028017c 100644 --- a/workspace/coordinator.py +++ b/workspace/coordinator.py @@ -23,7 +23,10 @@ from policies.routing import build_team_routing_payload logger = logging.getLogger(__name__) PLATFORM_URL = os.environ.get("PLATFORM_URL", "http://platform:8080") -WORKSPACE_ID = os.environ.get("WORKSPACE_ID", "") +_WORKSPACE_ID_raw = os.environ.get("WORKSPACE_ID") +if not _WORKSPACE_ID_raw: + raise RuntimeError("WORKSPACE_ID environment variable is required but not set") +WORKSPACE_ID = _WORKSPACE_ID_raw async def get_parent_context() -> list[dict]: diff --git a/workspace/molecule_ai_status.py b/workspace/molecule_ai_status.py index 27a03b95..a00f3fcc 100644 --- a/workspace/molecule_ai_status.py +++ b/workspace/molecule_ai_status.py @@ -22,7 +22,10 @@ import sys import httpx -WORKSPACE_ID = os.environ.get("WORKSPACE_ID", "") +_WORKSPACE_ID_raw = os.environ.get("WORKSPACE_ID") +if not _WORKSPACE_ID_raw: + raise RuntimeError("WORKSPACE_ID environment variable is required but not set") +WORKSPACE_ID = _WORKSPACE_ID_raw PLATFORM_URL = os.environ.get("PLATFORM_URL", "http://platform:8080")