From e1cdb5c9c6c0377db594640cd4e647fa8d7367c7 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Wed, 15 Apr 2026 21:17:37 -0700 Subject: [PATCH] fix(security): /transcript endpoint fails closed when auth token missing (#328) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Severity HIGH. The /transcript route in main.py used `if expected:` around the bearer-token compare, so `get_token()` returning None (no /configs/.auth_token on disk — bootstrap window, deleted file, OSError) silently skipped the entire auth check. Any container on molecule-monorepo-net could GET /transcript during the provisioning window and walk away with the full session log (user messages, Claude tool calls, assistant replies). The platform's TranscriptHandler always has a valid token (it acquired one at workspace registration), so tightening this gate has no legitimate-caller impact. Only unauthenticated sniffers lose access, which was never the intended contract of #287. Fix: 1. Extracted the auth gate into `workspace-template/transcript_auth.py` — a 20-line module with no heavy imports so the security-critical code is unit-testable without standing up the full uvicorn/a2a/httpx stack (the former inline guard could only be tested end-to-end, which explains why the regression shipped in #287). 2. `transcript_authorized(expected, auth_header)` returns False when `expected` is None or empty — the #328 fix — and otherwise does strict equality against "Bearer ". 3. main.py's inline handler calls the extracted function: if not _transcript_authorized(get_token(), auth_header): return 401 4. New tests/test_transcript_auth.py covers: None token, empty token, valid bearer, wrong bearer, missing header, case-sensitive prefix, whitespace fuzzing. All 7 pass. Closes #328 Co-Authored-By: Claude Opus 4.6 (1M context) --- workspace-template/main.py | 22 +++++++-- .../tests/test_transcript_auth.py | 47 +++++++++++++++++++ workspace-template/transcript_auth.py | 30 ++++++++++++ 3 files changed, 94 insertions(+), 5 deletions(-) create mode 100644 workspace-template/tests/test_transcript_auth.py create mode 100644 workspace-template/transcript_auth.py diff --git a/workspace-template/main.py b/workspace-template/main.py index 60766e71..e339ab6c 100644 --- a/workspace-template/main.py +++ b/workspace-template/main.py @@ -45,6 +45,11 @@ def get_machine_ip() -> str: # pragma: no cover return "127.0.0.1" +# Re-exported from transcript_auth for the inline /transcript handler. +# Separate module keeps the security-critical gate import-light + unit-testable. +from transcript_auth import transcript_authorized as _transcript_authorized + + async def main(): # pragma: no cover workspace_id = os.environ.get("WORKSPACE_ID", "workspace-default") config_path = os.environ.get("WORKSPACE_CONFIG_PATH", "/configs") @@ -293,12 +298,19 @@ async def main(): # pragma: no cover # Require workspace bearer token — the same token issued at registration # and stored in /configs/.auth_token. Any container on molecule-monorepo-net # could otherwise read the full session log. Closes #287. + # + # #328: fail CLOSED when the token file is unavailable. get_token() + # returns None during the bootstrap window (first register hasn't + # completed), if /configs/.auth_token was deleted, or on OSError. + # The old `if expected:` guard treated all three cases as "skip + # auth" — an unauthenticated container on the same Docker network + # could read the entire session log during that window. Deny + # instead. The platform's TranscriptHandler acquires the token + # during registration, so once the bootstrap completes it always + # has a valid credential to present. from platform_auth import get_token - expected = get_token() - if expected: - auth_header = request.headers.get("Authorization", "") - if auth_header != f"Bearer {expected}": - return JSONResponse({"error": "unauthorized"}, status_code=401) + if not _transcript_authorized(get_token(), request.headers.get("Authorization", "")): + return JSONResponse({"error": "unauthorized"}, status_code=401) try: since = int(request.query_params.get("since", "0")) limit = int(request.query_params.get("limit", "100")) diff --git a/workspace-template/tests/test_transcript_auth.py b/workspace-template/tests/test_transcript_auth.py new file mode 100644 index 00000000..e28f4d21 --- /dev/null +++ b/workspace-template/tests/test_transcript_auth.py @@ -0,0 +1,47 @@ +"""Tests for the #328 fix — /transcript endpoint must fail-CLOSED when +the workspace auth token is not yet on disk. + +Prior behaviour (regressed in #287): `if expected:` skipped the auth +check when `get_token()` returned None, so any container on +`molecule-monorepo-net` could read the full session log during the +bootstrap window. The fix lifts the guard into transcript_auth.py for +testability. +""" + +from transcript_auth import transcript_authorized + + +def test_missing_token_fails_closed(): + # #328 regression: None token MUST return False (was the fail-open bug). + assert transcript_authorized(None, "Bearer anything") is False + + +def test_empty_token_fails_closed(): + # Empty string is as-bad-as None — also a fail-closed case. + assert transcript_authorized("", "Bearer anything") is False + + +def test_valid_bearer_passes(): + assert transcript_authorized("tok-123", "Bearer tok-123") is True + + +def test_wrong_bearer_fails(): + assert transcript_authorized("tok-123", "Bearer other-token") is False + + +def test_missing_header_fails_even_when_expected_is_set(): + # Empty auth header (not sent at all) must fail — client forgot. + assert transcript_authorized("tok-123", "") is False + + +def test_case_sensitive_bearer_prefix(): + # Strict equality matches platform wsauth.BearerTokenFromHeader + # which is also case-sensitive on the "Bearer " prefix. Documenting + # the behavior so a future refactor is a conscious choice. + assert transcript_authorized("tok-123", "bearer tok-123") is False + + +def test_extra_whitespace_in_header_fails(): + # Strict equality — accidental double space between Bearer and token + # must fail so an adversary can't test fuzzed variations. + assert transcript_authorized("tok-123", "Bearer tok-123") is False diff --git a/workspace-template/transcript_auth.py b/workspace-template/transcript_auth.py new file mode 100644 index 00000000..49b0f622 --- /dev/null +++ b/workspace-template/transcript_auth.py @@ -0,0 +1,30 @@ +"""Auth gate for the /transcript Starlette route. + +Extracted from main.py so the security-critical logic is unit-testable +without standing up the full uvicorn/a2a/httpx import stack. + +#328: the route must fail CLOSED when the expected token is unavailable +(bootstrap window, missing file, OSError). The previous implementation +treated a missing token as "skip auth entirely" — any container on the +same Docker network could read the session log during provisioning. +""" + + +def transcript_authorized(expected_token: str | None, auth_header: str) -> bool: + """Return True iff /transcript should serve the request. + + Args: + expected_token: the workspace's registered bearer token, or None + if `/configs/.auth_token` is absent / unreadable. + auth_header: raw value of the Authorization request header. + + Behavior: + - None/empty expected → fail closed (401). This is the #328 fix; + a missing token file is an auth failure, not a bypass. + - Non-empty expected: strict equality check against "Bearer ". + Bearer prefix is case-sensitive (matches the platform's + wsauth.BearerTokenFromHeader contract). + """ + if not expected_token: + return False + return auth_header == f"Bearer {expected_token}"