diff --git a/docs/runbooks/admin-auth.md b/docs/runbooks/admin-auth.md new file mode 100644 index 00000000..7bf40843 --- /dev/null +++ b/docs/runbooks/admin-auth.md @@ -0,0 +1,62 @@ +# Admin Authentication Runbook + +## Test-token route: lock in staging and production + +The `GET /admin/workspaces/:id/test-token` endpoint mints fresh workspace auth tokens. +It is gated by `TestTokensEnabled()`, which returns `true` only when `MOLECULE_ENV != "production"`. + +**Effect**: if `MOLECULE_ENV` is unset or set to `development` / `dev` in a staging or production +tenant, the test-token route remains enabled. While the route is still protected by a `subtle.ConstantTimeCompare` check +against `ADMIN_TOKEN` (and returns 404, not 403, when disabled), the safest posture is to lock it +out in any environment where it is not intentionally used. + +### Required: set MOLECULE_ENV in all non-dev environments + +```bash +# In your tenant / EC2 / Railway environment variables: +MOLECULE_ENV=production +``` + +This matches the production tenant default. When `MOLECULE_ENV=production`: + +- `TestTokensEnabled()` → `false` +- `GET /admin/workspaces/:id/test-token` → 404 (route disabled) + +### Startup visibility + +workspace-server logs its dev-mode fail-open state at boot: + +``` +Platform starting on ... (dev-mode-fail-open=...) +``` + +Additionally, when `TestTokensEnabled()` is `true` (route enabled), the server emits a NOTE line +so operators can confirm the setting in logs: + +``` +NOTE: /admin/workspaces/:id/test-token is ENABLED (MOLECULE_ENV="" — set MOLECULE_ENV=production in staging/prod to lock this route) +``` +(`MOLECULE_ENV` is printed quoted, so an unset variable appears as `""`.) + +If you see this line — or the route is still accessible — in a staging or production tenant, verify that +`MOLECULE_ENV` is set to exactly `production` (not unset, `development`, `dev`, or any other value). 
+ +### Dev environments + +In local dev (`MOLECULE_ENV=development` or unset with no `ADMIN_TOKEN`), the test-token route +is intentionally enabled — it is the only way to bootstrap a workspace bearer token without a running +canvas. This is the correct default for developer workstations. + +## Admin bearer token (`ADMIN_TOKEN`) + +The platform uses `ADMIN_TOKEN` as the bearer credential for admin-gated endpoints: + +| Endpoint | Auth method | +|----------|-------------| +| `GET/POST/PATCH/DELETE /workspaces` | `Authorization: Bearer <ADMIN_TOKEN>` | +| `GET /admin/liveness` | `Authorization: Bearer <ADMIN_TOKEN>` | +| `POST /org/import` | `Authorization: Bearer <ADMIN_TOKEN>` | +| `GET /admin/workspaces/:id/test-token` | `Authorization: Bearer <ADMIN_TOKEN>` (enabled only when `MOLECULE_ENV != "production"`) | + +Missing or invalid `ADMIN_TOKEN` → AdminAuth fails open in dev mode (no token set), or +returns 401 in production mode (token set but invalid). diff --git a/workspace-server/cmd/server/main.go b/workspace-server/cmd/server/main.go index 743b6780..1d6ff911 100644 --- a/workspace-server/cmd/server/main.go +++ b/workspace-server/cmd/server/main.go @@ -367,6 +367,9 @@ func main() { // Start server in goroutine go func() { log.Printf("Platform starting on %s:%s (dev-mode-fail-open=%v)", bindHost, port, middleware.IsDevModeFailOpen()) + if handlers.TestTokensEnabled() { + log.Printf("NOTE: /admin/workspaces/:id/test-token is ENABLED (MOLECULE_ENV=%q — set MOLECULE_ENV=production in staging/prod to lock this route)", os.Getenv("MOLECULE_ENV")) + } if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { log.Fatalf("Server failed: %v", err) } diff --git a/workspace/scripts/molecule-git-token-helper.sh b/workspace/scripts/molecule-git-token-helper.sh index 125d5109..d7862e7f 100755 --- a/workspace/scripts/molecule-git-token-helper.sh +++ b/workspace/scripts/molecule-git-token-helper.sh @@ -46,7 +46,15 @@ # 2. Fetch fresh token from platform API. # 3. 
If platform is unreachable, fall back to GITHUB_TOKEN / GH_TOKEN # env var (set at container start, valid for up to 60 min). -# 4. If all fail, exit 1 so git falls through to the next credential +# 4. If env is unset, fall back to ${CONFIGS_DIR:-/configs}/.github-token +# static token file (operator-placed PAT as incident workaround). +# Empty file rejected; whitespace stripped before use. +# Written by operator into the agent-writable /configs dir so +# no root and no platform restart needed to activate. +# Both _fetch_token (git path) and _refresh_gh (gh CLI path) use +# this fallback — otherwise git would work but gh auth status would +# still be unauthenticated post-incident. +# 5. If all fail, exit 1 so git falls through to the next credential # helper in the chain (if any). # # # gh CLI integration @@ -197,7 +205,7 @@ _fetch_token_from_api() { echo "${token}" } -# _fetch_token — return a fresh token using cache > API > env fallback chain. +# _fetch_token — return a fresh token using cache > API > env > static fallback chain. # Outputs the raw token string on success; exits non-zero if all sources fail. _fetch_token() { # 1. Try cache first. @@ -222,6 +230,20 @@ _fetch_token() { return 0 fi + # 4. Static token fallback — operator-placed PAT in the agent-writable + # configs dir. Written without root; no platform restart needed. + # Both this helper and _refresh_gh use the same fallback so git + # and gh both recover from a platform outage. + static_token_file="${CONFIGS_DIR:-/configs}/.github-token" + if [ -f "${static_token_file}" ]; then + static_token=$(tr -d '[:space:]' < "${static_token_file}") + if [ -n "${static_token}" ]; then + echo "[molecule-git-token-helper] API + env unreachable, falling back to static .github-token" >&2 + echo "${static_token}" + return 0 + fi + fi + echo "[molecule-git-token-helper] all token sources exhausted" >&2 return 1 } @@ -240,15 +262,36 @@ case "${ACTION}" in # No-op — the platform manages token lifecycle. 
;; _fetch_token) - # Return raw token (cache > API > env fallback). + # Return raw token (cache > API > env > static fallback). _fetch_token ;; _refresh_gh) # Refresh cache AND update gh CLI auth in one shot. # Called by molecule-gh-token-refresh.sh background daemon. # Force-bypass cache to get a definitely fresh token. + # + # Chain: API > static fallback. Env is deliberately excluded here — + # _refresh_gh is a background daemon that re-runs every 30 min; + # if we used the env fallback on every cycle the gh CLI would stay + # stuck on a stale env token instead of recovering when the API + # comes back. Static fallback is intentionally operator-activated + # only (file presence gates it). api_token=$(_fetch_token_from_api) || { - echo "[molecule-git-token-helper] _refresh_gh: API fetch failed" >&2 + # API down — try static token fallback. + static_token_file="${CONFIGS_DIR:-/configs}/.github-token" + if [ -f "${static_token_file}" ]; then + static_token=$(tr -d '[:space:]' < "${static_token_file}") + if [ -n "${static_token}" ]; then + echo "[molecule-git-token-helper] _refresh_gh: API unreachable, using static .github-token" >&2 + _write_cache "${static_token}" + echo "${static_token}" | gh auth login --hostname github.com --with-token 2>/dev/null || { + echo "[molecule-git-token-helper] _refresh_gh: gh auth login with static token failed (non-fatal)" >&2 + } + echo "[molecule-git-token-helper] _refresh_gh: static token used successfully" >&2 + exit 0 # exit (like the failure path below), not return — return is only valid inside a function or sourced script + fi + fi + echo "[molecule-git-token-helper] _refresh_gh: API fetch failed and no static fallback" >&2 exit 1 } _write_cache "${api_token}"