feat(saas): deliver platform_inbound_secret via /registry/register (RFC #2312, PR-F)

Closes the SaaS-side gap that PR-A acknowledged but didn't fix: SaaS
workspaces have no persistent /configs volume, so the platform_inbound_secret
that PR-A's provisioner wrote at workspace creation never reaches the
runtime. Without this, even after the entire RFC #2312 stack lands,
SaaS chat upload would 401 (workspace fails-closed when /configs/.platform_inbound_secret
is missing).

Solution: return the secret in the /registry/register response body
on every register call. The runtime extracts it and persists to
/configs/.platform_inbound_secret at mode 0600. Idempotent — Docker-
mode workspaces also receive it and overwrite the value the provisioner
already wrote (same value until rotation).

Why on every register, not just first-register:
  * SaaS containers can be restarted (deploys, drains, EBS detach/
    re-attach) — /configs is rebuilt empty on each fresh start.
  * The auth_token is "issue once" because re-issuing rotates and
    invalidates the previous one. The inbound secret has no rotation
    flow yet (#2318) so re-sending the same value is harmless.
  * Eliminates the bootstrap window where a restarted SaaS workspace
    has no inbound secret on disk and would 401 every platform call.

Changes:
  * workspace-server/internal/handlers/registry.go — Register handler
    reads workspaces.platform_inbound_secret via wsauth.ReadPlatformInboundSecret
    and includes it in the response body. Legacy workspaces (NULL
    column) get a successful registration with the field omitted.
  * workspace-server/internal/handlers/registry_test.go — two new tests:
      - TestRegister_ReturnsPlatformInboundSecret_RFC2312_PRF: secret
        present in DB → secret in response, alongside auth_token.
      - TestRegister_NoInboundSecret_OmitsField: NULL column → field
        omitted, registration still 200.
  * workspace/platform_inbound_auth.py — adds save_inbound_secret(secret).
    Atomic write via tmp + os.replace, mode 0600 from os.open(O_CREAT,
    0o600) so a concurrent reader never sees 0644-default. Resets the
    in-process cache after write so the next get_inbound_secret() returns
    the freshly-written value (rotation-safe when it lands).
  * workspace/main.py — register-response handler extracts
    platform_inbound_secret alongside auth_token and persists via
    save_inbound_secret. Mirrors the existing save_token pattern.
  * workspace/tests/test_platform_inbound_auth.py — 6 new tests for
    save_inbound_secret: writes file, mode 0600, overwrite-existing,
    cache invalidation after save, empty-input no-op, parent-dir creation
    for fresh installs.

Test results:
  * go test ./internal/handlers/ ./internal/wsauth/ — all green
  * pytest workspace/tests/ — 1272 passed (was 1266 before this PR)

Refs #2312 (parent RFC), #2308 (chat upload 503 incident).
Stacks: PR-A #2313 → PR-B #2314 → PR-C #2315 → this PR.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-04-29 15:12:34 -07:00
parent c02cb0e1b6
commit 055e447355
5 changed files with 250 additions and 0 deletions

View File

@ -341,6 +341,27 @@ func (h *RegistryHandler) Register(c *gin.Context) {
log.Printf("Registry: token existence check failed for %s: %v", payload.ID, hasLiveErr)
}
// RFC #2312 PR-F: return the workspace's platform_inbound_secret so SaaS
// workspaces (which have no persistent /configs volume across container
// restarts) can re-populate /configs/.platform_inbound_secret on every
// register call. Docker-mode workspaces also receive it — the workspace-
// side write is idempotent (same value every call until a future
// rotation flow lands), so the duplication is harmless.
//
// NOT gated by hasLive: the inbound secret is minted at workspace
// creation in workspace_provision.go (PR-A), independent of the
// outbound auth_token's "issue once" lifecycle. Returning it here is
// the only delivery path for SaaS, where the platform's CP provisioner
// has no volume to write into.
if secret, secretErr := wsauth.ReadPlatformInboundSecret(ctx, db.DB, payload.ID); secretErr == nil {
response["platform_inbound_secret"] = secret
} else if !errors.Is(secretErr, wsauth.ErrNoInboundSecret) {
// ErrNoInboundSecret is the expected case for legacy workspaces
// that predate migration 044 — quiet. Other errors (DB hiccup, etc.)
// log loud so ops notices.
log.Printf("Registry: read platform_inbound_secret for %s failed: %v", payload.ID, secretErr)
}
c.JSON(http.StatusOK, response)
}

View File

@ -889,6 +889,129 @@ func TestRegister_C18_BootstrapAllowedNoTokens(t *testing.T) {
}
}
// TestRegister_ReturnsPlatformInboundSecret_RFC2312_PRF verifies that
// /registry/register includes the workspace's platform_inbound_secret
// in the response body when one is on file. This is the SaaS delivery
// path: SaaS workspaces have no persistent /configs volume, so they
// re-fetch the secret on every register call (idempotent in Docker mode
// where the provisioner already wrote the same value to the volume at
// workspace creation).
func TestRegister_ReturnsPlatformInboundSecret_RFC2312_PRF(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewRegistryHandler(broadcaster)
const wsID = "00000000-0000-0000-0000-000000002312"
const inboundSecret = "the-platform-inbound-secret-value"
// requireWorkspaceToken — bootstrap allowed (no live tokens).
mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
// Workspace upsert.
mock.ExpectExec("INSERT INTO workspaces").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectQuery("SELECT url FROM workspaces WHERE id").
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow("http://localhost:9100"))
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
// Phase 30.1 token issuance — first-register path.
mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
mock.ExpectExec("INSERT INTO workspace_auth_tokens").
WillReturnResult(sqlmock.NewResult(1, 1))
// RFC #2312 PR-F: ReadPlatformInboundSecret query — returns the value
// the provisioner stored at workspace creation. The handler MUST
// include this in the response body so the workspace can persist it
// to /configs/.platform_inbound_secret.
mock.ExpectQuery(`SELECT platform_inbound_secret FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"platform_inbound_secret"}).AddRow(inboundSecret))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("POST", "/registry/register",
bytes.NewBufferString(`{"id":"`+wsID+`","url":"http://localhost:9100","agent_card":{"name":"x"}}`))
c.Request.Header.Set("Content-Type", "application/json")
handler.Register(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("parse response: %v", err)
}
got, ok := resp["platform_inbound_secret"].(string)
if !ok {
t.Fatalf("expected platform_inbound_secret in response, got: %v", resp)
}
if got != inboundSecret {
t.Errorf("secret mismatch: got %q, want %q", got, inboundSecret)
}
// auth_token should also be present (first-register path).
if resp["auth_token"] == nil {
t.Error("expected auth_token in response (first-register path)")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet expectations: %v", err)
}
}
// TestRegister_NoInboundSecret_OmitsField verifies that legacy workspaces
// that predate migration 044 (NULL platform_inbound_secret column) still
// get a successful registration — the field is just omitted from the
// response. The Register handler logs the absence quietly.
func TestRegister_NoInboundSecret_OmitsField(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewRegistryHandler(broadcaster)
const wsID = "00000000-0000-0000-0000-000000002312"
mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
mock.ExpectExec("INSERT INTO workspaces").WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectQuery("SELECT url FROM workspaces WHERE id").
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow("http://localhost:9100"))
mock.ExpectExec("INSERT INTO structure_events").WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
mock.ExpectExec("INSERT INTO workspace_auth_tokens").WillReturnResult(sqlmock.NewResult(1, 1))
// NULL secret — legacy workspace.
mock.ExpectQuery(`SELECT platform_inbound_secret FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"platform_inbound_secret"}).AddRow(nil))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("POST", "/registry/register",
bytes.NewBufferString(`{"id":"`+wsID+`","url":"http://localhost:9100","agent_card":{"name":"x"}}`))
c.Request.Header.Set("Content-Type", "application/json")
handler.Register(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200 even without inbound secret, got %d", w.Code)
}
var resp map[string]interface{}
_ = json.Unmarshal(w.Body.Bytes(), &resp)
if _, present := resp["platform_inbound_secret"]; present {
t.Errorf("expected platform_inbound_secret to be ABSENT for legacy workspace, got: %v", resp["platform_inbound_secret"])
}
}
// TestRegister_C18_HijackBlockedNoBearer verifies the C18 attack is blocked:
// when a workspace already has a live token, /register without a bearer → 401.
func TestRegister_C18_HijackBlockedNoBearer(t *testing.T) {

View File

@ -310,6 +310,17 @@ async def main(): # pragma: no cover
from platform_auth import save_token
save_token(tok)
print(f"Saved workspace auth token (prefix={tok[:8]}…)")
# RFC #2312 PR-F: persist platform_inbound_secret if the
# platform supplied one. Idempotent — writing the same
# value over an existing file is harmless. Required for
# SaaS where there's no persistent /configs volume; on
# Docker mode it overwrites the value the provisioner
# already wrote at workspace creation.
inbound = body.get("platform_inbound_secret")
if inbound:
from platform_inbound_auth import save_inbound_secret
save_inbound_secret(inbound)
print(f"Saved platform_inbound_secret (prefix={inbound[:8]}…)")
except Exception as parse_exc:
print(f"Warning: couldn't parse register response for token: {parse_exc}")
except Exception as e:

View File

@ -72,6 +72,47 @@ def reset_cache() -> None:
_cached_secret = None
def save_inbound_secret(secret: str) -> None:
"""Persist a freshly-received platform_inbound_secret to disk.
Called from the /registry/register response handler when the platform
returns a `platform_inbound_secret` field. Mirrors platform_auth.save_token's
pattern: 0600 file in CONFIGS_DIR, atomic write via tmp + rename so a
concurrent reader never sees a partial file.
Idempotent: writing the same value over an existing file is a no-op
from the workspace's perspective. Resets the in-process cache so the
next get_inbound_secret() returns the freshly-written value (matters
when a future rotation flow lands and the platform sends a different
secret on a subsequent register call).
"""
global _cached_secret
if not secret:
return
path = _secret_file()
path.parent.mkdir(parents=True, exist_ok=True)
tmp = path.with_suffix(path.suffix + ".tmp")
try:
# Open with 0600 from the start so a concurrent reader can never
# see a 0644-default fd before the chmod. mode= is honored by
# os.open underneath; pathlib.write_text does not expose it.
fd = os.open(str(tmp), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(fd, "w") as f:
f.write(secret)
os.replace(str(tmp), str(path))
# Race-safe in-process cache update: clear first, then let next
# caller re-read disk. Avoids the "stored new, cache still has
# old" window if get_inbound_secret races with this write.
_cached_secret = None
except OSError as exc:
logger.warning("platform_inbound_auth: save %s failed: %s", path, exc)
# Best-effort cleanup of the tmp file.
try:
os.unlink(str(tmp))
except OSError as cleanup_exc:
logger.debug("platform_inbound_auth: unlink tmp %s failed: %s", tmp, cleanup_exc)
def inbound_authorized(expected_secret: str | None, auth_header: str) -> bool:
"""Return True iff a /internal/* request should be served.

View File

@ -118,3 +118,57 @@ def test_end_to_end_file_to_authorized(configs_dir: Path):
secret = get_inbound_secret()
assert inbound_authorized(secret, "Bearer e2e-secret") is True
assert inbound_authorized(secret, "Bearer not-this") is False
# ───────────── save_inbound_secret (RFC #2312 PR-F) ─────────────
from platform_inbound_auth import save_inbound_secret
def test_save_inbound_secret_writes_file(configs_dir: Path):
save_inbound_secret("fresh-secret-from-register")
assert (configs_dir / ".platform_inbound_secret").read_text() == "fresh-secret-from-register"
def test_save_inbound_secret_writes_0600_mode(configs_dir: Path):
"""File mode MUST be 0600. Anything else lets co-resident processes
read the bearer the platform uses to call /internal/* endpoints."""
save_inbound_secret("mode-test")
mode = (configs_dir / ".platform_inbound_secret").stat().st_mode & 0o777
assert mode == 0o600, f"expected 0600, got {oct(mode)}"
def test_save_inbound_secret_overwrites_existing(configs_dir: Path):
"""Idempotent — saving over an existing file replaces the content
cleanly (atomic via tmp + rename)."""
(configs_dir / ".platform_inbound_secret").write_text("old-value")
save_inbound_secret("new-value")
assert (configs_dir / ".platform_inbound_secret").read_text() == "new-value"
def test_save_inbound_secret_invalidates_cache(configs_dir: Path):
"""After saving, the next get_inbound_secret() must return the NEW
value, not the cached old one. Otherwise rotation would be silently
broken once we ever rotate."""
(configs_dir / ".platform_inbound_secret").write_text("v1")
assert get_inbound_secret() == "v1" # primes cache
save_inbound_secret("v2")
assert get_inbound_secret() == "v2" # cache invalidated, re-reads
def test_save_inbound_secret_empty_is_noop(configs_dir: Path):
"""An empty secret string is treated as 'platform didn't return one'
and ignored the existing file (if any) stays untouched."""
(configs_dir / ".platform_inbound_secret").write_text("existing")
save_inbound_secret("")
assert (configs_dir / ".platform_inbound_secret").read_text() == "existing"
def test_save_inbound_secret_creates_parent_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
"""If CONFIGS_DIR doesn't exist yet (very first boot), save_inbound_secret
creates it rather than KeyError-ing."""
nonexistent = tmp_path / "fresh" / "configs"
monkeypatch.setenv("CONFIGS_DIR", str(nonexistent))
platform_inbound_auth.reset_cache()
save_inbound_secret("bootstrap-value")
assert (nonexistent / ".platform_inbound_secret").read_text() == "bootstrap-value"