feat(saas): deliver platform_inbound_secret via /registry/register (RFC #2312, PR-F)
Closes the SaaS-side gap that PR-A acknowledged but didn't fix: SaaS workspaces have no persistent /configs volume, so the platform_inbound_secret that PR-A's provisioner wrote at workspace creation never reaches the runtime. Without this, even after the entire RFC #2312 stack lands, SaaS chat upload would 401 (workspace fails-closed when /configs/.platform_inbound_secret is missing). Solution: return the secret in the /registry/register response body on every register call. The runtime extracts it and persists to /configs/.platform_inbound_secret at mode 0600. Idempotent — Docker- mode workspaces also receive it and overwrite the value the provisioner already wrote (same value until rotation). Why on every register, not just first-register: * SaaS containers can be restarted (deploys, drains, EBS detach/ re-attach) — /configs is rebuilt empty on each fresh start. * The auth_token is "issue once" because re-issuing rotates and invalidates the previous one. The inbound secret has no rotation flow yet (#2318) so re-sending the same value is harmless. * Eliminates the bootstrap window where a restarted SaaS workspace has no inbound secret on disk and would 401 every platform call. Changes: * workspace-server/internal/handlers/registry.go — Register handler reads workspaces.platform_inbound_secret via wsauth.ReadPlatformInboundSecret and includes it in the response body. Legacy workspaces (NULL column) get a successful registration with the field omitted. * workspace-server/internal/handlers/registry_test.go — two new tests: - TestRegister_ReturnsPlatformInboundSecret_RFC2312_PRF: secret present in DB → secret in response, alongside auth_token. - TestRegister_NoInboundSecret_OmitsField: NULL column → field omitted, registration still 200. * workspace/platform_inbound_auth.py — adds save_inbound_secret(secret). Atomic write via tmp + os.replace, mode 0600 from os.open(O_CREAT, 0o600) so a concurrent reader never sees 0644-default. Resets the in-process cache after write so the next get_inbound_secret() returns the freshly-written value (rotation-safe when it lands). * workspace/main.py — register-response handler extracts platform_inbound_secret alongside auth_token and persists via save_inbound_secret. Mirrors the existing save_token pattern. * workspace/tests/test_platform_inbound_auth.py — 6 new tests for save_inbound_secret: writes file, mode 0600, overwrite-existing, cache invalidation after save, empty-input no-op, parent-dir creation for fresh installs. Test results: * go test ./internal/handlers/ ./internal/wsauth/ — all green * pytest workspace/tests/ — 1272 passed (was 1266 before this PR) Refs #2312 (parent RFC), #2308 (chat upload 503 incident). Stacks: PR-A #2313 → PR-B #2314 → PR-C #2315 → this PR. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c02cb0e1b6
commit
055e447355
@ -341,6 +341,27 @@ func (h *RegistryHandler) Register(c *gin.Context) {
|
||||
log.Printf("Registry: token existence check failed for %s: %v", payload.ID, hasLiveErr)
|
||||
}
|
||||
|
||||
// RFC #2312 PR-F: return the workspace's platform_inbound_secret so SaaS
|
||||
// workspaces (which have no persistent /configs volume across container
|
||||
// restarts) can re-populate /configs/.platform_inbound_secret on every
|
||||
// register call. Docker-mode workspaces also receive it — the workspace-
|
||||
// side write is idempotent (same value every call until a future
|
||||
// rotation flow lands), so the duplication is harmless.
|
||||
//
|
||||
// NOT gated by hasLive: the inbound secret is minted at workspace
|
||||
// creation in workspace_provision.go (PR-A), independent of the
|
||||
// outbound auth_token's "issue once" lifecycle. Returning it here is
|
||||
// the only delivery path for SaaS, where the platform's CP provisioner
|
||||
// has no volume to write into.
|
||||
if secret, secretErr := wsauth.ReadPlatformInboundSecret(ctx, db.DB, payload.ID); secretErr == nil {
|
||||
response["platform_inbound_secret"] = secret
|
||||
} else if !errors.Is(secretErr, wsauth.ErrNoInboundSecret) {
|
||||
// ErrNoInboundSecret is the expected case for legacy workspaces
|
||||
// that predate migration 044 — quiet. Other errors (DB hiccup, etc.)
|
||||
// log loud so ops notices.
|
||||
log.Printf("Registry: read platform_inbound_secret for %s failed: %v", payload.ID, secretErr)
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, response)
|
||||
}
|
||||
|
||||
|
||||
@ -889,6 +889,129 @@ func TestRegister_C18_BootstrapAllowedNoTokens(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestRegister_ReturnsPlatformInboundSecret_RFC2312_PRF verifies that
|
||||
// /registry/register includes the workspace's platform_inbound_secret
|
||||
// in the response body when one is on file. This is the SaaS delivery
|
||||
// path: SaaS workspaces have no persistent /configs volume, so they
|
||||
// re-fetch the secret on every register call (idempotent in Docker mode
|
||||
// where the provisioner already wrote the same value to the volume at
|
||||
// workspace creation).
|
||||
func TestRegister_ReturnsPlatformInboundSecret_RFC2312_PRF(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewRegistryHandler(broadcaster)
|
||||
|
||||
const wsID = "00000000-0000-0000-0000-000000002312"
|
||||
const inboundSecret = "the-platform-inbound-secret-value"
|
||||
|
||||
// requireWorkspaceToken — bootstrap allowed (no live tokens).
|
||||
mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
|
||||
|
||||
// Workspace upsert.
|
||||
mock.ExpectExec("INSERT INTO workspaces").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectQuery("SELECT url FROM workspaces WHERE id").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow("http://localhost:9100"))
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// Phase 30.1 token issuance — first-register path.
|
||||
mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
|
||||
mock.ExpectExec("INSERT INTO workspace_auth_tokens").
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
// RFC #2312 PR-F: ReadPlatformInboundSecret query — returns the value
|
||||
// the provisioner stored at workspace creation. The handler MUST
|
||||
// include this in the response body so the workspace can persist it
|
||||
// to /configs/.platform_inbound_secret.
|
||||
mock.ExpectQuery(`SELECT platform_inbound_secret FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"platform_inbound_secret"}).AddRow(inboundSecret))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("POST", "/registry/register",
|
||||
bytes.NewBufferString(`{"id":"`+wsID+`","url":"http://localhost:9100","agent_card":{"name":"x"}}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Register(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("parse response: %v", err)
|
||||
}
|
||||
got, ok := resp["platform_inbound_secret"].(string)
|
||||
if !ok {
|
||||
t.Fatalf("expected platform_inbound_secret in response, got: %v", resp)
|
||||
}
|
||||
if got != inboundSecret {
|
||||
t.Errorf("secret mismatch: got %q, want %q", got, inboundSecret)
|
||||
}
|
||||
// auth_token should also be present (first-register path).
|
||||
if resp["auth_token"] == nil {
|
||||
t.Error("expected auth_token in response (first-register path)")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRegister_NoInboundSecret_OmitsField verifies that legacy workspaces
|
||||
// that predate migration 044 (NULL platform_inbound_secret column) still
|
||||
// get a successful registration — the field is just omitted from the
|
||||
// response. The Register handler logs the absence quietly.
|
||||
func TestRegister_NoInboundSecret_OmitsField(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewRegistryHandler(broadcaster)
|
||||
|
||||
const wsID = "00000000-0000-0000-0000-000000002312"
|
||||
|
||||
mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
|
||||
mock.ExpectExec("INSERT INTO workspaces").WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectQuery("SELECT url FROM workspaces WHERE id").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow("http://localhost:9100"))
|
||||
mock.ExpectExec("INSERT INTO structure_events").WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
|
||||
mock.ExpectExec("INSERT INTO workspace_auth_tokens").WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
// NULL secret — legacy workspace.
|
||||
mock.ExpectQuery(`SELECT platform_inbound_secret FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"platform_inbound_secret"}).AddRow(nil))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("POST", "/registry/register",
|
||||
bytes.NewBufferString(`{"id":"`+wsID+`","url":"http://localhost:9100","agent_card":{"name":"x"}}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Register(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 even without inbound secret, got %d", w.Code)
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
_ = json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if _, present := resp["platform_inbound_secret"]; present {
|
||||
t.Errorf("expected platform_inbound_secret to be ABSENT for legacy workspace, got: %v", resp["platform_inbound_secret"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestRegister_C18_HijackBlockedNoBearer verifies the C18 attack is blocked:
|
||||
// when a workspace already has a live token, /register without a bearer → 401.
|
||||
func TestRegister_C18_HijackBlockedNoBearer(t *testing.T) {
|
||||
|
||||
@ -310,6 +310,17 @@ async def main(): # pragma: no cover
|
||||
from platform_auth import save_token
|
||||
save_token(tok)
|
||||
print(f"Saved workspace auth token (prefix={tok[:8]}…)")
|
||||
# RFC #2312 PR-F: persist platform_inbound_secret if the
|
||||
# platform supplied one. Idempotent — writing the same
|
||||
# value over an existing file is harmless. Required for
|
||||
# SaaS where there's no persistent /configs volume; on
|
||||
# Docker mode it overwrites the value the provisioner
|
||||
# already wrote at workspace creation.
|
||||
inbound = body.get("platform_inbound_secret")
|
||||
if inbound:
|
||||
from platform_inbound_auth import save_inbound_secret
|
||||
save_inbound_secret(inbound)
|
||||
print(f"Saved platform_inbound_secret (prefix={inbound[:8]}…)")
|
||||
except Exception as parse_exc:
|
||||
print(f"Warning: couldn't parse register response for token: {parse_exc}")
|
||||
except Exception as e:
|
||||
|
||||
@ -72,6 +72,47 @@ def reset_cache() -> None:
|
||||
_cached_secret = None
|
||||
|
||||
|
||||
def save_inbound_secret(secret: str) -> None:
|
||||
"""Persist a freshly-received platform_inbound_secret to disk.
|
||||
|
||||
Called from the /registry/register response handler when the platform
|
||||
returns a `platform_inbound_secret` field. Mirrors platform_auth.save_token's
|
||||
pattern: 0600 file in CONFIGS_DIR, atomic write via tmp + rename so a
|
||||
concurrent reader never sees a partial file.
|
||||
|
||||
Idempotent: writing the same value over an existing file is a no-op
|
||||
from the workspace's perspective. Resets the in-process cache so the
|
||||
next get_inbound_secret() returns the freshly-written value (matters
|
||||
when a future rotation flow lands and the platform sends a different
|
||||
secret on a subsequent register call).
|
||||
"""
|
||||
global _cached_secret
|
||||
if not secret:
|
||||
return
|
||||
path = _secret_file()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp = path.with_suffix(path.suffix + ".tmp")
|
||||
try:
|
||||
# Open with 0600 from the start so a concurrent reader can never
|
||||
# see a 0644-default fd before the chmod. mode= is honored by
|
||||
# os.open underneath; pathlib.write_text does not expose it.
|
||||
fd = os.open(str(tmp), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
|
||||
with os.fdopen(fd, "w") as f:
|
||||
f.write(secret)
|
||||
os.replace(str(tmp), str(path))
|
||||
# Race-safe in-process cache update: clear first, then let next
|
||||
# caller re-read disk. Avoids the "stored new, cache still has
|
||||
# old" window if get_inbound_secret races with this write.
|
||||
_cached_secret = None
|
||||
except OSError as exc:
|
||||
logger.warning("platform_inbound_auth: save %s failed: %s", path, exc)
|
||||
# Best-effort cleanup of the tmp file.
|
||||
try:
|
||||
os.unlink(str(tmp))
|
||||
except OSError as cleanup_exc:
|
||||
logger.debug("platform_inbound_auth: unlink tmp %s failed: %s", tmp, cleanup_exc)
|
||||
|
||||
|
||||
def inbound_authorized(expected_secret: str | None, auth_header: str) -> bool:
|
||||
"""Return True iff a /internal/* request should be served.
|
||||
|
||||
|
||||
@ -118,3 +118,57 @@ def test_end_to_end_file_to_authorized(configs_dir: Path):
|
||||
secret = get_inbound_secret()
|
||||
assert inbound_authorized(secret, "Bearer e2e-secret") is True
|
||||
assert inbound_authorized(secret, "Bearer not-this") is False
|
||||
|
||||
|
||||
# ───────────── save_inbound_secret (RFC #2312 PR-F) ─────────────
|
||||
|
||||
from platform_inbound_auth import save_inbound_secret
|
||||
|
||||
|
||||
def test_save_inbound_secret_writes_file(configs_dir: Path):
|
||||
save_inbound_secret("fresh-secret-from-register")
|
||||
assert (configs_dir / ".platform_inbound_secret").read_text() == "fresh-secret-from-register"
|
||||
|
||||
|
||||
def test_save_inbound_secret_writes_0600_mode(configs_dir: Path):
|
||||
"""File mode MUST be 0600. Anything else lets co-resident processes
|
||||
read the bearer the platform uses to call /internal/* endpoints."""
|
||||
save_inbound_secret("mode-test")
|
||||
mode = (configs_dir / ".platform_inbound_secret").stat().st_mode & 0o777
|
||||
assert mode == 0o600, f"expected 0600, got {oct(mode)}"
|
||||
|
||||
|
||||
def test_save_inbound_secret_overwrites_existing(configs_dir: Path):
|
||||
"""Idempotent — saving over an existing file replaces the content
|
||||
cleanly (atomic via tmp + rename)."""
|
||||
(configs_dir / ".platform_inbound_secret").write_text("old-value")
|
||||
save_inbound_secret("new-value")
|
||||
assert (configs_dir / ".platform_inbound_secret").read_text() == "new-value"
|
||||
|
||||
|
||||
def test_save_inbound_secret_invalidates_cache(configs_dir: Path):
|
||||
"""After saving, the next get_inbound_secret() must return the NEW
|
||||
value, not the cached old one. Otherwise rotation would be silently
|
||||
broken once we ever rotate."""
|
||||
(configs_dir / ".platform_inbound_secret").write_text("v1")
|
||||
assert get_inbound_secret() == "v1" # primes cache
|
||||
save_inbound_secret("v2")
|
||||
assert get_inbound_secret() == "v2" # cache invalidated, re-reads
|
||||
|
||||
|
||||
def test_save_inbound_secret_empty_is_noop(configs_dir: Path):
|
||||
"""An empty secret string is treated as 'platform didn't return one'
|
||||
and ignored — the existing file (if any) stays untouched."""
|
||||
(configs_dir / ".platform_inbound_secret").write_text("existing")
|
||||
save_inbound_secret("")
|
||||
assert (configs_dir / ".platform_inbound_secret").read_text() == "existing"
|
||||
|
||||
|
||||
def test_save_inbound_secret_creates_parent_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch):
|
||||
"""If CONFIGS_DIR doesn't exist yet (very first boot), save_inbound_secret
|
||||
creates it rather than KeyError-ing."""
|
||||
nonexistent = tmp_path / "fresh" / "configs"
|
||||
monkeypatch.setenv("CONFIGS_DIR", str(nonexistent))
|
||||
platform_inbound_auth.reset_cache()
|
||||
save_inbound_secret("bootstrap-value")
|
||||
assert (nonexistent / ".platform_inbound_secret").read_text() == "bootstrap-value"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user