diff --git a/workspace-server/internal/handlers/workspace_provision.go b/workspace-server/internal/handlers/workspace_provision.go index b78b9405..53e7d31d 100644 --- a/workspace-server/internal/handlers/workspace_provision.go +++ b/workspace-server/internal/handlers/workspace_provision.go @@ -232,6 +232,7 @@ func (h *WorkspaceHandler) provisionWorkspaceOpts(workspaceID, templatePath stri // WriteFilesToContainer, which runs immediately after ContainerStart and // wins the race against the Python adapter's startup time (~1-2 s). h.issueAndInjectToken(ctx, workspaceID, &cfg) + h.issueAndInjectInboundSecret(ctx, workspaceID, &cfg) url, err := h.provisioner.Start(ctx, cfg) if err != nil { @@ -437,6 +438,45 @@ func (h *WorkspaceHandler) issueAndInjectToken(ctx context.Context, workspaceID log.Printf("Provisioner: injected fresh auth token for workspace %s into config volume", workspaceID) } +// issueAndInjectInboundSecret mints the platform→workspace shared secret +// (RFC #2312, migration 044) and persists the plaintext into the +// workspaces.platform_inbound_secret column so platform-side handlers can +// read it back on every forward call. +// +// Docker mode also writes the plaintext into cfg.ConfigFiles +// [".platform_inbound_secret"] so WriteFilesToContainer drops it on the +// /configs volume alongside .auth_token. +// +// SaaS mode persists to the DB but does NOT write a local file from +// here — there is no workspace-server-managed volume in SaaS. The +// workspace receives the secret out-of-band via the /registry/register +// response (mirrors the existing .auth_token bootstrap path). +// +// Best-effort: failure logs and continues. The workspace-side +// /internal/* handlers fail-closed when the file is missing, so a +// failed mint surfaces as 401 on the platform's first forward call — +// loud, debuggable, no silent fail-open. +func (h *WorkspaceHandler) issueAndInjectInboundSecret(ctx context.Context, workspaceID string, cfg *provisioner.WorkspaceConfig) { + secret, err := wsauth.IssuePlatformInboundSecret(ctx, db.DB, workspaceID) + if err != nil { + log.Printf("Provisioner: failed to issue platform_inbound_secret for %s: %v — chat upload + other /internal endpoints will 401", workspaceID, err) + return + } + + if saasMode() { + // Plaintext lives in the DB column; the workspace will fetch it + // via /registry/register response (handled in a follow-up PR). + log.Printf("Provisioner: minted platform_inbound_secret for %s (SaaS mode — workspace will receive via register response)", workspaceID) + return + } + + if cfg.ConfigFiles == nil { + cfg.ConfigFiles = make(map[string][]byte) + } + cfg.ConfigFiles[".platform_inbound_secret"] = []byte(secret) + log.Printf("Provisioner: injected platform_inbound_secret for workspace %s into config volume", workspaceID) +} + // findTemplateByName looks for a workspace-configs-templates directory matching a name. func findTemplateByName(configsDir, name string) string { entries, err := os.ReadDir(configsDir) diff --git a/workspace-server/internal/wsauth/platform_inbound.go b/workspace-server/internal/wsauth/platform_inbound.go new file mode 100644 index 00000000..5f48c073 --- /dev/null +++ b/workspace-server/internal/wsauth/platform_inbound.go @@ -0,0 +1,97 @@ +// Package wsauth — platform→workspace inbound secret (per-workspace bearer +// the platform presents when calling INTO a workspace's HTTP server). +// +// Asymmetric to IssueToken in this same package, which mints the +// **outbound** bearer (workspace → platform). See the per-function +// comments and migration 044 for the full rationale on why the two +// roles use distinct secrets stored in different shapes. +// +// IssueToken IssuePlatformInboundSecret +// ────────── ─────────────────────────── +// workspace_auth_tokens row workspaces.platform_inbound_secret column +// plaintext returned once, plaintext stored AND returned (the platform +// hash stored must read it back on every forward call) +// bcrypt-shape compare string-equality compare on workspace side +package wsauth + +import ( + "context" + "crypto/rand" + "database/sql" + "encoding/base64" + "errors" + "fmt" +) + +// platformInboundSecretBytes is the raw-random length before base64url +// encoding. Same 256-bit entropy floor as workspace_auth_tokens; the +// shape lets a future rotation script substitute one for the other +// without changing the bearer-presenter on either side. +const platformInboundSecretBytes = 32 + +// ErrNoInboundSecret is returned by ReadPlatformInboundSecret when the +// row exists but the column is NULL or empty. Callers MUST treat this +// as a structural failure (the row was created by a path that didn't +// mint a secret, or the migration ran but the row predates it) and +// surface the nil bearer to the user as a 500-class error rather than +// silently sending an unauthenticated request to the workspace. +var ErrNoInboundSecret = errors.New("wsauth: workspace has no platform_inbound_secret on file") + +// IssuePlatformInboundSecret generates a fresh per-workspace shared +// secret, persists the plaintext into workspaces.platform_inbound_secret, +// and returns the plaintext so the provisioner can write it into +// /configs/.platform_inbound_secret on the workspace's volume. +// +// The plaintext is INTENTIONALLY retained on the platform side: every +// platform→workspace forward call reads it back to put in the +// Authorization header. Hashing would force a re-mint on every call +// (defeating the purpose of the shared secret) or a separate plaintext +// store (defeating the simplicity). Encryption-at-rest is delegated to +// the underlying Postgres volume — application-layer encryption via +// SECRETS_ENCRYPTION_KEY is a defense-in-depth follow-up. +func IssuePlatformInboundSecret(ctx context.Context, db *sql.DB, workspaceID string) (string, error) { + if workspaceID == "" { + return "", fmt.Errorf("wsauth: workspaceID required") + } + buf := make([]byte, platformInboundSecretBytes) + if _, err := rand.Read(buf); err != nil { + return "", fmt.Errorf("wsauth: generate platform_inbound_secret: %w", err) + } + plaintext := base64.RawURLEncoding.EncodeToString(buf) + + _, err := db.ExecContext(ctx, ` + UPDATE workspaces SET platform_inbound_secret = $1 WHERE id = $2 + `, plaintext, workspaceID) + if err != nil { + return "", fmt.Errorf("wsauth: persist platform_inbound_secret: %w", err) + } + return plaintext, nil +} + +// ReadPlatformInboundSecret returns the plaintext secret for a workspace +// or ErrNoInboundSecret if the column is NULL/empty. Used by platform- +// side handlers that forward HTTPS calls into the workspace. +// +// Callers MUST handle ErrNoInboundSecret explicitly; never default to +// an empty bearer. An empty Authorization header would let any caller +// through if the workspace's auth is fail-open (it is not today, but +// defense-in-depth keeps it that way). +func ReadPlatformInboundSecret(ctx context.Context, db *sql.DB, workspaceID string) (string, error) { + if workspaceID == "" { + return "", fmt.Errorf("wsauth: workspaceID required") + } + var secret sql.NullString + err := db.QueryRowContext(ctx, + `SELECT platform_inbound_secret FROM workspaces WHERE id = $1`, workspaceID, + ).Scan(&secret) + if err == sql.ErrNoRows { + return "", ErrNoInboundSecret + } + if err != nil { + return "", fmt.Errorf("wsauth: read platform_inbound_secret: %w", err) + } + if !secret.Valid || secret.String == "" { + return "", ErrNoInboundSecret + } + return secret.String, nil +} diff --git a/workspace-server/internal/wsauth/platform_inbound_test.go b/workspace-server/internal/wsauth/platform_inbound_test.go new file mode 100644 index 00000000..2c1cda63 --- /dev/null +++ b/workspace-server/internal/wsauth/platform_inbound_test.go @@ -0,0 +1,129 @@ +package wsauth + +import ( + "context" + "errors" + "regexp" + "testing" + + "github.com/DATA-DOG/go-sqlmock" +) + +// IssuePlatformInboundSecret persists the plaintext (not a hash) so the +// platform can read it back on every forward call. This is the primary +// shape difference vs. IssueToken; pin it explicitly. +func TestIssuePlatformInboundSecret_PersistsPlaintext(t *testing.T) { + db, mock := setupMock(t) + + // Capture the plaintext written by the UPDATE so we can verify the + // returned value matches what was stored. AnyArg captures, then we + // pull the captured value via a separate ExpectExec... wait, sqlmock + // doesn't return the captured args. Use a regex-style match on the + // SQL and trust that the function persisted SOMETHING — then assert + // the returned plaintext shape (length + alphabet) matches the + // generator. The end-to-end "platform reads the same value back" is + // covered by ReadPlatformInboundSecret tests. + mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret = \$1 WHERE id = \$2`). + WithArgs(sqlmock.AnyArg(), "ws-abc"). + WillReturnResult(sqlmock.NewResult(1, 1)) + + plaintext, err := IssuePlatformInboundSecret(context.Background(), db, "ws-abc") + if err != nil { + t.Fatalf("IssuePlatformInboundSecret: %v", err) + } + if len(plaintext) < 40 { + t.Errorf("plaintext too short for 256-bit entropy: len=%d", len(plaintext)) + } + if !regexp.MustCompile(`^[A-Za-z0-9_-]+$`).MatchString(plaintext) { + t.Errorf("plaintext contains non-urlsafe chars: %q", plaintext) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet expectations: %v", err) + } +} + +// Re-issue rotates: the same workspace gets a different secret on the +// second call. Without rotation, a leaked secret would be permanent. +func TestIssuePlatformInboundSecret_RotatesOnReissue(t *testing.T) { + db, mock := setupMock(t) + mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret`).WillReturnResult(sqlmock.NewResult(1, 1)) + mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret`).WillReturnResult(sqlmock.NewResult(1, 1)) + + a, _ := IssuePlatformInboundSecret(context.Background(), db, "ws-1") + b, _ := IssuePlatformInboundSecret(context.Background(), db, "ws-1") + if a == b { + t.Errorf("expected fresh secret on re-issue, got %q twice", a) + } +} + +func TestIssuePlatformInboundSecret_RejectsEmptyWorkspaceID(t *testing.T) { + db, _ := setupMock(t) + _, err := IssuePlatformInboundSecret(context.Background(), db, "") + if err == nil { + t.Error("expected error for empty workspaceID, got nil") + } +} + +func TestReadPlatformInboundSecret_HappyPath(t *testing.T) { + db, mock := setupMock(t) + mock.ExpectQuery(`SELECT platform_inbound_secret FROM workspaces WHERE id = \$1`). + WithArgs("ws-abc"). + WillReturnRows(sqlmock.NewRows([]string{"platform_inbound_secret"}).AddRow("the-plaintext")) + + got, err := ReadPlatformInboundSecret(context.Background(), db, "ws-abc") + if err != nil { + t.Fatalf("ReadPlatformInboundSecret: %v", err) + } + if got != "the-plaintext" { + t.Errorf("got %q, want %q", got, "the-plaintext") + } +} + +// NULL column → ErrNoInboundSecret, not empty string. This is load-bearing: +// callers that ignored err and used the empty string would send an +// unauthenticated request to the workspace. +func TestReadPlatformInboundSecret_NullReturnsErrNoInbound(t *testing.T) { + db, mock := setupMock(t) + mock.ExpectQuery(`SELECT platform_inbound_secret FROM workspaces WHERE id = \$1`). + WithArgs("ws-abc"). + WillReturnRows(sqlmock.NewRows([]string{"platform_inbound_secret"}).AddRow(nil)) + + _, err := ReadPlatformInboundSecret(context.Background(), db, "ws-abc") + if !errors.Is(err, ErrNoInboundSecret) { + t.Errorf("expected ErrNoInboundSecret on NULL, got %v", err) + } +} + +// Empty string is treated identically to NULL. +func TestReadPlatformInboundSecret_EmptyReturnsErrNoInbound(t *testing.T) { + db, mock := setupMock(t) + mock.ExpectQuery(`SELECT platform_inbound_secret FROM workspaces WHERE id = \$1`). + WithArgs("ws-abc"). + WillReturnRows(sqlmock.NewRows([]string{"platform_inbound_secret"}).AddRow("")) + + _, err := ReadPlatformInboundSecret(context.Background(), db, "ws-abc") + if !errors.Is(err, ErrNoInboundSecret) { + t.Errorf("expected ErrNoInboundSecret on empty, got %v", err) + } +} + +// Missing workspace row → ErrNoInboundSecret (collapsed from sql.ErrNoRows). +func TestReadPlatformInboundSecret_MissingWorkspace(t *testing.T) { + db, mock := setupMock(t) + mock.ExpectQuery(`SELECT platform_inbound_secret FROM workspaces WHERE id = \$1`). + WithArgs("ws-missing"). + WillReturnRows(sqlmock.NewRows([]string{"platform_inbound_secret"})) + + _, err := ReadPlatformInboundSecret(context.Background(), db, "ws-missing") + if !errors.Is(err, ErrNoInboundSecret) { + t.Errorf("expected ErrNoInboundSecret on missing row, got %v", err) + } +} + +func TestReadPlatformInboundSecret_RejectsEmptyWorkspaceID(t *testing.T) { + db, _ := setupMock(t) + _, err := ReadPlatformInboundSecret(context.Background(), db, "") + if err == nil { + t.Error("expected error for empty workspaceID, got nil") + } +} diff --git a/workspace-server/migrations/044_platform_inbound_secret.down.sql b/workspace-server/migrations/044_platform_inbound_secret.down.sql new file mode 100644 index 00000000..35e035c1 --- /dev/null +++ b/workspace-server/migrations/044_platform_inbound_secret.down.sql @@ -0,0 +1,9 @@ +-- Reverse 044_platform_inbound_secret.up.sql +-- +-- Drops the per-workspace platform→workspace shared secret column. +-- +-- Destructive: any current value is irrecoverable. Re-running the .up.sql +-- adds the column back as NULL; existing workspaces would then need a +-- reprovision (or a separate backfill) to mint a fresh secret. +ALTER TABLE workspaces + DROP COLUMN IF EXISTS platform_inbound_secret; diff --git a/workspace-server/migrations/044_platform_inbound_secret.up.sql b/workspace-server/migrations/044_platform_inbound_secret.up.sql new file mode 100644 index 00000000..a99b8920 --- /dev/null +++ b/workspace-server/migrations/044_platform_inbound_secret.up.sql @@ -0,0 +1,58 @@ +-- 044_platform_inbound_secret.up.sql +-- +-- Per-workspace shared secret that the **platform** presents when calling +-- INTO a workspace's HTTP server (e.g. POST /internal/chat/uploads/ingest). +-- +-- Asymmetric to workspace_auth_tokens, by design: +-- +-- workspace_auth_tokens workspaces.platform_inbound_secret +-- ───────────────────── ───────────────────────────────── +-- workspace → platform platform → workspace +-- plaintext NEVER stored plaintext STORED here +-- bcrypt-style hash compare string-equality compare +-- workspace presents bearer platform presents bearer +-- platform validates by hash workspace validates by file compare +-- +-- Why a separate column / role: +-- +-- * Principle of least privilege. Platform's authority over a workspace +-- is structurally different from the workspace's authority on the +-- platform; one bearer for both blurs that. +-- * Independent rotation. A platform-side compromise can be contained by +-- rotating only this column fleet-wide, without revoking every +-- workspace's outbound auth (which would take down heartbeat / A2A). +-- * Audit clarity. workspace-side logs see distinct bearers for +-- "platform call" vs "user call". +-- * Cheap to add now, expensive to retrofit. Splitting later forces a +-- fleet-wide rolling rotation while old + new clients coexist. +-- +-- Why plaintext (not hashed): +-- +-- The platform needs the plaintext at every forward call to put in the +-- Authorization header. Storing only a hash would force a re-mint on +-- every call (defeating the purpose) or a separate plaintext store +-- (defeating the simplicity). +-- +-- Encryption at rest: this column relies on Postgres-volume / disk +-- encryption (Railway-managed, AES-256). Application-layer encryption +-- via the existing SECRETS_ENCRYPTION_KEY pathway (workspace_secrets) is +-- a defense-in-depth follow-up — tracked separately so this migration +-- stays additive. +-- +-- Workspace-side delivery: the plaintext is written into +-- /configs/.platform_inbound_secret at provision time alongside the +-- existing /configs/.auth_token. Both files are 0600 and live on the +-- workspace's volume only. +-- +-- Nullable for forward-compat: existing rows have no value until the +-- workspace is reprovisioned. /internal/* handlers MUST refuse to serve +-- when the file is empty (fail-closed, not fail-open) so legacy rows +-- can't be hit pre-migration without a fresh secret. +-- +-- Reverse plan: the .down.sql drops the column. Any caller relying on +-- the secret would 404 after the column is gone — the column add is +-- additive in the proper direction; the down is destructive only because +-- the value would be irrecoverable. + +ALTER TABLE workspaces + ADD COLUMN IF NOT EXISTS platform_inbound_secret TEXT NULL;