fix(wsauth): token kinds — provisioning no longer revokes the Create 201 bearer (core#1644) #2682

Merged
claude-ceo-assistant merged 2 commits from fix/wsauth-token-kinds-1644 into main 2026-06-13 01:18:06 +00:00
14 changed files with 142 additions and 22 deletions
@@ -56,7 +56,7 @@ func (h *AdminWorkspaceTokenHandler) Create(c *gin.Context) {
return
}
token, err := wsauth.IssueToken(c.Request.Context(), db.DB, workspaceID)
token, err := wsauth.IssueAPIToken(c.Request.Context(), db.DB, workspaceID)
if err != nil {
log.Printf("admin workspace tokens: issue failed for %s: %v", workspaceID, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create token"})
@@ -21,7 +21,7 @@ func TestAdminWorkspaceTokenHandler_Create_HappyPath(t *testing.T) {
WithArgs(wsUUID1).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs(wsUUID1, sqlmock.AnyArg(), sqlmock.AnyArg()).
WithArgs(wsUUID1, sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(1, 1))
w := makeReq(t, NewAdminWorkspaceTokenHandler().Create, "POST",
@@ -46,7 +46,7 @@ func TestRotateExternalCredentials_HappyPath(t *testing.T) {
// 3. Mint a fresh token
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs("ws-ext", sqlmock.AnyArg(), sqlmock.AnyArg()).
WithArgs("ws-ext", sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
@@ -461,7 +461,7 @@ func TestWorkspaceCreate_ReturnsAuthToken_201(t *testing.T) {
// token_hash, prefix). This is the assertion that the new code path
// reaches the DB.
mock.ExpectExec("INSERT INTO workspace_auth_tokens").
WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
@@ -622,7 +622,7 @@ func (h *RegistryHandler) Register(c *gin.Context) {
// live token; they bootstrap one here on their next register call.
// New workspaces always pass through this path on their first boot.
response := gin.H{"status": "registered", "delivery_mode": effectiveMode}
if hasLive, hasLiveErr := wsauth.HasAnyLiveToken(ctx, db.DB, payload.ID); hasLiveErr == nil && !hasLive {
if hasLive, hasLiveErr := wsauth.HasLiveInstanceToken(ctx, db.DB, payload.ID); hasLiveErr == nil && !hasLive {
token, tokErr := wsauth.IssueToken(ctx, db.DB, payload.ID)
if tokErr != nil {
// Don't fail the whole register on token-issuance error — the
@@ -1141,11 +1141,14 @@ func (h *RegistryHandler) UpdateCard(c *gin.Context) {
func (h *RegistryHandler) requireWorkspaceToken(
ctx gincontext, c *gin.Context, workspaceID string,
) error {
hasLive, err := wsauth.HasAnyLiveToken(ctx, db.DB, workspaceID)
// Bootstrap allowance keys on INSTANCE tokens only: a live API-kind
// token (the Create 201 bearer a platform caller holds) must not force
// the fresh, credential-less instance to present a bearer. core#1644.
hasLive, err := wsauth.HasLiveInstanceToken(ctx, db.DB, workspaceID)
if err != nil {
// DB error checking token existence — fail open so we don't take
// the whole heartbeat path down on a transient hiccup. Log loudly.
log.Printf("wsauth: HasAnyLiveToken(%s) failed: %v — allowing request", workspaceID, err)
log.Printf("wsauth: HasLiveInstanceToken(%s) failed: %v — allowing request", workspaceID, err)
return nil
}
if !hasLive {
@@ -174,7 +174,7 @@ func TestRegister_200_DoesNotLogFailure(t *testing.T) {
WithArgs("ws-ok").
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
mock.ExpectExec("INSERT INTO workspace_auth_tokens").
WithArgs("ws-ok", sqlmock.AnyArg(), sqlmock.AnyArg()).
WithArgs("ws-ok", sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(1, 1))
w := httptest.NewRecorder()
@@ -1266,7 +1266,7 @@ func TestRegister_C18_BootstrapAllowedNoTokens(t *testing.T) {
// IssueToken INSERT.
mock.ExpectExec("INSERT INTO workspace_auth_tokens").
WithArgs("ws-new", sqlmock.AnyArg(), sqlmock.AnyArg()).
WithArgs("ws-new", sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(1, 1))
w := httptest.NewRecorder()
+1 -1
View File
@@ -117,7 +117,7 @@ func (h *TokenHandler) Create(c *gin.Context) {
return
}
token, err := wsauth.IssueToken(c.Request.Context(), db.DB, workspaceID)
token, err := wsauth.IssueAPIToken(c.Request.Context(), db.DB, workspaceID)
if err != nil {
log.Printf("tokens: issue failed for %s: %v", workspaceID, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to create token"})
@@ -982,7 +982,7 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
"status": "provisioning",
"workspace_access": workspaceAccess,
}
if authToken, tokErr := wsauth.IssueToken(ctx, db.DB, id); tokErr != nil {
if authToken, tokErr := wsauth.IssueAPIToken(ctx, db.DB, id); tokErr != nil {
log.Printf("Create workspace %s: inline auth_token mint failed (non-fatal — caller can use POST /admin/workspaces/:id/tokens): %v", id, tokErr)
} else {
resp["auth_token"] = authToken
@@ -390,7 +390,10 @@ func (h *WorkspaceHandler) buildProvisionerConfig(
// provisioning continues — the workspace will get 401 on its first heartbeat
// and can recover on the next restart.
func (h *WorkspaceHandler) issueAndInjectToken(ctx context.Context, workspaceID string, cfg *provisioner.WorkspaceConfig) {
// Revoke any existing live tokens FIRST — this must run in both modes.
// Revoke existing live INSTANCE tokens FIRST — this must run in both
// modes. API-kind tokens (the Create 201 inline bearer, admin/Token
// mints) are deliberately NOT revoked: provisioning invalidating the
// caller bearer it just returned was the core#1644 contract break.
// In SaaS mode the revoke is load-bearing on re-provision: without it,
// the previous workspace instance's live token sits in the DB, and
// RegistryHandler.requireWorkspaceToken on the fresh instance's first
@@ -399,7 +402,7 @@ func (h *WorkspaceHandler) issueAndInjectToken(ctx context.Context, workspaceID
// the CP provisioner doesn't carry cfg.ConfigFiles across user-data).
// Revoking clears the gate so the register handler's bootstrap path
// can mint a fresh token and return the plaintext in the response.
if err := wsauth.RevokeAllForWorkspace(ctx, db.DB, workspaceID); err != nil {
if err := wsauth.RevokeInstanceTokensForWorkspace(ctx, db.DB, workspaceID); err != nil {
log.Printf("Provisioner: failed to revoke existing tokens for %s: %v — skipping auth-token injection", workspaceID, err)
return
}
@@ -1030,7 +1030,7 @@ func TestIssueAndInjectToken_HappyPath(t *testing.T) {
// IssueToken INSERT
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs("ws-418-happy", sqlmock.AnyArg(), sqlmock.AnyArg()).
WithArgs("ws-418-happy", sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(1, 1))
cfg := provisioner.WorkspaceConfig{}
@@ -1069,7 +1069,7 @@ func TestIssueAndInjectToken_RotatesExistingToken(t *testing.T) {
// IssueToken INSERT for the new token
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs("ws-418-rotate", sqlmock.AnyArg(), sqlmock.AnyArg()).
WithArgs("ws-418-rotate", sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(1, 1))
cfg := provisioner.WorkspaceConfig{
@@ -1135,7 +1135,7 @@ func TestIssueAndInjectToken_IssueFailSkipsInjection(t *testing.T) {
WillReturnResult(sqlmock.NewResult(0, 0))
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs("ws-418-issue-fail", sqlmock.AnyArg(), sqlmock.AnyArg()).
WithArgs("ws-418-issue-fail", sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnError(fmt.Errorf("db: constraint violation"))
cfg := provisioner.WorkspaceConfig{}
@@ -1163,7 +1163,7 @@ func TestIssueAndInjectToken_NilConfigFilesAllocated(t *testing.T) {
WillReturnResult(sqlmock.NewResult(0, 0))
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs("ws-418-nil-cfg", sqlmock.AnyArg(), sqlmock.AnyArg()).
WithArgs("ws-418-nil-cfg", sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(1, 1))
cfg := provisioner.WorkspaceConfig{} // ConfigFiles intentionally nil
@@ -1834,7 +1834,7 @@ func TestProvisionWorkspaceCP_InstanceIDPersistFail_MarksFailed(t *testing.T) {
WithArgs("ws-cp-orphan").
WillReturnResult(sqlmock.NewResult(0, 0))
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs("ws-cp-orphan", sqlmock.AnyArg(), sqlmock.AnyArg()).
WithArgs("ws-cp-orphan", sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(1, 1))
mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret`).
WithArgs(sqlmock.AnyArg(), "ws-cp-orphan").
@@ -1920,7 +1920,7 @@ func TestProvisionWorkspaceCP_InstanceIDPersistFail_RetrySucceeds(t *testing.T)
WithArgs("ws-cp-retry-ok").
WillReturnResult(sqlmock.NewResult(0, 0))
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs("ws-cp-retry-ok", sqlmock.AnyArg(), sqlmock.AnyArg()).
WithArgs("ws-cp-retry-ok", sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(1, 1))
mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret`).
WithArgs(sqlmock.AnyArg(), "ws-cp-retry-ok").
+43 -3
View File
@@ -46,7 +46,22 @@ var ErrInvalidToken = errors.New("invalid or revoked workspace token")
// Callers should treat the returned string as secret material and pass it
// straight to the agent (env var, bundle response body, etc.) without
// logging it.
// IssueToken mints an INSTANCE-kind token (held by the workspace runtime).
// Kept under its original name so the register-bootstrap, docker-inject and
// external pre-register call sites are unchanged.
func IssueToken(ctx context.Context, db *sql.DB, workspaceID string) (string, error) {
return issueTokenKind(ctx, db, workspaceID, "instance")
}
// IssueAPIToken mints an API-kind token (held by a platform caller: the
// POST /workspaces 201 inline bearer, TokenHandler.Create, the admin
// first-bearer endpoint). API tokens SURVIVE provisioning (which revokes
// only instance tokens) -- this is the core#1644 contract fix.
func IssueAPIToken(ctx context.Context, db *sql.DB, workspaceID string) (string, error) {
return issueTokenKind(ctx, db, workspaceID, "api")
}
func issueTokenKind(ctx context.Context, db *sql.DB, workspaceID, kind string) (string, error) {
buf := make([]byte, tokenPayloadBytes)
if _, err := rand.Read(buf); err != nil {
return "", fmt.Errorf("wsauth: generate token: %w", err)
@@ -57,9 +72,9 @@ func IssueToken(ctx context.Context, db *sql.DB, workspaceID string) (string, er
prefix := plaintext[:tokenPrefixLen]
_, err := db.ExecContext(ctx, `
INSERT INTO workspace_auth_tokens (workspace_id, token_hash, prefix)
VALUES ($1, $2, $3)
`, workspaceID, hash[:], prefix)
INSERT INTO workspace_auth_tokens (workspace_id, token_hash, prefix, kind)
VALUES ($1, $2, $3, $4)
`, workspaceID, hash[:], prefix, kind)
if err != nil {
return "", fmt.Errorf("wsauth: persist token: %w", err)
}
@@ -198,6 +213,31 @@ func WorkspaceExists(ctx context.Context, db *sql.DB, workspaceID string) (bool,
// the heartbeat handler — a legacy workspace that registered before
// tokens existed needs exactly one issued on its first post-upgrade
// heartbeat rather than being rejected outright.
// RevokeInstanceTokensForWorkspace revokes only INSTANCE-kind tokens (the
// runtime-held ones). Used by provisioning: the old instance's credential
// must die and the fresh instance must get the bootstrap allowance, while
// caller-held API tokens (the Create 201 contract) survive. core#1644.
func RevokeInstanceTokensForWorkspace(ctx context.Context, db *sql.DB, workspaceID string) error {
_, err := db.ExecContext(ctx, `
UPDATE workspace_auth_tokens
SET revoked_at = now()
WHERE workspace_id = $1 AND revoked_at IS NULL AND kind = 'instance'`,
workspaceID)
return err
}
// HasLiveInstanceToken reports whether a live INSTANCE-kind token exists.
// This is the register-bootstrap predicate: a live API token must NOT block
// the fresh instance's first register. core#1644.
func HasLiveInstanceToken(ctx context.Context, db *sql.DB, workspaceID string) (bool, error) {
var n int
err := db.QueryRowContext(ctx, `
SELECT COUNT(*) FROM workspace_auth_tokens
WHERE workspace_id = $1 AND revoked_at IS NULL AND kind = 'instance'`,
workspaceID).Scan(&n)
return n > 0, err
}
func HasAnyLiveToken(ctx context.Context, db *sql.DB, workspaceID string) (bool, error) {
var n int
err := db.QueryRowContext(ctx, `
@@ -40,6 +40,7 @@ func TestIssueToken_PersistsHashNotPlaintext(t *testing.T) {
"ws-abc",
sqlmock.AnyArg(), // hash (bytea)
sqlmock.AnyArg(), // prefix
"instance", // kind: IssueToken mints instance tokens
).
WillReturnResult(sqlmock.NewResult(1, 1))
@@ -409,3 +410,60 @@ func TestValidateAnyToken_EmptyTokenRejected(t *testing.T) {
t.Errorf("got %v, want ErrInvalidToken", err)
}
}
// --- core#1644 token-kind contract -----------------------------------------
func TestIssueAPIToken_PersistsAPIKind(t *testing.T) {
db, mock := setupMock(t)
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs("ws-abc", sqlmock.AnyArg(), sqlmock.AnyArg(), "api").
WillReturnResult(sqlmock.NewResult(1, 1))
if _, err := IssueAPIToken(context.Background(), db, "ws-abc"); err != nil {
t.Fatalf("IssueAPIToken: %v", err)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet expectations: %v", err)
}
}
func TestRevokeInstanceTokens_FiltersKind(t *testing.T) {
db, mock := setupMock(t)
// The revoke must carry the kind='instance' filter: provisioning kills
// the old runtime credential but MUST NOT clobber caller-held API
// tokens (the Create 201 contract — core#1644).
mock.ExpectExec(`UPDATE workspace_auth_tokens\s+SET revoked_at = now\(\)\s+WHERE workspace_id = \$1 AND revoked_at IS NULL AND kind = 'instance'`).
WithArgs("ws-abc").
WillReturnResult(sqlmock.NewResult(0, 2))
if err := RevokeInstanceTokensForWorkspace(context.Background(), db, "ws-abc"); err != nil {
t.Fatalf("RevokeInstanceTokensForWorkspace: %v", err)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet expectations: %v", err)
}
}
func TestHasLiveInstanceToken_FiltersKind(t *testing.T) {
db, mock := setupMock(t)
// The bootstrap allowance counts only instance tokens: a live API token
// must not block the fresh instance's first register (core#1644).
mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens\s+WHERE workspace_id = \$1 AND revoked_at IS NULL AND kind = 'instance'`).
WithArgs("ws-abc").
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
live, err := HasLiveInstanceToken(context.Background(), db, "ws-abc")
if err != nil {
t.Fatalf("HasLiveInstanceToken: %v", err)
}
if live {
t.Errorf("expected no live instance token")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet expectations: %v", err)
}
}
@@ -0,0 +1 @@
ALTER TABLE workspace_auth_tokens DROP COLUMN IF EXISTS kind;
@@ -0,0 +1,15 @@
-- Token KINDS: 'instance' (held by the workspace runtime; minted by register
-- bootstrap / docker-mode inject / external pre-register) vs 'api' (held by
-- platform callers; minted by POST /workspaces 201, TokenHandler.Create, the
-- admin first-bearer endpoint).
--
-- Why (core#1644): provisioning revokes tokens so the fresh instance can
-- bootstrap-register, but that clobbered the api token the Create 201 had
-- just returned (PR#1669) -- the platform broke its own API contract.
-- With kinds, provision revokes ONLY instance tokens and the bootstrap
-- allowance keys on live INSTANCE tokens, so caller bearers survive.
--
-- Existing rows default to 'instance' (today's behavior, runtime-held).
-- Idempotent: the migration runner re-applies all files every boot.
ALTER TABLE workspace_auth_tokens
ADD COLUMN IF NOT EXISTS kind TEXT NOT NULL DEFAULT 'instance';