fix(auth): inject fresh bearer token into config volume on every provision (closes #418)

Container rebuild or volume wipe caused workspaces to lose /configs/.auth_token.
On re-registration the platform returned no auth_token (HasAnyLiveToken==true →
no re-issue), leaving the workspace unable to authenticate any subsequent API call.

Fix: provisionWorkspaceOpts now calls issueAndInjectToken before Start(). This
revokes any existing live tokens (plaintext is irrecoverable from the stored hash,
so rotation is the only safe path) and issues a fresh token that is written into
cfg.ConfigFiles[".auth_token"]. WriteFilesToContainer delivers it to /configs
immediately after ContainerStart, racing safely ahead of the Python adapter's
1-2s startup time.

Failure modes are soft: revoke or issue errors skip injection with a warning;
provisioning continues and the workspace recovers on the next restart.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Molecule AI Backend Engineer 2026-04-16 10:58:50 +00:00 committed by Hongming Wang
parent cdc1caf6f9
commit 92a2c84d76
2 changed files with 204 additions and 0 deletions

View File

@ -12,6 +12,7 @@ import (
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
)
// provisionWorkspace handles async container deployment with timeout.
@ -121,6 +122,17 @@ func (h *WorkspaceHandler) provisionWorkspaceOpts(workspaceID, templatePath stri
}
}
// Issue/rotate the workspace auth token and inject the plaintext into the
// config volume so the workspace always has a valid bearer credential on
// disk, even after a container rebuild wiped the volume (issue #418).
//
// We must rotate (revoke-then-issue) rather than reuse because the DB only
// stores sha256(plaintext) — we cannot reconstruct the original token to
// write it back. The new plaintext is written into /configs/.auth_token via
// WriteFilesToContainer, which runs immediately after ContainerStart and
// wins the race against the Python adapter's startup time (~1-2 s).
h.issueAndInjectToken(ctx, workspaceID, &cfg)
url, err := h.provisioner.Start(ctx, cfg)
if err != nil {
// Persist the error text to last_sample_error so the canvas and
@ -221,6 +233,37 @@ func (h *WorkspaceHandler) buildProvisionerConfig(
}
}
// issueAndInjectToken rotates the workspace auth token and injects the
// plaintext into cfg.ConfigFiles[".auth_token"] so it is written into the
// /configs volume by WriteFilesToContainer immediately after the container
// starts (issue #418: container rebuild wipes /configs/.auth_token).
//
// Rotation strategy: since the DB only stores sha256(plaintext) we can never
// recover an existing token. We revoke all live tokens first and issue a
// fresh one. On any error the injection is skipped and a warning is logged;
// provisioning continues — the workspace will get 401 on its first heartbeat
// and can recover on the next restart.
func (h *WorkspaceHandler) issueAndInjectToken(ctx context.Context, workspaceID string, cfg *provisioner.WorkspaceConfig) {
// Revoke any existing live tokens. If this fails we bail out rather than
// issuing a second live token whose plaintext we can't also deliver.
if err := wsauth.RevokeAllForWorkspace(ctx, db.DB, workspaceID); err != nil {
log.Printf("Provisioner: failed to revoke existing tokens for %s: %v — skipping auth-token injection", workspaceID, err)
return
}
token, err := wsauth.IssueToken(ctx, db.DB, workspaceID)
if err != nil {
log.Printf("Provisioner: failed to issue auth token for %s: %v — skipping auth-token injection", workspaceID, err)
return
}
if cfg.ConfigFiles == nil {
cfg.ConfigFiles = make(map[string][]byte)
}
cfg.ConfigFiles[".auth_token"] = []byte(token)
log.Printf("Provisioner: injected fresh auth token for workspace %s into config volume", workspaceID)
}
// findTemplateByName looks for a workspace-configs-templates directory matching a name.
func findTemplateByName(configsDir, name string) string {
entries, err := os.ReadDir(configsDir)

View File

@ -1,13 +1,16 @@
package handlers
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
"gopkg.in/yaml.v3"
)
@ -595,6 +598,164 @@ func TestBuildProvisionerConfig_WorkspacePathFromEnv(t *testing.T) {
}
}
// ==================== issueAndInjectToken (issue #418) ====================
// TestIssueAndInjectToken_HappyPath verifies that on a normal (re)provision the
// helper revokes existing tokens, issues a fresh one, and injects the plaintext
// into cfg.ConfigFiles[".auth_token"].
func TestIssueAndInjectToken_HappyPath(t *testing.T) {
mock := setupTestDB(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
// RevokeAllForWorkspace UPDATE (0 rows — no prior tokens, still succeeds)
mock.ExpectExec(`UPDATE workspace_auth_tokens SET revoked_at`).
WithArgs("ws-418-happy").
WillReturnResult(sqlmock.NewResult(0, 0))
// IssueToken INSERT
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs("ws-418-happy", sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(1, 1))
cfg := provisioner.WorkspaceConfig{}
handler.issueAndInjectToken(context.Background(), "ws-418-happy", &cfg)
tok, ok := cfg.ConfigFiles[".auth_token"]
if !ok {
t.Fatal("expected .auth_token in ConfigFiles after injection")
}
if len(tok) == 0 {
t.Error("expected non-empty token bytes in ConfigFiles[.auth_token]")
}
// Plaintext should be a valid base64url-encoded string (43 chars for 32 random bytes)
if len(tok) != 43 {
t.Errorf("expected 43-char token, got %d chars: %q", len(tok), tok)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet SQL expectations: %v", err)
}
}
// TestIssueAndInjectToken_RotatesExistingToken verifies that when a workspace
// already has a live token (the rebuild scenario), the helper revokes it before
// issuing a fresh one so we never accumulate stale live tokens in the DB.
func TestIssueAndInjectToken_RotatesExistingToken(t *testing.T) {
mock := setupTestDB(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
// RevokeAllForWorkspace: 1 existing token revoked
mock.ExpectExec(`UPDATE workspace_auth_tokens SET revoked_at`).
WithArgs("ws-418-rotate").
WillReturnResult(sqlmock.NewResult(1, 1))
// IssueToken INSERT for the new token
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs("ws-418-rotate", sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(1, 1))
cfg := provisioner.WorkspaceConfig{
ConfigFiles: map[string][]byte{
"config.yaml": []byte("name: test\n"),
},
}
handler.issueAndInjectToken(context.Background(), "ws-418-rotate", &cfg)
// Existing config file must still be present
if _, ok := cfg.ConfigFiles["config.yaml"]; !ok {
t.Error("issueAndInjectToken must not remove existing ConfigFiles entries")
}
tok, ok := cfg.ConfigFiles[".auth_token"]
if !ok {
t.Fatal("expected .auth_token in ConfigFiles after rotation")
}
if len(tok) == 0 {
t.Error("expected non-empty rotated token")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet SQL expectations: %v", err)
}
}
// TestIssueAndInjectToken_RevokeFailSkipsInjection verifies that a DB error on
// the revoke step causes the helper to skip injection entirely — we must never
// issue a token that can't be delivered to the workspace, nor leave a second
// live token that the old file might accidentally present.
func TestIssueAndInjectToken_RevokeFailSkipsInjection(t *testing.T) {
mock := setupTestDB(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
mock.ExpectExec(`UPDATE workspace_auth_tokens SET revoked_at`).
WithArgs("ws-418-revoke-fail").
WillReturnError(fmt.Errorf("db: connection lost"))
// No INSERT should follow
cfg := provisioner.WorkspaceConfig{}
handler.issueAndInjectToken(context.Background(), "ws-418-revoke-fail", &cfg)
if _, ok := cfg.ConfigFiles[".auth_token"]; ok {
t.Error("token must NOT be injected when revoke fails")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet SQL expectations: %v", err)
}
}
// TestIssueAndInjectToken_IssueFailSkipsInjection verifies that a DB error on
// IssueToken also skips injection without panicking.
func TestIssueAndInjectToken_IssueFailSkipsInjection(t *testing.T) {
mock := setupTestDB(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
mock.ExpectExec(`UPDATE workspace_auth_tokens SET revoked_at`).
WithArgs("ws-418-issue-fail").
WillReturnResult(sqlmock.NewResult(0, 0))
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs("ws-418-issue-fail", sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnError(fmt.Errorf("db: constraint violation"))
cfg := provisioner.WorkspaceConfig{}
handler.issueAndInjectToken(context.Background(), "ws-418-issue-fail", &cfg)
if _, ok := cfg.ConfigFiles[".auth_token"]; ok {
t.Error("token must NOT be injected when IssueToken fails")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet SQL expectations: %v", err)
}
}
// TestIssueAndInjectToken_NilConfigFilesAllocated verifies that a nil
// ConfigFiles map is allocated before the token is written.
func TestIssueAndInjectToken_NilConfigFilesAllocated(t *testing.T) {
mock := setupTestDB(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
mock.ExpectExec(`UPDATE workspace_auth_tokens SET revoked_at`).
WithArgs("ws-418-nil-cfg").
WillReturnResult(sqlmock.NewResult(0, 0))
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
WithArgs("ws-418-nil-cfg", sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(1, 1))
cfg := provisioner.WorkspaceConfig{} // ConfigFiles intentionally nil
handler.issueAndInjectToken(context.Background(), "ws-418-nil-cfg", &cfg)
if cfg.ConfigFiles == nil {
t.Fatal("ConfigFiles must be allocated when nil before writing token")
}
if _, ok := cfg.ConfigFiles[".auth_token"]; !ok {
t.Error("expected .auth_token to be present after allocation")
}
}
// contains is a helper for substring matching in tests
func contains(s, substr string) bool {
return len(s) >= len(substr) && (s == substr || len(s) > 0 && containsStr(s, substr))