feat(registry): admin endpoint to revoke a workspace's auth tokens (cross-cloud migration fix) #2738

Merged
devops-engineer merged 1 commit from fix/migrate-revoke-stale-auth-token into main 2026-06-13 09:13:14 +00:00
3 changed files with 163 additions and 0 deletions
@@ -0,0 +1,54 @@
package handlers
import (
"log"
"net/http"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/wsauth"
"github.com/gin-gonic/gin"
)
// RevokeAuthTokens revokes every live workspace_auth_tokens row for a
// workspace, so the NEXT /registry/register call for that workspace is
// bootstrap-allowed (no live token on file → requireWorkspaceToken lets the
// first registration through and issues a fresh token).
//
// Why this exists — cross-cloud migration (CP#672 + migrate-provider):
// when the CP migrates a workspace to another cloud it provisions a FRESH
// container. CP#672 persists only /workspace + /home/agent/.claude — NOT
// /configs — so the migrated container boots with an empty
// /configs/.auth_token and cannot present the bearer the SOURCE box minted.
// The source's token is still live in workspace_auth_tokens, so the migrated
// container's /registry/register 401s (C18 ownership guard) and the workspace
// is wedged: it serves its agent-card but never re-registers, so its
// advertised URL never flips to the new box.
//
// The single-tenant Docker deployment self-heals this via
// sweepStaleTokensWithoutContainer (orphan_sweeper.go) — but that sweeper
// only runs in single-tenant Docker mode (no Docker daemon in CP/SaaS), so a
// per-tenant SaaS platform never revokes the stale token and the migration
// 401-wedges forever. The platform's own restart pipeline already does the
// right thing (workspace_restart.go → issueAndInjectToken →
// wsauth.RevokeAllForWorkspace); this endpoint exposes the SAME revoke so the
// CP migrator — which provisions the target out-of-band, bypassing the restart
// pipeline — can trigger it as part of the cutover.
//
// AdminAuth-gated (wired in router.go's wsAdmin group): only the CP (holding
// the tenant admin token) may revoke a workspace's tokens. Idempotent —
// revoking an already-revoked / never-registered workspace is a no-op 200, so
// the migrator can call it unconditionally.
func (h *WorkspaceHandler) RevokeAuthTokens(c *gin.Context) {
	// The workspace to revoke comes from the :id route parameter; reject an
	// empty value before touching the database.
	workspaceID := c.Param("id")
	if workspaceID == "" {
		c.JSON(http.StatusBadRequest, gin.H{"error": "workspace id required"})
		return
	}

	// Run the revoke under the request's context so it is tied to the
	// lifetime of the admin call.
	reqCtx := c.Request.Context()
	err := wsauth.RevokeAllForWorkspace(reqCtx, db.DB, workspaceID)
	if err != nil {
		// Log the underlying error server-side; the caller only sees a
		// generic 500 body.
		log.Printf("RevokeAuthTokens: revoke %s: %v", workspaceID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "revoke failed"})
		return
	}

	log.Printf("RevokeAuthTokens: revoked live auth tokens for workspace %s (migration cutover / admin)", workspaceID)
	c.JSON(http.StatusOK, gin.H{
		"status":       "revoked",
		"workspace_id": workspaceID,
	})
}
@@ -0,0 +1,101 @@
package handlers
import (
"errors"
"net/http"
"net/http/httptest"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// RevokeAuthTokens revokes a workspace's live tokens so the migrated
// container's next /registry/register is bootstrap-allowed. The happy path
// runs the wsauth.RevokeAllForWorkspace UPDATE and returns 200.
func TestRevokeAuthTokens_HappyPath(t *testing.T) {
	const workspaceID = "ws-migrated"

	handler, dbMock := setupBootstrapHandler(t)

	// Expect exactly one UPDATE for this workspace, reporting one row affected.
	dbMock.ExpectExec(`UPDATE workspace_auth_tokens`).
		WithArgs(workspaceID).
		WillReturnResult(sqlmock.NewResult(0, 1))

	// Drive the handler directly with a synthetic gin context.
	rec := httptest.NewRecorder()
	gctx, _ := gin.CreateTestContext(rec)
	gctx.Request = httptest.NewRequest(http.MethodPost, "/admin/workspaces/"+workspaceID+"/revoke-auth-tokens", nil)
	gctx.Params = gin.Params{{Key: "id", Value: workspaceID}}

	handler.RevokeAuthTokens(gctx)

	if got := rec.Code; got != http.StatusOK {
		t.Fatalf("want 200, got %d: %s", got, rec.Body.String())
	}
	if err := dbMock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet: %v", err)
	}
}
// Idempotent: revoking a workspace with no live tokens (already-revoked /
// never-registered) affects 0 rows but is still a 200 — the migrator calls
// this unconditionally on every cutover.
func TestRevokeAuthTokens_NoLiveTokensStillOK(t *testing.T) {
	const workspaceID = "ws-fresh"

	handler, dbMock := setupBootstrapHandler(t)

	// The UPDATE still runs, but reports zero rows affected.
	dbMock.ExpectExec(`UPDATE workspace_auth_tokens`).
		WithArgs(workspaceID).
		WillReturnResult(sqlmock.NewResult(0, 0))

	rec := httptest.NewRecorder()
	gctx, _ := gin.CreateTestContext(rec)
	gctx.Request = httptest.NewRequest(http.MethodPost, "/admin/workspaces/"+workspaceID+"/revoke-auth-tokens", nil)
	gctx.Params = gin.Params{{Key: "id", Value: workspaceID}}

	handler.RevokeAuthTokens(gctx)

	// Zero affected rows must not be treated as a failure.
	if got := rec.Code; got != http.StatusOK {
		t.Fatalf("want 200, got %d: %s", got, rec.Body.String())
	}
	if err := dbMock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet: %v", err)
	}
}
// An empty :id is a 400 before any DB work.
func TestRevokeAuthTokens_EmptyIDIs400(t *testing.T) {
	// No DB expectations are registered: the handler should return before
	// issuing any query.
	handler, _ := setupBootstrapHandler(t)

	rec := httptest.NewRecorder()
	gctx, _ := gin.CreateTestContext(rec)
	gctx.Request = httptest.NewRequest(http.MethodPost, "/admin/workspaces//revoke-auth-tokens", nil)
	gctx.Params = gin.Params{{Key: "id", Value: ""}}

	handler.RevokeAuthTokens(gctx)

	if got := rec.Code; got != http.StatusBadRequest {
		t.Fatalf("want 400, got %d: %s", got, rec.Body.String())
	}
}
// A DB failure surfaces as 500 so the migrator can fail the cutover rather
// than retire the source against a workspace that will 401-wedge.
func TestRevokeAuthTokens_DBErrorIs500(t *testing.T) {
	const workspaceID = "ws-dberr"

	handler, dbMock := setupBootstrapHandler(t)

	// Make the UPDATE fail so the handler's error branch is exercised.
	dbFailure := errors.New("connection reset")
	dbMock.ExpectExec(`UPDATE workspace_auth_tokens`).
		WithArgs(workspaceID).
		WillReturnError(dbFailure)

	rec := httptest.NewRecorder()
	gctx, _ := gin.CreateTestContext(rec)
	gctx.Request = httptest.NewRequest(http.MethodPost, "/admin/workspaces/"+workspaceID+"/revoke-auth-tokens", nil)
	gctx.Params = gin.Params{{Key: "id", Value: workspaceID}}

	handler.RevokeAuthTokens(gctx)

	if got := rec.Code; got != http.StatusInternalServerError {
		t.Fatalf("want 500, got %d: %s", got, rec.Body.String())
	}
	if err := dbMock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet: %v", err)
	}
}
@@ -188,6 +188,14 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi
// so the canvas flips to failed in seconds instead of waiting
// for the 10-minute provision-timeout sweeper.
wsAdmin.POST("/admin/workspaces/:id/bootstrap-failed", wh.BootstrapFailed)
// Revoke a workspace's live auth tokens so its next /registry/register
// is bootstrap-allowed. The CP migrator calls this during a cross-cloud
// cutover: the migrated container boots with an empty /configs (no
// .auth_token — CP#672 doesn't persist /configs) and would otherwise
// 401 forever against the SOURCE box's still-live token. Mirrors the
// revoke the restart pipeline already does (issueAndInjectToken →
// RevokeAllForWorkspace); the SaaS path has no stale-token sweeper.
wsAdmin.POST("/admin/workspaces/:id/revoke-auth-tokens", wh.RevokeAuthTokens)
// Per-workspace LLM billing mode override (internal#691). Used by
// CP's /cp/admin/workspaces/:id/llm-billing-mode proxy + (via that
// proxy) by the canvas Config-tab "LLM Billing" section. Default-