Merge pull request #64 from Molecule-AI/fix/issue-15-refresh-oauth-on-restart

fix(secrets): auto-refresh global_secrets on workspace restart (#15)
This commit is contained in:
Hongming Wang 2026-04-14 12:49:19 -07:00 committed by GitHub
commit 383582fbbf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 152 additions and 0 deletions

View File

@ -1,6 +1,7 @@
package handlers
import (
"context"
"database/sql"
"log"
"net/http"
@ -355,9 +356,56 @@ func (h *SecretsHandler) SetGlobal(c *gin.Context) {
return
}
// Issue #15: global secrets are injected into containers as env vars at
// Start() time, so a rotating token (e.g. CLAUDE_CODE_OAUTH_TOKEN) doesn't
// reach existing workspaces until the container is recreated. Auto-restart
// every workspace whose env is affected — i.e. those WITHOUT a
// workspace-level override of the same key.
go h.restartAllAffectedByGlobalKey(body.Key)
c.JSON(http.StatusOK, gin.H{"status": "saved", "key": body.Key, "scope": "global"})
}
// restartAllAffectedByGlobalKey restarts every non-paused, non-removed
// workspace that would inherit the given global-secret key (i.e. does NOT
// have a workspace-level override). Used on SetGlobal / DeleteGlobal so
// rotated credentials (OAuth tokens, API keys) propagate without a manual
// restart loop. See issue #15.
func (h *SecretsHandler) restartAllAffectedByGlobalKey(key string) {
if h.restartFunc == nil {
return
}
ctx := context.Background()
rows, err := db.DB.QueryContext(ctx, `
SELECT id FROM workspaces
WHERE status NOT IN ('removed', 'paused')
AND COALESCE(runtime, '') <> 'external'
AND id NOT IN (
SELECT workspace_id FROM workspace_secrets WHERE key = $1
)
`, key)
if err != nil {
log.Printf("Global secret %s: failed to list affected workspaces for auto-restart: %v", key, err)
return
}
defer rows.Close()
var ids []string
for rows.Next() {
var id string
if err := rows.Scan(&id); err == nil {
ids = append(ids, id)
}
}
if len(ids) == 0 {
return
}
log.Printf("Global secret %s changed: auto-restarting %d workspace(s) to refresh env", key, len(ids))
for _, id := range ids {
go h.restartFunc(id)
}
}
// DeleteGlobal handles DELETE /admin/secrets/:key
func (h *SecretsHandler) DeleteGlobal(c *gin.Context) {
key := c.Param("key")
@ -376,6 +424,10 @@ func (h *SecretsHandler) DeleteGlobal(c *gin.Context) {
return
}
// Issue #15: propagate deletion to running containers — otherwise they
// keep the stale env var until manual restart.
go h.restartAllAffectedByGlobalKey(key)
c.JSON(http.StatusOK, gin.H{"status": "deleted", "key": key, "scope": "global"})
}

View File

@ -684,3 +684,103 @@ func TestSecretsValues_InvalidWorkspaceID(t *testing.T) {
t.Errorf("expected 400, got %d", w.Code)
}
}
// ==================== Global secret auto-restart (issue #15) ====================
// TestSetGlobal_AutoRestartsAffectedWorkspaces documents the fix for #15:
// rotating a global secret (e.g. CLAUDE_CODE_OAUTH_TOKEN) must propagate to
// every running workspace without a manual restart loop. The handler should
// fire RestartByID for each non-paused/non-removed workspace that does NOT
// have a workspace-level override of the same key.
func TestSetGlobal_AutoRestartsAffectedWorkspaces(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
restarted := make(chan string, 4)
restartFunc := func(wsID string) { restarted <- wsID }
handler := NewSecretsHandler(restartFunc)
// INSERT ... ON CONFLICT for the global secret itself.
mock.ExpectExec("INSERT INTO global_secrets").
WithArgs("CLAUDE_CODE_OAUTH_TOKEN", sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(0, 1))
// Query for affected workspaces — ws-A inherits, ws-B overrides (excluded).
mock.ExpectQuery("SELECT id FROM workspaces").
WithArgs("CLAUDE_CODE_OAUTH_TOKEN").
WillReturnRows(sqlmock.NewRows([]string{"id"}).
AddRow("ws-a").
AddRow("ws-c"))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
body := `{"key":"CLAUDE_CODE_OAUTH_TOKEN","value":"sk-ant-oat01-new"}`
c.Request = httptest.NewRequest("POST", "/admin/secrets", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
handler.SetGlobal(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
// Collect both expected restarts (order not guaranteed).
seen := map[string]bool{}
deadline := time.After(2 * time.Second)
for len(seen) < 2 {
select {
case id := <-restarted:
seen[id] = true
case <-deadline:
t.Fatalf("auto-restart not fired for all affected workspaces; got %v", seen)
}
}
if !seen["ws-a"] || !seen["ws-c"] {
t.Errorf("expected ws-a and ws-c restarted, got %v", seen)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestDeleteGlobal_AutoRestartsAffectedWorkspaces covers the delete branch of #15.
func TestDeleteGlobal_AutoRestartsAffectedWorkspaces(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
restarted := make(chan string, 2)
handler := NewSecretsHandler(func(id string) { restarted <- id })
mock.ExpectExec("DELETE FROM global_secrets").
WithArgs("OLD_KEY").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectQuery("SELECT id FROM workspaces").
WithArgs("OLD_KEY").
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-x"))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "key", Value: "OLD_KEY"}}
c.Request = httptest.NewRequest("DELETE", "/admin/secrets/OLD_KEY", nil)
handler.DeleteGlobal(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
select {
case id := <-restarted:
if id != "ws-x" {
t.Errorf("expected ws-x, got %q", id)
}
case <-time.After(2 * time.Second):
t.Fatal("auto-restart not fired")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}