Files
hongming db5ffed2b5
Block internal-flavored paths / Block forbidden paths (push) Waiting to run
ci-arm64-advisory / fast-checks (push) Waiting to run
CI / Detect changes (push) Waiting to run
CI / Platform (Go) (push) Blocked by required conditions
CI / Canvas (Next.js) (push) Blocked by required conditions
CI / Shellcheck (E2E scripts) (push) Blocked by required conditions
CI / Canvas Deploy Reminder (push) Blocked by required conditions
CI / Python Lint & Test (push) Waiting to run
E2E API Smoke Test / detect-changes (push) Waiting to run
E2E API Smoke Test / E2E API Smoke Test (push) Blocked by required conditions
E2E Chat / detect-changes (push) Waiting to run
E2E Chat / E2E Chat (push) Blocked by required conditions
E2E Staging Canvas (Playwright) / detect-changes (push) Waiting to run
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Blocked by required conditions
Handlers Postgres Integration / detect-changes (push) Waiting to run
Handlers Postgres Integration / Handlers Postgres Integration (push) Blocked by required conditions
Harness Replays / detect-changes (push) Waiting to run
Harness Replays / Harness Replays (push) Blocked by required conditions
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Waiting to run
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Waiting to run
publish-workspace-server-image / Production auto-deploy (push) Blocked by required conditions
Secret scan / Scan diff for credential-shaped strings (push) Waiting to run
CI / all-required (push) Has been cancelled
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Has been cancelled
publish-workspace-server-image / build-and-push (push) Has been cancelled
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 50s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (push) Successful in 1m20s
E2E Staging External Runtime / E2E Staging External Runtime (push) Successful in 5m20s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (push) Successful in 6m34s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Successful in 6m2s
feat(workspace-server): per-workspace llm_billing_mode override (internal#691) (#1927)
Co-authored-by: hongming <hongmingwang@moleculesai.app>
Co-committed-by: hongming <hongmingwang@moleculesai.app>
2026-05-26 22:57:22 +00:00

860 lines
31 KiB
Go

package handlers
import (
"context"
"database/sql"
"log"
"net/http"
"os"
"regexp"
"strings"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/audit"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/wsauth"
"github.com/gin-gonic/gin"
)
// uuidRegex matches a canonical hyphenated UUID (8-4-4-4-12 hex digits).
// Only lowercase hex digits are accepted; an uppercase UUID does not match.
// Handlers in this file use it to validate the :id path parameter.
var uuidRegex = regexp.MustCompile(`^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$`)
// platformManagedDirectLLMBypassKeys is the set of secret keys (vendor API
// keys, auth tokens and base URLs) that the reject* helpers in this file
// refuse to store while platform-managed LLM billing is in effect. Entries
// are upper-case; use isPlatformManagedDirectLLMBypassKey for lookups, since
// it normalizes its input before consulting the set.
var platformManagedDirectLLMBypassKeys = map[string]struct{}{
	"AI_GATEWAY_API_KEY":      {},
	"ANTHROPIC_API_KEY":       {},
	"ANTHROPIC_AUTH_TOKEN":    {},
	"ARCEEAI_API_KEY":         {},
	"CLAUDE_CODE_OAUTH_TOKEN": {},
	"DASHSCOPE_API_KEY":       {},
	"DEEPSEEK_API_KEY":        {},
	"GEMINI_API_KEY":          {},
	"GLM_API_KEY":             {},
	"HERMES_CUSTOM_API_KEY":   {},
	"HERMES_CUSTOM_BASE_URL":  {},
	"HF_TOKEN":                {},
	"KIMI_API_KEY":            {},
	"KIMI_CN_API_KEY":         {},
	"MINIMAX_API_KEY":         {},
	"MINIMAX_CN_API_KEY":      {},
	"NOUS_API_KEY":            {},
	"OPENAI_API_KEY":          {},
	"OPENAI_BASE_URL":         {},
	"OPENROUTER_API_KEY":      {},
	"XAI_API_KEY":             {},
	"ZAI_API_KEY":             {},
}

// isPlatformManagedDirectLLMBypassKey reports whether key is on the bypass
// list. The comparison ignores surrounding whitespace and letter case.
func isPlatformManagedDirectLLMBypassKey(key string) bool {
	normalized := strings.TrimSpace(key)
	normalized = strings.ToUpper(normalized)
	if _, listed := platformManagedDirectLLMBypassKeys[normalized]; listed {
		return true
	}
	return false
}
// platformManagedLLMModeForWorkspace replaces the org-level platformManagedLLMMode
// gate with a per-workspace resolved-mode check (internal#691). The strip-list
// is enforced ONLY when this specific workspace's resolved mode is
// platform_managed — a workspace with a byok override is allowed to write its
// own CLAUDE_CODE_OAUTH_TOKEN / vendor key via the canvas Secrets tab.
//
// The org-level default passed to the resolver is the trimmed, lower-cased
// value of the MOLECULE_LLM_BILLING_MODE environment variable.
//
// Default-closed: if the resolver returns an error, this function reports
// true (platform_managed) without inspecting the resolver's result, so a
// transient DB failure during a secret write still rejects the bypass-list
// keys — fail safer, not freer. Returning explicitly on the error path means
// the guarantee holds regardless of what value the resolver hands back
// alongside the error.
func platformManagedLLMModeForWorkspace(c *gin.Context, workspaceID string) bool {
	orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")))
	res, err := ResolveLLMBillingMode(c.Request.Context(), workspaceID, orgMode)
	if err != nil {
		log.Printf("secrets: resolve billing mode for workspace=%s failed: %v (defaulting to platform_managed for safety)", workspaceID, err)
		// Do not read res here: its contents on error are not something this
		// gate should depend on.
		return true
	}
	return strings.EqualFold(res.ResolvedMode, LLMBillingModePlatformManaged)
}
// platformManagedLLMMode is the legacy org-level gate: it reports whether the
// MOLECULE_LLM_BILLING_MODE environment variable, with surrounding whitespace
// removed and compared case-insensitively, equals "platform_managed".
//
// It is retained for any test harness still asserting the env-var-only
// behavior. Production code paths must call platformManagedLLMModeForWorkspace
// instead so a workspace-level byok override actually takes effect on the
// secrets-write path.
func platformManagedLLMMode() bool {
	orgMode := strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE"))
	return strings.EqualFold(orgMode, "platform_managed")
}
// rejectPlatformManagedDirectLLMBypassForWorkspace is the per-workspace
// successor to rejectPlatformManagedDirectLLMBypass (internal#691). The
// strip-list ONLY applies when this specific workspace resolves to
// platform_managed; byok/disabled workspaces can write their own vendor keys.
//
// It returns true after writing a 400 response when the write must be
// refused; it returns false, writing nothing, when the caller may proceed.
func rejectPlatformManagedDirectLLMBypassForWorkspace(c *gin.Context, workspaceID, key string) bool {
	// The workspace mode is resolved first and the key list consulted second,
	// and only when the mode is platform_managed.
	blocked := platformManagedLLMModeForWorkspace(c, workspaceID) && isPlatformManagedDirectLLMBypassKey(key)
	if !blocked {
		return false
	}

	payload := gin.H{
		"error":        "direct vendor key writes are blocked for platform-managed workspaces; use MODEL/LLM_PROVIDER or the platform LLM proxy env instead, or set this workspace's billing mode to 'byok' via /admin/workspaces/:id/llm-billing-mode",
		"key":          key,
		"workspace_id": workspaceID,
	}
	c.JSON(http.StatusBadRequest, payload)
	return true
}
// rejectPlatformManagedDirectLLMBypass is the legacy org-level shim: it
// consults only the MOLECULE_LLM_BILLING_MODE environment variable (via
// platformManagedLLMMode), not any per-workspace override. Retained for
// backwards compatibility with callers still on the old shape; new
// workspace-scoped code MUST use the per-workspace variant
// (rejectPlatformManagedDirectLLMBypassForWorkspace), which the
// workspace-level handlers switched to in internal#691.
//
// It returns true after writing a 400 response when the write must be
// refused, and false (writing nothing) otherwise.
func rejectPlatformManagedDirectLLMBypass(c *gin.Context, key string) bool {
	blocked := platformManagedLLMMode() && isPlatformManagedDirectLLMBypassKey(key)
	if !blocked {
		return false
	}

	payload := gin.H{
		"error": "direct Hermes custom provider secrets are blocked for platform-managed LLM workspaces; use MODEL/LLM_PROVIDER or the platform LLM proxy env instead",
		"key":   key,
	}
	c.JSON(http.StatusBadRequest, payload)
	return true
}
// SecretsHandler serves the secrets, model and provider HTTP endpoints
// defined in this file.
type SecretsHandler struct {
	// restartFunc, when non-nil, is invoked with a workspace ID after a
	// secret change so the workspace is restarted automatically. Optional.
	restartFunc func(workspaceID string)
}

// NewSecretsHandler builds a SecretsHandler. restartFunc may be nil, in
// which case handlers skip the automatic restart.
func NewSecretsHandler(restartFunc func(string)) *SecretsHandler {
	handler := new(SecretsHandler)
	handler.restartFunc = restartFunc
	return handler
}
// List handles GET /workspaces/:id/secrets
// Returns a merged view: workspace-level overrides + inherited global secrets.
// Each entry includes a "scope" field ("workspace" or "global") so the frontend
// can distinguish overrides from inherited defaults. Never exposes values.
//
// Responses:
//   - 400 when :id is not a UUID accepted by uuidRegex.
//   - 500 when the workspace_secrets query fails.
//   - 200 with a JSON array otherwise. If the global_secrets query fails the
//     array contains workspace entries only (deliberately non-fatal).
//
// Rows that fail to scan are skipped, and the failure is logged so a missing
// entry in the listing can be traced.
func (h *SecretsHandler) List(c *gin.Context) {
	workspaceID := c.Param("id")
	if !uuidRegex.MatchString(workspaceID) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
		return
	}
	ctx := c.Request.Context()

	// 1. Workspace-level secrets
	wsKeys := map[string]bool{}
	// Non-nil so an empty result encodes as [] rather than null.
	secrets := make([]map[string]interface{}, 0)
	rows, err := db.DB.QueryContext(ctx,
		`SELECT key, created_at, updated_at FROM workspace_secrets WHERE workspace_id = $1 ORDER BY key`,
		workspaceID)
	if err != nil {
		log.Printf("List secrets error: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
		return
	}
	defer rows.Close()
	for rows.Next() {
		var key, createdAt, updatedAt string
		if err := rows.Scan(&key, &createdAt, &updatedAt); err != nil {
			log.Printf("List workspace secrets scan error: %v", err)
			continue
		}
		wsKeys[key] = true
		secrets = append(secrets, map[string]interface{}{
			"key":        key,
			"has_value":  true,
			"scope":      "workspace",
			"created_at": createdAt,
			"updated_at": updatedAt,
		})
	}
	if err := rows.Err(); err != nil {
		log.Printf("List workspace secrets iteration error: %v", err)
	}

	// 2. Global secrets not overridden at workspace level
	globalRows, err := db.DB.QueryContext(ctx,
		`SELECT key, created_at, updated_at FROM global_secrets ORDER BY key`)
	if err != nil {
		log.Printf("List global secrets (merged) error: %v", err)
		// Non-fatal: return workspace secrets only
		c.JSON(http.StatusOK, secrets)
		return
	}
	defer globalRows.Close()
	for globalRows.Next() {
		var key, createdAt, updatedAt string
		if err := globalRows.Scan(&key, &createdAt, &updatedAt); err != nil {
			log.Printf("List global secrets scan error: %v", err)
			continue
		}
		if wsKeys[key] {
			continue // workspace override exists — skip global
		}
		secrets = append(secrets, map[string]interface{}{
			"key":        key,
			"has_value":  true,
			"scope":      "global",
			"created_at": createdAt,
			"updated_at": updatedAt,
		})
	}
	if err := globalRows.Err(); err != nil {
		log.Printf("List global secrets iteration error: %v", err)
	}

	c.JSON(http.StatusOK, secrets)
}
// Values handles GET /workspaces/:id/secrets/values — returns the merged
// decrypted secrets as a flat `{"KEY": "value"}` JSON map so remote agents
// can pull their secrets on startup instead of having them pushed at
// container-create time. Phase 30.2.
//
// Authentication: the workspace must present its own Phase 30.1 auth token
// in `Authorization: Bearer …`. Legacy workspaces with no live token on file
// are grandfathered through (same lazy-bootstrap contract as
// /registry/heartbeat) so in-flight workspaces keep working during the
// rollout. Anything else → 401.
//
// The same merge rule as List applies: workspace secrets override globals
// with the same key. Values are returned verbatim (no base64, no JSON
// escaping beyond the standard), matching the env-var shape the provisioner
// would have injected at container-create.
//
// All-or-nothing: the handler never answers 200 with an incomplete bundle.
// A failed query, row scan or row iteration on either table yields 500, and
// so does any decrypt failure (with the offending keys listed in
// "failed_keys"). A remote agent that boots with only PART of its secrets
// fails later at task time in ways that are hard to diagnose, so refusing
// up front is preferred.
func (h *SecretsHandler) Values(c *gin.Context) {
	workspaceID := c.Param("id")
	if !uuidRegex.MatchString(workspaceID) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
		return
	}
	ctx := c.Request.Context()

	// Auth gate (Phase 30.1/30.2): enforce the bearer token when the
	// workspace has any live token on file. Grandfather legacy workspaces
	// through so a rolling upgrade doesn't lock them out.
	hasLive, hlErr := wsauth.HasAnyLiveToken(ctx, db.DB, workspaceID)
	if hlErr != nil {
		// DB hiccup checking token existence — the handler's security
		// posture is "fail closed" here because unlike heartbeat, we're
		// about to return plaintext secrets. Heartbeat can safely
		// fail-open because it only reports state.
		log.Printf("wsauth: HasAnyLiveToken(%s) failed for secrets.Values: %v", workspaceID, hlErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "auth check failed"})
		return
	}
	if hasLive {
		tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization"))
		if tok == "" {
			c.JSON(http.StatusUnauthorized, gin.H{"error": "missing workspace auth token"})
			return
		}
		if err := wsauth.ValidateToken(ctx, db.DB, workspaceID, tok); err != nil {
			c.JSON(http.StatusUnauthorized, gin.H{"error": "invalid workspace auth token"})
			return
		}
	}

	// Merged secrets: globals first, then workspace overrides (same as
	// provisioner path in workspace_provision.go so env-vars look identical
	// whether the workspace was bootstrapped locally or remotely).
	out := map[string]string{}
	// Track decrypt failures so we can refuse the response with a list
	// instead of returning a partial bundle that boots a broken agent.
	var failedKeys []string

	// merge drains rows into out, later calls overwriting earlier keys.
	// Decrypt failures are recorded in failedKeys (prefixed with scope) and
	// iteration continues so the operator sees every broken key at once.
	// Scan and iteration errors abort and are returned to the caller.
	merge := func(rows *sql.Rows, scope string) error {
		defer rows.Close()
		for rows.Next() {
			var (
				k   string
				v   []byte
				ver int
			)
			if err := rows.Scan(&k, &v, &ver); err != nil {
				return err
			}
			decrypted, decErr := crypto.DecryptVersioned(v, ver)
			if decErr != nil {
				log.Printf("secrets.Values: decrypt %s %s failed (version=%d): %v", scope, k, ver, decErr)
				failedKeys = append(failedKeys, scope+":"+k)
				continue
			}
			out[k] = string(decrypted)
		}
		return rows.Err()
	}

	globalRows, err := db.DB.QueryContext(ctx,
		`SELECT key, encrypted_value, encryption_version FROM global_secrets`)
	if err == nil {
		err = merge(globalRows, "global")
	}
	if err != nil {
		log.Printf("secrets.Values: load global secrets for workspace=%s failed: %v", workspaceID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to load secrets"})
		return
	}

	wsRows, err := db.DB.QueryContext(ctx,
		`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1`,
		workspaceID)
	if err == nil {
		// Merged second so a workspace override wins over the global value.
		err = merge(wsRows, "workspace")
	}
	if err != nil {
		log.Printf("secrets.Values: load workspace secrets for workspace=%s failed: %v", workspaceID, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to load secrets"})
		return
	}

	if len(failedKeys) > 0 {
		c.JSON(http.StatusInternalServerError, gin.H{
			"error":       "one or more secrets failed to decrypt; refusing to return partial bundle",
			"failed_keys": failedKeys,
		})
		return
	}
	c.JSON(http.StatusOK, out)
}
// Set handles POST /workspaces/:id/secrets
//
// The JSON body must carry non-empty "key" and "value" fields. The value is
// encrypted, upserted into workspace_secrets, an audit event is emitted, and
// (when a restart function is configured) a workspace restart is scheduled.
// Keys on the platform-managed bypass list are refused with 400 when the
// workspace resolves to platform_managed billing.
func (h *SecretsHandler) Set(c *gin.Context) {
	workspaceID := c.Param("id")
	if !uuidRegex.MatchString(workspaceID) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
		return
	}
	ctx := c.Request.Context()

	var req struct {
		Key   string `json:"key" binding:"required"`
		Value string `json:"value" binding:"required"`
	}
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
		return
	}

	// The helper has already written the 400 response when it returns true.
	if rejectPlatformManagedDirectLLMBypassForWorkspace(c, workspaceID, req.Key) {
		return
	}

	// Encrypt the value (AES-256-GCM if SECRETS_ENCRYPTION_KEY is set, plaintext otherwise)
	ciphertext, encErr := crypto.Encrypt([]byte(req.Value))
	if encErr != nil {
		log.Printf("Encrypt secret error: %v", encErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to encrypt secret"})
		return
	}

	// Persist encryption_version alongside the bytes (#85). ON CONFLICT
	// also rewrites the version — re-setting a secret while encryption
	// is enabled upgrades a historical plaintext row to AES-GCM.
	const upsertSQL = `
INSERT INTO workspace_secrets (workspace_id, key, encrypted_value, encryption_version)
VALUES ($1, $2, $3, $4)
ON CONFLICT (workspace_id, key) DO UPDATE
SET encrypted_value = $3, encryption_version = $4, updated_at = now()
`
	encVersion := crypto.CurrentEncryptionVersion()
	if _, dbErr := db.DB.ExecContext(ctx, upsertSQL, workspaceID, req.Key, ciphertext, encVersion); dbErr != nil {
		log.Printf("Set secret error: %v", dbErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save secret"})
		return
	}

	// Phase 1 audit: structured event for the security trail. Inline (not
	// goroutine) so the event is durable before we ack the user; emit is
	// best-effort and never errors out of the request path.
	event := map[string]any{
		"workspace_id": workspaceID,
		"key":          req.Key,
		"value_hash":   audit.HashValuePrefix(req.Value, 8),
		"scope":        "workspace",
		"operation":    "set",
	}
	audit.Emit(c.Request.Context(), "secret.set", event)

	// Auto-restart workspace to pick up new secret.
	// RFC internal#524 Layer 1: route through globalGoAsync so tests can
	// drain the detached restart goroutine before db.DB is swapped — see
	// drainTestAsync in handlers_test.go and the canonical 69d9b4e3 fix.
	if h.restartFunc != nil {
		target := workspaceID
		globalGoAsync(func() { h.restartFunc(target) })
	}

	c.JSON(http.StatusOK, gin.H{"status": "saved", "key": req.Key})
}
// Delete handles DELETE /workspaces/:id/secrets/:key
//
// Responses:
//   - 400 when :id is not a UUID accepted by uuidRegex.
//   - 500 when the DELETE statement or RowsAffected fails (both are logged).
//   - 404 when no row matched the workspace/key pair.
//   - 200 after a real delete; an audit event is emitted and, when a restart
//     function is configured, a workspace restart is scheduled.
func (h *SecretsHandler) Delete(c *gin.Context) {
	workspaceID := c.Param("id")
	if !uuidRegex.MatchString(workspaceID) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
		return
	}
	key := c.Param("key")
	ctx := c.Request.Context()

	result, err := db.DB.ExecContext(ctx,
		`DELETE FROM workspace_secrets WHERE workspace_id = $1 AND key = $2`,
		workspaceID, key)
	if err != nil {
		// Logged so a 500 here is diagnosable, like the other DB failures
		// in this file.
		log.Printf("Delete secret error: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete secret"})
		return
	}
	rows, err := result.RowsAffected()
	if err != nil {
		log.Printf("Delete secret: RowsAffected error: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete secret"})
		return
	}
	if rows == 0 {
		c.JSON(http.StatusNotFound, gin.H{"error": "secret not found"})
		return
	}

	// Phase 1 audit: structured event for the security trail. Only on
	// real deletes (rows>0) — a 404 is not a state change.
	audit.Emit(c.Request.Context(), "secret.delete", map[string]any{
		"workspace_id": workspaceID,
		"key":          key,
		"scope":        "workspace",
		"operation":    "delete",
	})

	// Auto-restart workspace to pick up removed secret.
	// RFC internal#524 Layer 1: routed through globalGoAsync so tests can
	// drain the detached restart goroutine before db.DB is swapped.
	if h.restartFunc != nil {
		wsID := workspaceID
		globalGoAsync(func() { h.restartFunc(wsID) })
	}

	c.JSON(http.StatusOK, gin.H{"status": "deleted", "key": key})
}
// ---------------------------------------------------------------------------
// Global secrets — platform-wide API keys that apply to all workspaces.
// Workspace-level secrets with the same key override globals.
// ---------------------------------------------------------------------------
// ListGlobal handles GET /admin/secrets
//
// Returns a JSON array with one entry per global_secrets row, ordered by
// key. Each entry carries key, has_value, created_at, updated_at and
// scope="global"; secret values are never included. A query failure yields
// 500. Rows that fail to scan are skipped, and the failure is logged so a
// missing entry can be traced.
func (h *SecretsHandler) ListGlobal(c *gin.Context) {
	ctx := c.Request.Context()
	rows, err := db.DB.QueryContext(ctx,
		`SELECT key, created_at, updated_at FROM global_secrets ORDER BY key`)
	if err != nil {
		log.Printf("List global secrets error: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
		return
	}
	defer rows.Close()

	// Non-nil so an empty table encodes as [] rather than null.
	secrets := make([]map[string]interface{}, 0)
	for rows.Next() {
		var key, createdAt, updatedAt string
		if err := rows.Scan(&key, &createdAt, &updatedAt); err != nil {
			log.Printf("ListGlobal scan error: %v", err)
			continue
		}
		secrets = append(secrets, map[string]interface{}{
			"key":        key,
			"has_value":  true,
			"created_at": createdAt,
			"updated_at": updatedAt,
			"scope":      "global",
		})
	}
	if err := rows.Err(); err != nil {
		log.Printf("ListGlobal iteration error: %v", err)
	}
	c.JSON(http.StatusOK, secrets)
}
// SetGlobal handles POST /admin/secrets
//
// The JSON body must carry non-empty "key" and "value" fields. The value is
// encrypted and upserted into global_secrets; affected workspaces are then
// restarted asynchronously and an admin-scope audit event is emitted. Keys
// on the platform-managed bypass list are refused with 400 when the
// org-level billing mode is platform_managed.
func (h *SecretsHandler) SetGlobal(c *gin.Context) {
	ctx := c.Request.Context()

	var req struct {
		Key   string `json:"key" binding:"required"`
		Value string `json:"value" binding:"required"`
	}
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
		return
	}

	// The helper has already written the 400 response when it returns true.
	if rejectPlatformManagedDirectLLMBypass(c, req.Key) {
		return
	}

	ciphertext, encErr := crypto.Encrypt([]byte(req.Value))
	if encErr != nil {
		log.Printf("Encrypt global secret error: %v", encErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to encrypt"})
		return
	}

	const upsertSQL = `
INSERT INTO global_secrets (key, encrypted_value, encryption_version)
VALUES ($1, $2, $3)
ON CONFLICT (key) DO UPDATE
SET encrypted_value = $2, encryption_version = $3, updated_at = now()
`
	encVersion := crypto.CurrentEncryptionVersion()
	if _, dbErr := db.DB.ExecContext(ctx, upsertSQL, req.Key, ciphertext, encVersion); dbErr != nil {
		log.Printf("Set global secret error: %v", dbErr)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save"})
		return
	}

	// Issue #15: global secrets are injected into containers as env vars at
	// Start() time, so a rotating token (e.g. CLAUDE_CODE_OAUTH_TOKEN) doesn't
	// reach existing workspaces until the container is recreated. Auto-restart
	// every workspace whose env is affected — i.e. those WITHOUT a
	// workspace-level override of the same key.
	//
	// RFC internal#524 Layer 1: globalGoAsync so tests drain the fan-out
	// (which itself spawns N more globalGoAsync restart calls) before
	// db.DB swap. Without this, the SELECT for affected workspaces races a
	// subsequent test's db.DB restore.
	changedKey := req.Key
	globalGoAsync(func() { h.restartAllAffectedByGlobalKey(changedKey) })

	// Phase 1 audit: admin-scope secret write — high-value security event.
	adminCtx := audit.WithActorKind(c.Request.Context(), audit.ActorAdmin)
	event := map[string]any{
		"key":        req.Key,
		"value_hash": audit.HashValuePrefix(req.Value, 8),
		"scope":      "global",
		"operation":  "set",
	}
	audit.Emit(adminCtx, "secret.set", event)

	c.JSON(http.StatusOK, gin.H{"status": "saved", "key": req.Key, "scope": "global"})
}
// restartAllAffectedByGlobalKey restarts every non-paused, non-removed,
// non-external workspace that would inherit the given global-secret key
// (i.e. does NOT have a workspace-level override). Used on SetGlobal /
// DeleteGlobal so rotated credentials (OAuth tokens, API keys) propagate
// without a manual restart loop. See issue #15.
//
// It is a no-op when no restart function is configured. The lookup runs on
// context.Background() rather than a request context. Rows that fail to
// scan are skipped; a query failure is logged and aborts the fan-out.
func (h *SecretsHandler) restartAllAffectedByGlobalKey(key string) {
	if h.restartFunc == nil {
		return
	}

	const affectedSQL = `
SELECT id FROM workspaces
WHERE status NOT IN ('removed', 'paused')
AND COALESCE(runtime, '') <> 'external'
AND id NOT IN (
SELECT workspace_id FROM workspace_secrets WHERE key = $1
)
`
	rows, queryErr := db.DB.QueryContext(context.Background(), affectedSQL, key)
	if queryErr != nil {
		log.Printf("Global secret %s: failed to list affected workspaces for auto-restart: %v", key, queryErr)
		return
	}
	defer rows.Close()

	var affected []string
	for rows.Next() {
		var workspaceID string
		if scanErr := rows.Scan(&workspaceID); scanErr != nil {
			continue
		}
		affected = append(affected, workspaceID)
	}
	if iterErr := rows.Err(); iterErr != nil {
		log.Printf("restartAllAffectedByGlobalKey: iteration error: %v", iterErr)
	}

	if len(affected) == 0 {
		return
	}
	log.Printf("Global secret %s changed: auto-restarting %d workspace(s) to refresh env", key, len(affected))
	for i := range affected {
		// RFC internal#524 Layer 1: per-workspace restart via globalGoAsync
		// so each restart goroutine is drained before db.DB is swapped in
		// the test cleanup chain. The ID is copied into a fresh variable so
		// each closure captures its own value.
		target := affected[i]
		globalGoAsync(func() { h.restartFunc(target) })
	}
}
// DeleteGlobal handles DELETE /admin/secrets/:key
//
// Responses:
//   - 500 when the DELETE statement or RowsAffected fails (both are logged).
//   - 404 when no global secret has that key.
//   - 200 after a real delete; affected workspaces are restarted
//     asynchronously and an admin-scope audit event is emitted.
func (h *SecretsHandler) DeleteGlobal(c *gin.Context) {
	key := c.Param("key")
	ctx := c.Request.Context()

	result, err := db.DB.ExecContext(ctx,
		`DELETE FROM global_secrets WHERE key = $1`, key)
	if err != nil {
		// Logged so a 500 here is diagnosable, like the other DB failures
		// in this file.
		log.Printf("Delete global secret error: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete"})
		return
	}
	rows, err := result.RowsAffected()
	if err != nil {
		log.Printf("DeleteGlobal: RowsAffected error: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to delete"})
		return
	}
	if rows == 0 {
		c.JSON(http.StatusNotFound, gin.H{"error": "secret not found"})
		return
	}

	// Issue #15: propagate deletion to running containers — otherwise they
	// keep the stale env var until manual restart.
	// RFC internal#524 Layer 1: globalGoAsync so tests can drain the
	// fan-out before db.DB is swapped.
	k := key
	globalGoAsync(func() { h.restartAllAffectedByGlobalKey(k) })

	// Phase 1 audit: admin-scope secret delete.
	auditCtx := audit.WithActorKind(c.Request.Context(), audit.ActorAdmin)
	audit.Emit(auditCtx, "secret.delete", map[string]any{
		"key":       key,
		"scope":     "global",
		"operation": "delete",
	})

	c.JSON(http.StatusOK, gin.H{"status": "deleted", "key": key, "scope": "global"})
}
// GetModel handles GET /workspaces/:id/model
// Returns the current model configuration for a workspace.
//
// Responses:
//   - 400 when :id is not a UUID accepted by uuidRegex (same validation as
//     the other workspace-scoped handlers in this file).
//   - 200 {"model": "", "source": "default"} when no MODEL secret exists.
//   - 200 {"model": <value>, "source": "workspace_secrets"} otherwise.
//   - 500 when the query or the decryption fails (both are logged).
func (h *SecretsHandler) GetModel(c *gin.Context) {
	workspaceID := c.Param("id")
	if !uuidRegex.MatchString(workspaceID) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
		return
	}
	ctx := c.Request.Context()

	// Check if MODEL secret exists.
	//
	// Historical note: this row was named MODEL_PROVIDER pre-2026-05-19
	// (see ab12af50 + a7e8892 root-cause analysis). The column name
	// MODEL_PROVIDER was misleading — it never held a provider slug,
	// only the picked model id (e.g. "minimax/MiniMax-M2.7"). The
	// misnomer caused workspace-server's applyRuntimeModelEnv to
	// overwrite a legitimate persona-env MODEL with whatever literal
	// string lived in MODEL_PROVIDER (often "minimax" or "claude-code"
	// — not a valid model id), wedging adapters at SDK initialize.
	// CP-side slot-separation (cp#213 + cp#220) already corrected the
	// CP-side analogue; this is the workspace-server companion. A
	// migration in 20260519000000_workspace_secrets_model_provider_rename.up.sql
	// moves any legacy rows to the new key on rollout.
	var modelBytes []byte
	var modelVersion int
	err := db.DB.QueryRowContext(ctx,
		`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1 AND key = 'MODEL'`,
		workspaceID).Scan(&modelBytes, &modelVersion)
	if err == sql.ErrNoRows {
		c.JSON(http.StatusOK, gin.H{"model": "", "source": "default"})
		return
	}
	if err != nil {
		log.Printf("GetModel query error: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
		return
	}

	decrypted, err := crypto.DecryptVersioned(modelBytes, modelVersion)
	if err != nil {
		log.Printf("GetModel decrypt error (version=%d): %v", modelVersion, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to decrypt"})
		return
	}
	c.JSON(http.StatusOK, gin.H{"model": string(decrypted), "source": "workspace_secrets"})
}
// setModelSecret writes (or clears, when model=="") the MODEL workspace
// secret. Extracted from SetModel so non-handler call sites (notably
// WorkspaceHandler.Create — first-deploy path that persists the
// canvas-selected model so applyRuntimeModelEnv's restart fallback finds
// it) can reuse the encryption + upsert logic without inlining the SQL.
//
// The row was previously keyed MODEL_PROVIDER (misnomer — it never held
// a provider, only a model id). Renamed to MODEL on 2026-05-19; the
// 20260519000000_workspace_secrets_model_provider_rename migration moves
// any legacy rows on rollout.
//
// Returns nil on success, otherwise the encryption or database error
// unchanged. Caller is responsible for any restart trigger; the gin handler
// re-adds that after a successful write.
func setModelSecret(ctx context.Context, workspaceID, model string) error {
	clearing := model == ""
	if clearing {
		// Empty value means "remove the override".
		_, delErr := db.DB.ExecContext(ctx,
			`DELETE FROM workspace_secrets WHERE workspace_id = $1 AND key = 'MODEL'`,
			workspaceID)
		return delErr
	}

	ciphertext, encErr := crypto.Encrypt([]byte(model))
	if encErr != nil {
		return encErr
	}

	const upsertSQL = `
INSERT INTO workspace_secrets (workspace_id, key, encrypted_value, encryption_version)
VALUES ($1, 'MODEL', $2, $3)
ON CONFLICT (workspace_id, key) DO UPDATE
SET encrypted_value = $2, encryption_version = $3, updated_at = now()
`
	encVersion := crypto.CurrentEncryptionVersion()
	_, upsertErr := db.DB.ExecContext(ctx, upsertSQL, workspaceID, ciphertext, encVersion)
	return upsertErr
}
// SetModel handles PUT /workspaces/:id/model — writes the model slug
// into workspace_secrets as MODEL (the key GetModel reads).
// For hermes, the value is a hermes-native slug like "minimax/MiniMax-M2.7";
// for claude-code it's the legacy "provider:model" form. Either way it's just
// an opaque string the runtime interprets on its next start.
//
// Empty string clears the override. Triggers auto-restart so the new
// env (HERMES_DEFAULT_MODEL etc.) takes effect immediately — without
// this the user clicks Save+Restart, the canvas PUT lands, but the
// already-restarting container misses the window and boots with the
// old value.
func (h *SecretsHandler) SetModel(c *gin.Context) {
	workspaceID := c.Param("id")
	if !uuidRegex.MatchString(workspaceID) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
		return
	}
	ctx := c.Request.Context()

	var req struct {
		Model string `json:"model"`
	}
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
		return
	}
	clearing := req.Model == ""

	if saveErr := setModelSecret(ctx, workspaceID, req.Model); saveErr != nil {
		log.Printf("SetModel error: %v", saveErr)
		reason := "failed to save model"
		if clearing {
			reason = "failed to clear model"
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": reason})
		return
	}

	if h.restartFunc != nil {
		// RFC internal#524 Layer 1: restart goes through globalGoAsync so
		// tests can drain the detached goroutine.
		target := workspaceID
		globalGoAsync(func() { h.restartFunc(target) })
	}

	if clearing {
		c.JSON(http.StatusOK, gin.H{"status": "cleared"})
	} else {
		c.JSON(http.StatusOK, gin.H{"status": "saved", "model": req.Model})
	}
}
// GetProvider handles GET /workspaces/:id/provider
// Returns the explicit LLM provider override stored as the LLM_PROVIDER
// workspace secret. Mirror of GetModel — same shape, same response keys
// (provider/source) to keep canvas wiring symmetric.
//
// Why a sibling endpoint rather than overloading PUT /model: the new
// `provider` field (Option B, PR #2441) is orthogonal to the model
// slug. A user might keep the same model alias and switch providers
// (e.g., route the same alias through a different gateway), or keep
// the same provider and switch models. Co-storing them under one
// endpoint forces a single Save+Restart round-trip per change; two
// endpoints let the canvas update each independently.
//
// Responses:
//   - 400 when :id is not a UUID accepted by uuidRegex (same validation as
//     the other workspace-scoped handlers in this file).
//   - 200 {"provider": "", "source": "default"} when no LLM_PROVIDER secret
//     exists.
//   - 200 {"provider": <value>, "source": "workspace_secrets"} otherwise.
//   - 500 when the query or the decryption fails (both are logged).
func (h *SecretsHandler) GetProvider(c *gin.Context) {
	workspaceID := c.Param("id")
	if !uuidRegex.MatchString(workspaceID) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
		return
	}
	ctx := c.Request.Context()

	var bytesVal []byte
	var version int
	err := db.DB.QueryRowContext(ctx,
		`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1 AND key = 'LLM_PROVIDER'`,
		workspaceID).Scan(&bytesVal, &version)
	if err == sql.ErrNoRows {
		c.JSON(http.StatusOK, gin.H{"provider": "", "source": "default"})
		return
	}
	if err != nil {
		log.Printf("GetProvider query error: %v", err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
		return
	}

	decrypted, err := crypto.DecryptVersioned(bytesVal, version)
	if err != nil {
		log.Printf("GetProvider decrypt error (version=%d): %v", version, err)
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to decrypt"})
		return
	}
	c.JSON(http.StatusOK, gin.H{"provider": string(decrypted), "source": "workspace_secrets"})
}
// setProviderSecret writes (or clears, when provider=="") the LLM_PROVIDER
// workspace secret. Extracted from SetProvider so non-handler call sites
// (notably WorkspaceHandler.Create — first-deploy path that derives
// LLM_PROVIDER from the canvas-selected model slug so CP user-data picks
// it up as a YAML field in /configs/config.yaml AND it survives across
// restarts when CP regenerates the config) can reuse the encryption +
// upsert logic without inlining the SQL.
//
// Returns nil on success, otherwise the encryption or database error
// unchanged. Caller is responsible for any restart trigger; the gin handler
// re-adds that after a successful write.
func setProviderSecret(ctx context.Context, workspaceID, provider string) error {
	clearing := provider == ""
	if clearing {
		// Empty value means "remove the override".
		_, delErr := db.DB.ExecContext(ctx,
			`DELETE FROM workspace_secrets WHERE workspace_id = $1 AND key = 'LLM_PROVIDER'`,
			workspaceID)
		return delErr
	}

	ciphertext, encErr := crypto.Encrypt([]byte(provider))
	if encErr != nil {
		return encErr
	}

	const upsertSQL = `
INSERT INTO workspace_secrets (workspace_id, key, encrypted_value, encryption_version)
VALUES ($1, 'LLM_PROVIDER', $2, $3)
ON CONFLICT (workspace_id, key) DO UPDATE
SET encrypted_value = $2, encryption_version = $3, updated_at = now()
`
	encVersion := crypto.CurrentEncryptionVersion()
	_, upsertErr := db.DB.ExecContext(ctx, upsertSQL, workspaceID, ciphertext, encVersion)
	return upsertErr
}
// SetProvider handles PUT /workspaces/:id/provider — writes the provider
// slug into workspace_secrets as LLM_PROVIDER. Empty string clears the
// override. Triggers auto-restart so the new env is in effect on the
// next boot — without this the canvas Save+Restart can race the
// already-restarting container and miss the window.
//
// CP user-data (controlplane PR #364) reads LLM_PROVIDER from env and
// writes it into /configs/config.yaml at boot, so the choice survives
// restart. Without that PR this endpoint still works but the value is
// only sticky when the workspace_secrets row is read on every restart
// (the secret-load path) — slower failure mode, same eventual behavior.
func (h *SecretsHandler) SetProvider(c *gin.Context) {
	workspaceID := c.Param("id")
	if !uuidRegex.MatchString(workspaceID) {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
		return
	}
	ctx := c.Request.Context()

	var req struct {
		Provider string `json:"provider"`
	}
	if bindErr := c.ShouldBindJSON(&req); bindErr != nil {
		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
		return
	}
	clearing := req.Provider == ""

	if saveErr := setProviderSecret(ctx, workspaceID, req.Provider); saveErr != nil {
		log.Printf("SetProvider error: %v", saveErr)
		reason := "failed to save provider"
		if clearing {
			reason = "failed to clear provider"
		}
		c.JSON(http.StatusInternalServerError, gin.H{"error": reason})
		return
	}

	if h.restartFunc != nil {
		// RFC internal#524 Layer 1: restart goes through globalGoAsync so
		// tests can drain the detached goroutine.
		target := workspaceID
		globalGoAsync(func() { h.restartFunc(target) })
	}

	if clearing {
		c.JSON(http.StatusOK, gin.H{"status": "cleared"})
	} else {
		c.JSON(http.StatusOK, gin.H{"status": "saved", "provider": req.Provider})
	}
}