forked from molecule-ai/molecule-core
refactor: extract readOrLazyHealInboundSecret to dedup chat_files + registry
The lazy-heal-on-miss pattern landed in two places this session: PR #2372 (chat_files.go::resolveWorkspaceForwardCreds — Upload + Download) and PR #2375 (registry.go::Register). Both implementations did the same thing: read → if ErrNoInboundSecret then mint inline → return outcome Different response-shape requirements but the same core mechanic. Three sites' worth of drift potential: any future heal-time condition we add (audit log, alert, secret rotation, observability) had to be applied to each site, with partial application silently re-opening the gap. Fix: extract readOrLazyHealInboundSecret in workspace_provision_shared.go returning (secret, healed, err). Each caller maps the outcome to its response shape: - chat_files: healed=true → 503 with retry hint; err != nil → 503 with RFC-#2312 reprovision hint - registry: healed=true|false + err==nil → include in response; err != nil → omit field (workspace can retry on next register) Net effect: - Single source of truth for the read+heal mechanic - Response-shape decisions stay in callers (they DO differ per feature) - Future heal-time conditions go in one place - Behavior preserved: existing TestRegister_NoInboundSecret_LazyHeals, TestRegister_NoInboundSecret_LazyHealMintFailureOmitsField, TestChatUpload_NoInboundSecret_LazyHeal*, TestChatDownload_NoInboundSecret_LazyHeal* all pass unchanged Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
eef1969e30
commit
30a569c742
@ -30,7 +30,6 @@ package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
@ -41,7 +40,6 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
@ -120,41 +118,28 @@ func resolveWorkspaceForwardCreds(c *gin.Context, ctx context.Context, workspace
|
||||
// upstream gate rather than re-validating here. Tracked at #2316
|
||||
// for follow-up: forward-time re-validation as defense-in-depth.
|
||||
|
||||
secret, err := wsauth.ReadPlatformInboundSecret(ctx, db.DB, workspaceID)
|
||||
secret, healed, err := readOrLazyHealInboundSecret(ctx, workspaceID, "chat_files "+op)
|
||||
if err != nil {
|
||||
if errors.Is(err, wsauth.ErrNoInboundSecret) {
|
||||
// Lazy-heal: mint the secret now so future requests
|
||||
// succeed. Pre-2026-04-30 the SaaS provision path didn't
|
||||
// mint — every prod workspace started life with NULL
|
||||
// platform_inbound_secret. The shared-mint refactor closes
|
||||
// the new-workspace gap; this lazy-heal closes the
|
||||
// existing-workspace gap without requiring a destructive
|
||||
// reprovision.
|
||||
//
|
||||
// Why the request still 503s: the workspace's local
|
||||
// /configs/.platform_inbound_secret is also empty until the
|
||||
// next /registry/register response (registry.go:344-362)
|
||||
// propagates the freshly-minted secret. The user retries
|
||||
// once the workspace's heartbeat picks up the new secret
|
||||
// (typically <30s).
|
||||
if _, mintErr := wsauth.IssuePlatformInboundSecret(ctx, db.DB, workspaceID); mintErr != nil {
|
||||
log.Printf("chat_files %s: lazy-heal mint failed for %s: %v", op, workspaceID, mintErr)
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{
|
||||
"error": "workspace not yet enrolled in v2 " + op + " (RFC #2312)",
|
||||
"detail": "Failed to mint inbound secret. Reprovision the workspace if this persists.",
|
||||
})
|
||||
return "", "", false
|
||||
}
|
||||
log.Printf("chat_files %s: lazy-healed platform_inbound_secret for %s — retry once workspace re-registers (#2312 backfill)", op, workspaceID)
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{
|
||||
"error": "workspace re-registering — please retry in 30 seconds",
|
||||
"detail": "Inbound secret was just minted. Workspace will pick it up on its next heartbeat.",
|
||||
"retry_after_seconds": 30,
|
||||
})
|
||||
return "", "", false
|
||||
}
|
||||
log.Printf("chat_files %s: read platform_inbound_secret failed for %s: %v", op, workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read workspace secret"})
|
||||
// Either a non-NoInboundSecret read error (DB hiccup) or a mint
|
||||
// failure during lazy-heal. The chat_files contract is to surface
|
||||
// 503 with the RFC-#2312 reprovision hint in both cases — the user
|
||||
// can't proceed and needs ops attention.
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{
|
||||
"error": "workspace not yet enrolled in v2 " + op + " (RFC #2312)",
|
||||
"detail": "Failed to mint inbound secret. Reprovision the workspace if this persists.",
|
||||
})
|
||||
return "", "", false
|
||||
}
|
||||
if healed {
|
||||
// The platform now has the secret but the workspace's
|
||||
// /configs/.platform_inbound_secret is still empty until the next
|
||||
// /registry/register response propagates it. User retries after
|
||||
// the workspace's next heartbeat picks up the new secret (~30s).
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{
|
||||
"error": "workspace re-registering — please retry in 30 seconds",
|
||||
"detail": "Inbound secret was just minted. Workspace will pick it up on its next heartbeat.",
|
||||
"retry_after_seconds": 30,
|
||||
})
|
||||
return "", "", false
|
||||
}
|
||||
return wsURL, secret, true
|
||||
|
||||
@ -451,29 +451,19 @@ func (h *RegistryHandler) Register(c *gin.Context) {
|
||||
// has no volume to write into.
|
||||
//
|
||||
// Lazy-heal (2026-04-30): if the column is NULL (legacy workspace
|
||||
// provisioned before the shared-mint refactor), mint it inline and
|
||||
// include in the response. Without this, legacy workspaces would need
|
||||
// two round-trips before chat upload works — chat_files lazy-heals
|
||||
// platform-side on first attempt, then the workspace must heartbeat
|
||||
// to receive the freshly-minted secret. Heal-on-register collapses
|
||||
// that to one round-trip.
|
||||
secret, secretErr := wsauth.ReadPlatformInboundSecret(ctx, db.DB, payload.ID)
|
||||
if secretErr != nil && errors.Is(secretErr, wsauth.ErrNoInboundSecret) {
|
||||
minted, mintErr := wsauth.IssuePlatformInboundSecret(ctx, db.DB, payload.ID)
|
||||
if mintErr != nil {
|
||||
log.Printf("Registry: lazy-heal mint of platform_inbound_secret failed for %s: %v — workspace will 503 on chat upload until next register", payload.ID, mintErr)
|
||||
} else {
|
||||
secret = minted
|
||||
secretErr = nil
|
||||
log.Printf("Registry: lazy-healed platform_inbound_secret for %s (#2312 backfill)", payload.ID)
|
||||
}
|
||||
}
|
||||
if secretErr == nil {
|
||||
// provisioned before the shared-mint refactor), mint inline and
|
||||
// include in the response. Without this, legacy workspaces would
|
||||
// need two round-trips before chat upload works — chat_files
|
||||
// lazy-heals platform-side on first attempt, then the workspace
|
||||
// must heartbeat to receive the freshly-minted secret.
|
||||
// Heal-on-register collapses that to one round-trip.
|
||||
if secret, _, healErr := readOrLazyHealInboundSecret(ctx, payload.ID, "Registry"); healErr == nil {
|
||||
response["platform_inbound_secret"] = secret
|
||||
} else if !errors.Is(secretErr, wsauth.ErrNoInboundSecret) {
|
||||
// Non-NoInboundSecret read errors (DB hiccup, etc.) — log loud.
|
||||
log.Printf("Registry: read platform_inbound_secret for %s failed: %v", payload.ID, secretErr)
|
||||
}
|
||||
// Errors are non-fatal here — the workspace is online and can serve
|
||||
// non-/internal traffic. The lazy-heal helper has already logged
|
||||
// whichever sub-step failed (read or mint). If the secret never lands,
|
||||
// chat upload surfaces the issue loudly with the RFC-#2312 hint.
|
||||
|
||||
c.JSON(http.StatusOK, response)
|
||||
}
|
||||
|
||||
@ -36,14 +36,49 @@ package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"log"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth"
|
||||
)
|
||||
|
||||
// readOrLazyHealInboundSecret reads the workspace's
|
||||
// platform_inbound_secret. On ErrNoInboundSecret it mints inline
|
||||
// (lazy-heal — RFC #2312 backfill for workspaces provisioned before
|
||||
// the shared-mint refactor closed the gap).
|
||||
//
|
||||
// Returns:
|
||||
// - (secret, false, nil) — secret was already present
|
||||
// - (secret, true, nil) — secret was missing; just minted
|
||||
// - ("", false, err) — read failed (non-NoInboundSecret) OR mint failed
|
||||
//
|
||||
// opLabel prefixes log lines so operators can tell which feature
|
||||
// triggered the heal (e.g. "Registry", "chat_files Upload"). Centralized
|
||||
// here so the next "what to do when the secret is missing" decision —
|
||||
// rotation, audit, alerting — goes in ONE place. Same drift-prevention
|
||||
// rationale as resolveWorkspaceForwardCreds and mintWorkspaceSecrets.
|
||||
func readOrLazyHealInboundSecret(ctx context.Context, workspaceID, opLabel string) (secret string, healed bool, err error) {
|
||||
s, readErr := wsauth.ReadPlatformInboundSecret(ctx, db.DB, workspaceID)
|
||||
if readErr == nil {
|
||||
return s, false, nil
|
||||
}
|
||||
if !errors.Is(readErr, wsauth.ErrNoInboundSecret) {
|
||||
log.Printf("%s: read platform_inbound_secret failed for %s: %v", opLabel, workspaceID, readErr)
|
||||
return "", false, readErr
|
||||
}
|
||||
minted, mintErr := wsauth.IssuePlatformInboundSecret(ctx, db.DB, workspaceID)
|
||||
if mintErr != nil {
|
||||
log.Printf("%s: lazy-heal mint of platform_inbound_secret failed for %s: %v", opLabel, workspaceID, mintErr)
|
||||
return "", false, mintErr
|
||||
}
|
||||
log.Printf("%s: lazy-healed platform_inbound_secret for %s (#2312 backfill)", opLabel, workspaceID)
|
||||
return minted, true, nil
|
||||
}
|
||||
|
||||
// preparedProvisionContext carries the computed env + cfg through
|
||||
// the per-mode provisioner-Start step. Returned from
|
||||
// prepareProvisionContext when the caller proceeds; nil + non-empty
|
||||
|
||||
Loading…
Reference in New Issue
Block a user