Merge pull request #2375 from Molecule-AI/auto/registry-lazy-heal-inbound-secret

fix(registry): lazy-heal platform_inbound_secret on register for legacy workspaces
This commit is contained in:
Hongming Wang 2026-04-30 10:47:49 +00:00 committed by GitHub
commit eef1969e30
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 102 additions and 8 deletions

View File

@ -449,12 +449,29 @@ func (h *RegistryHandler) Register(c *gin.Context) {
// outbound auth_token's "issue once" lifecycle. Returning it here is
// the only delivery path for SaaS, where the platform's CP provisioner
// has no volume to write into.
if secret, secretErr := wsauth.ReadPlatformInboundSecret(ctx, db.DB, payload.ID); secretErr == nil {
//
// Lazy-heal (2026-04-30): if the column is NULL (legacy workspace
// provisioned before the shared-mint refactor), mint it inline and
// include in the response. Without this, legacy workspaces would need
// two round-trips before chat upload works — chat_files lazy-heals
// platform-side on first attempt, then the workspace must heartbeat
// to receive the freshly-minted secret. Heal-on-register collapses
// that to one round-trip.
secret, secretErr := wsauth.ReadPlatformInboundSecret(ctx, db.DB, payload.ID)
if secretErr != nil && errors.Is(secretErr, wsauth.ErrNoInboundSecret) {
minted, mintErr := wsauth.IssuePlatformInboundSecret(ctx, db.DB, payload.ID)
if mintErr != nil {
log.Printf("Registry: lazy-heal mint of platform_inbound_secret failed for %s: %v — workspace will 503 on chat upload until next register", payload.ID, mintErr)
} else {
secret = minted
secretErr = nil
log.Printf("Registry: lazy-healed platform_inbound_secret for %s (#2312 backfill)", payload.ID)
}
}
if secretErr == nil {
response["platform_inbound_secret"] = secret
} else if !errors.Is(secretErr, wsauth.ErrNoInboundSecret) {
// ErrNoInboundSecret is the expected case for legacy workspaces
// that predate migration 044 — quiet. Other errors (DB hiccup, etc.)
// log loud so ops notices.
// Non-NoInboundSecret read errors (DB hiccup, etc.) — log loud.
log.Printf("Registry: read platform_inbound_secret for %s failed: %v", payload.ID, secretErr)
}

View File

@ -991,7 +991,19 @@ func TestRegister_ReturnsPlatformInboundSecret_RFC2312_PRF(t *testing.T) {
// that predate migration 044 (NULL platform_inbound_secret column) still
// get a successful registration — the field is just omitted from the
// response. The Register handler logs the absence quietly.
func TestRegister_NoInboundSecret_OmitsField(t *testing.T) {
// TestRegister_NoInboundSecret_LazyHeals — legacy workspace path:
// when ReadPlatformInboundSecret returns ErrNoInboundSecret (NULL
// column), Register MUST mint inline and include the freshly-minted
// secret in the response. Without this, legacy workspaces would need
// two round-trips before chat upload works (chat_files heals
// platform-side → workspace must heartbeat → next chat upload).
//
// Pre-fix this test asserted the field was ABSENT; that was correct
// for the missing behavior, but happened to pass even with my
// register-time lazy-heal change because sqlmock unmatched UPDATE
// caused the mint to fail and fall back to omit-field. Splitting
// into success + failure tests pins both branches.
func TestRegister_NoInboundSecret_LazyHeals(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
@ -1002,7 +1014,6 @@ func TestRegister_NoInboundSecret_OmitsField(t *testing.T) {
mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
// resolveDeliveryMode — no row yet, default push (#2339).
mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
WithArgs(wsID).
WillReturnError(sql.ErrNoRows)
@ -1019,6 +1030,12 @@ func TestRegister_NoInboundSecret_OmitsField(t *testing.T) {
mock.ExpectQuery(`SELECT platform_inbound_secret FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"platform_inbound_secret"}).AddRow(nil))
// Lazy-heal mint MUST land. If this expectation isn't matched, the
// register handler skipped backfill and legacy workspaces would
// need 2 round-trips before chat upload works.
mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret = \$1 WHERE id = \$2`).
WithArgs(sqlmock.AnyArg(), wsID).
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@ -1029,12 +1046,72 @@ func TestRegister_NoInboundSecret_OmitsField(t *testing.T) {
handler.Register(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200 even without inbound secret, got %d", w.Code)
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
_ = json.Unmarshal(w.Body.Bytes(), &resp)
secret, present := resp["platform_inbound_secret"]
if !present {
t.Errorf("expected platform_inbound_secret to be PRESENT (lazy-healed), got response: %v", resp)
}
if s, ok := secret.(string); !ok || s == "" {
t.Errorf("expected non-empty platform_inbound_secret string, got %T %v", secret, secret)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations not met — register-time lazy-heal mint did NOT run, regression of #2312 backfill: %v", err)
}
}
// TestRegister_NoInboundSecret_LazyHealMintFailureOmitsField pins the
// alternate branch: if the lazy-heal mint itself fails (DB hiccup),
// Register MUST still respond 200 (workspace is online) but omit the
// field. The next register call will retry the heal.
func TestRegister_NoInboundSecret_LazyHealMintFailureOmitsField(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewRegistryHandler(broadcaster)
const wsID = "00000000-0000-0000-0000-000000002313"
mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
WithArgs(wsID).
WillReturnError(sql.ErrNoRows)
mock.ExpectExec("INSERT INTO workspaces").WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectQuery("SELECT url FROM workspaces WHERE id").
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow("http://localhost:9100"))
mock.ExpectExec("INSERT INTO structure_events").WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
mock.ExpectExec("INSERT INTO workspace_auth_tokens").WillReturnResult(sqlmock.NewResult(1, 1))
mock.ExpectQuery(`SELECT platform_inbound_secret FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"platform_inbound_secret"}).AddRow(nil))
// Mint fails — handler must NOT 500; just omit field + log.
mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret = \$1 WHERE id = \$2`).
WithArgs(sqlmock.AnyArg(), wsID).
WillReturnError(sql.ErrConnDone)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("POST", "/registry/register",
bytes.NewBufferString(`{"id":"`+wsID+`","url":"http://localhost:9100","agent_card":{"name":"x"}}`))
c.Request.Header.Set("Content-Type", "application/json")
handler.Register(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200 even when lazy-heal fails (workspace is online), got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
_ = json.Unmarshal(w.Body.Bytes(), &resp)
if _, present := resp["platform_inbound_secret"]; present {
t.Errorf("expected platform_inbound_secret to be ABSENT for legacy workspace, got: %v", resp["platform_inbound_secret"])
t.Errorf("expected platform_inbound_secret to be ABSENT when mint failed, got: %v", resp["platform_inbound_secret"])
}
}