fix(platform-agent): pin LLM_PROVIDER=platform when concierge MODEL is empty #3160

Merged
devops-engineer merged 1 commit from fix/concierge-provider-empty-model into main 2026-06-22 17:27:57 +00:00
2 changed files with 66 additions and 4 deletions
@@ -502,10 +502,17 @@ func (h *WorkspaceHandler) ensureConciergeProvider(ctx context.Context, workspac
// just above (the fresh-boot seed) — so reading it here is sufficient and
// avoids a redundant secret decrypt.
model := strings.TrimSpace(envVars["MODEL"])
// Only pin when the model is in the platform-managed namespace that needs it.
// A non-platform model (e.g. `sonnet`, a BYOK `claude-…`) resolves on its
// own; forcing `platform` there would mis-route auth and break the agent.
if !strings.HasPrefix(strings.ToLower(model), platformManagedModelPrefix) {
// Pin platform when the model is platform-managed OR unresolved (empty). An
// empty MODEL here is NOT a BYOK/self-host signal — those carry a stored
// LLM_PROVIDER (handled by the early-return above) or an explicit non-platform
// MODEL (skipped just below). Empty means an unresolved fresh/rebuilt-from-DB
// payload, which defaults to the platform-managed family; skipping the pin
// there (the old `HasPrefix("", …)`==false path) left the concierge without
// LLM_PROVIDER, so the runtime could not drop the inherited tenant
// CLAUDE_CODE_OAUTH_TOKEN and the agent 401'd against the CP LLM proxy. Only a
// NON-empty non-platform model (an explicit BYOK pick) resolves on its own;
// forcing `platform` there would mis-route auth and break the agent.
if model != "" && !strings.HasPrefix(strings.ToLower(model), platformManagedModelPrefix) {
return
}
@@ -717,6 +717,61 @@ func TestApplyConciergeProvisionConfig_SeedsModel(t *testing.T) {
})
}
// TestEnsureConciergeProvider_EmptyModelPins guards the pin gate inside
// ensureConciergeProvider, which the fix/concierge-provider-empty-model change
// rewrote from
//
//	if !strings.HasPrefix(strings.ToLower(model), platformManagedModelPrefix)
//
// to
//
//	if model != "" && !strings.HasPrefix(...)
//
// Under the old gate an EMPTY model never matched the platform-managed prefix,
// so the function returned early without pinning. A fresh or rebuilt-from-DB
// concierge payload (MODEL env not yet populated) was therefore left without
// LLM_PROVIDER and 401'd against the CP LLM proxy. With the fix, an unresolved
// (empty) model is treated as the platform-managed default and gets pinned.
//
// ensureConciergeProvider is invoked DIRECTLY here (not through
// applyConciergeProvisionConfig) so that only the gate is under test.
func TestEnsureConciergeProvider_EmptyModelPins(t *testing.T) {
	const (
		providerSelQuery = `SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1 AND key = 'LLM_PROVIDER'`
		secretInsert     = `INSERT INTO workspace_secrets`
	)
	handler := &WorkspaceHandler{}

	t.Run("empty MODEL (rebuilt-from-DB payload) still pins platform", func(t *testing.T) {
		const wsID = "ws-empty-model"
		db := setupTestDB(t)

		// The existence probe for a stored LLM_PROVIDER comes back with zero
		// rows, so execution reaches the gate.
		noStoredProvider := sqlmock.NewRows([]string{"encrypted_value", "encryption_version"})
		db.ExpectQuery(providerSelQuery).WithArgs(wsID).WillReturnRows(noStoredProvider)

		// An empty model counts as the platform-managed default, so the pin
		// has to be written to workspace_secrets.
		db.ExpectExec(secretInsert).
			WithArgs(wsID, sqlmock.AnyArg(), sqlmock.AnyArg()).
			WillReturnResult(sqlmock.NewResult(0, 1))

		// Deliberately no MODEL key: this is the unresolved fresh/rebuilt payload.
		payload := make(map[string]string)
		handler.ensureConciergeProvider(context.Background(), wsID, payload)

		if got := payload["LLM_PROVIDER"]; got != conciergeProvider {
			t.Errorf("empty MODEL did not pin LLM_PROVIDER=%q; got %q (env=%v) — concierge would 401 against the CP LLM proxy", conciergeProvider, got, payload)
		}
		if unmet := db.ExpectationsWereMet(); unmet != nil {
			t.Errorf("unmet sqlmock expectations (LLM_PROVIDER pin not persisted): %v", unmet)
		}
	})

	t.Run("explicit BYOK non-platform model still skips the pin", func(t *testing.T) {
		const wsID = "ws-byok-model"
		db := setupTestDB(t)

		// Same starting point as above: nothing stored, so the gate is reached.
		noStoredProvider := sqlmock.NewRows([]string{"encrypted_value", "encryption_version"})
		db.ExpectQuery(providerSelQuery).WithArgs(wsID).WillReturnRows(noStoredProvider)

		// No ExpectExec is registered on purpose: a non-empty BYOK model
		// resolves on its own, so any INSERT would be an unexpected call.
		payload := map[string]string{"MODEL": "anthropic:claude-opus-4-8"}
		handler.ensureConciergeProvider(context.Background(), wsID, payload)

		if pinned, present := payload["LLM_PROVIDER"]; present {
			t.Errorf("explicit BYOK model wrongly pinned LLM_PROVIDER=%q — would mis-route a BYOK/self-host concierge", pinned)
		}
		if unmet := db.ExpectationsWereMet(); unmet != nil {
			t.Errorf("unmet sqlmock expectations (an unexpected INSERT means it pinned a BYOK model): %v", unmet)
		}
	})
}
// TestApplyConciergeProvisionConfig_SeedsProvider is the CI regression gate for
// the concierge non-response incident (prod 2026-06-18): the concierge booted
// online but configuration_status=not_configured because the runtime wheel