fix(platform-agent): pin LLM_PROVIDER=platform when concierge MODEL is empty #3160
@@ -502,10 +502,17 @@ func (h *WorkspaceHandler) ensureConciergeProvider(ctx context.Context, workspac
|
||||
// just above (the fresh-boot seed) — so reading it here is sufficient and
|
||||
// avoids a redundant secret decrypt.
|
||||
model := strings.TrimSpace(envVars["MODEL"])
|
||||
// Only pin when the model is in the platform-managed namespace that needs it.
|
||||
// A non-platform model (e.g. `sonnet`, a BYOK `claude-…`) resolves on its
|
||||
// own; forcing `platform` there would mis-route auth and break the agent.
|
||||
if !strings.HasPrefix(strings.ToLower(model), platformManagedModelPrefix) {
|
||||
// Pin platform when the model is platform-managed OR unresolved (empty). An
|
||||
// empty MODEL here is NOT a BYOK/self-host signal — those carry a stored
|
||||
// LLM_PROVIDER (handled by the early-return above) or an explicit non-platform
|
||||
// MODEL (skipped just below). Empty means an unresolved fresh/rebuilt-from-DB
|
||||
// payload, which defaults to the platform-managed family; skipping the pin
|
||||
// there (the old `HasPrefix("", …)`==false path) left the concierge without
|
||||
// LLM_PROVIDER, so the runtime could not drop the inherited tenant
|
||||
// CLAUDE_CODE_OAUTH_TOKEN and the agent 401'd against the CP LLM proxy. Only a
|
||||
// NON-empty non-platform model (an explicit BYOK pick) resolves on its own;
|
||||
// forcing `platform` there would mis-route auth and break the agent.
|
||||
if model != "" && !strings.HasPrefix(strings.ToLower(model), platformManagedModelPrefix) {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -717,6 +717,61 @@ func TestApplyConciergeProvisionConfig_SeedsModel(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
// TestEnsureConciergeProvider_EmptyModelPins is the direct-unit regression gate
|
||||
// for the fix/concierge-provider-empty-model fix: the pin gate changed from
|
||||
// `if !strings.HasPrefix(strings.ToLower(model), platformManagedModelPrefix)`
|
||||
// to `if model != "" && !strings.HasPrefix(...)`. The old form computed
|
||||
// HasPrefix("", "moonshot/") == false for an EMPTY model and so returned early
|
||||
// WITHOUT pinning — leaving a fresh/rebuilt-from-DB concierge payload (whose
|
||||
// MODEL env was not yet populated) with no LLM_PROVIDER, which 401'd against the
|
||||
// CP LLM proxy. The fix treats an unresolved (empty) model as the platform-
|
||||
// managed default and pins. This calls ensureConciergeProvider DIRECTLY (not via
|
||||
// applyConciergeProvisionConfig) to isolate the gate.
|
||||
func TestEnsureConciergeProvider_EmptyModelPins(t *testing.T) {
|
||||
h := &WorkspaceHandler{}
|
||||
const providerSelQuery = `SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1 AND key = 'LLM_PROVIDER'`
|
||||
const secretInsert = `INSERT INTO workspace_secrets`
|
||||
|
||||
t.Run("empty MODEL (rebuilt-from-DB payload) still pins platform", func(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
// No LLM_PROVIDER stored yet → existence SELECT empty → proceed to the gate.
|
||||
mock.ExpectQuery(providerSelQuery).WithArgs("ws-empty-model").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"encrypted_value", "encryption_version"}))
|
||||
// Empty model is the platform-managed default → the pin MUST persist.
|
||||
mock.ExpectExec(secretInsert).
|
||||
WithArgs("ws-empty-model", sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
env := map[string]string{} // no MODEL key — unresolved fresh/rebuilt payload
|
||||
h.ensureConciergeProvider(context.Background(), "ws-empty-model", env)
|
||||
|
||||
if env["LLM_PROVIDER"] != conciergeProvider {
|
||||
t.Errorf("empty MODEL did not pin LLM_PROVIDER=%q; got %q (env=%v) — concierge would 401 against the CP LLM proxy", conciergeProvider, env["LLM_PROVIDER"], env)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations (LLM_PROVIDER pin not persisted): %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("explicit BYOK non-platform model still skips the pin", func(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
// No LLM_PROVIDER stored yet → existence SELECT empty → proceed to the gate.
|
||||
mock.ExpectQuery(providerSelQuery).WithArgs("ws-byok-model").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"encrypted_value", "encryption_version"}))
|
||||
// NO ExpectExec: a non-empty BYOK model resolves on its own → no pin.
|
||||
|
||||
env := map[string]string{"MODEL": "anthropic:claude-opus-4-8"}
|
||||
h.ensureConciergeProvider(context.Background(), "ws-byok-model", env)
|
||||
|
||||
if _, ok := env["LLM_PROVIDER"]; ok {
|
||||
t.Errorf("explicit BYOK model wrongly pinned LLM_PROVIDER=%q — would mis-route a BYOK/self-host concierge", env["LLM_PROVIDER"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations (an unexpected INSERT means it pinned a BYOK model): %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestApplyConciergeProvisionConfig_SeedsProvider is the CI regression gate for
|
||||
// the concierge non-response incident (prod 2026-06-18): the concierge booted
|
||||
// online but configuration_status=not_configured because the runtime wheel
|
||||
|
||||
Reference in New Issue
Block a user