Merge pull request 'feat(cp#469): tenant proxy env delivery (workspace-server companion to BELT #477)' (#2167) from cp/469-tenant-proxy-env-delivery into staging
Block internal-flavored paths / Block forbidden paths (push) Successful in 3s
Harness Replays / detect-changes (push) Successful in 3s
E2E API Smoke Test / detect-changes (push) Successful in 6s
CI / Shellcheck (E2E scripts) (push) Successful in 9s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 3s
Harness Replays / Harness Replays (push) Successful in 1s
Runtime PR-Built Compatibility / detect-changes (push) Successful in 5s
CI / Detect changes (push) Successful in 15s
Handlers Postgres Integration / detect-changes (push) Successful in 12s
E2E Chat / detect-changes (push) Successful in 12s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 50s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (push) Successful in 2m28s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m56s
E2E Chat / E2E Chat (push) Failing after 4m18s
CI / Platform (Go) (push) Successful in 5m19s
CI / Canvas (Next.js) (push) Successful in 6m20s
CI / Python Lint & Test (push) Successful in 6m24s
CI / Canvas Deploy Reminder (push) Successful in 2s
CI / all-required (push) Successful in 6m27s

This commit was merged in pull request #2167.
This commit is contained in:
2026-06-03 06:43:08 +00:00
4 changed files with 193 additions and 9 deletions
+50
View File
@@ -105,3 +105,53 @@ func refreshEnvFromCP() error {
log.Printf("CP env refresh: applied %d values from %s/cp/tenants/config", applied, base)
return nil
}
// requiredLLMEnvVars lists the LLM proxy env vars that a managed SaaS
// tenant must have populated once refreshEnvFromCP has run. It backs
// cp#469 (tenant proxy-env delivery): CP-delivered creds are expected
// to reach the tenant process env on boot. See Researcher Task #37 /
// Spec 2 and Task #46 (watch-fail-first test).
//
// The names are byte-matched against the emission verified in the
// controlplane's tenant_config.go:140-144 (Researcher REQUEST_CHANGES
// iterate body, 3987f59c). They are the LLM-proxy subset of the 8
// CP-emitted keys. OPENAI_BASE_URL, OPENAI_API_KEY, ANTHROPIC_BASE_URL
// and ANTHROPIC_API_KEY are intentionally absent: they are out of scope
// for cp#469 because they serve a different feature surface
// (direct-to-provider fallbacks, not the proxy).
//
// Take care with the fourth entry: it is the namespaced
// MOLECULE_LLM_ANTHROPIC_BASE_URL. Bare ANTHROPIC_BASE_URL is a separate
// CP-emitted key for direct-provider use and must not be substituted.
var requiredLLMEnvVars = []string{
"MOLECULE_LLM_USAGE_TOKEN",
"MOLECULE_LLM_USAGE_URL", // history: v1 of this list wrongly used MOLECULE_LLM_URL
"MOLECULE_LLM_BASE_URL",
"MOLECULE_LLM_ANTHROPIC_BASE_URL", // history: v2 wrongly used bare ANTHROPIC_BASE_URL, which is a different key
}
// assertManagedTenantHasLLMEnv is the post-refresh gate for managed
// SaaS tenants. A process counts as "managed" only when MOLECULE_ORG_ID
// and ADMIN_TOKEN are both non-empty; in that case every name in
// requiredLLMEnvVars must resolve to a non-empty value.
//
// Returns nil when the process is not managed (self-hosted dev, or not
// yet provisioned), so local development is never blocked, and also
// when all required keys are present. Otherwise it returns an error
// prefixed with MISSING_CP_LLM_ENV that lists the unset keys in the
// order they appear in requiredLLMEnvVars.
//
// The function only reports; whether to log and continue or to abort
// the boot is left to the caller.
func assertManagedTenantHasLLMEnv() error {
	orgID, adminToken := os.Getenv("MOLECULE_ORG_ID"), os.Getenv("ADMIN_TOKEN")
	if managed := orgID != "" && adminToken != ""; !managed {
		// Self-hosted dev / not yet provisioned: nothing to enforce.
		return nil
	}

	var unset []string
	for i := range requiredLLMEnvVars {
		name := requiredLLMEnvVars[i]
		if value := os.Getenv(name); value != "" {
			continue
		}
		unset = append(unset, name)
	}

	if len(unset) == 0 {
		return nil
	}
	return fmt.Errorf("MISSING_CP_LLM_ENV: required LLM proxy keys not set after refreshEnvFromCP: %v", unset)
}
@@ -5,6 +5,7 @@ import (
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
)
@@ -47,6 +48,138 @@ func TestRefreshEnvFromCP_AppliesCPResponse(t *testing.T) {
}
}
// TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys is the
// watch-fail-first case from Researcher Task #46. With MOLECULE_ORG_ID
// and ADMIN_TOKEN set (managed tenant) and a CP response that carries
// no LLM proxy keys, the boot assertion has to report
// MISSING_CP_LLM_ENV instead of letting the gap through. The scenario
// being guarded: a CP that lost its LLM creds (e.g. mid-incident) would
// otherwise let the tenant boot and only fail at the first LLM call,
// which is worse than a loud refusal at startup.
func TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys(t *testing.T) {
	// Stub CP payload deliberately omits every required LLM key; it
	// models a CP that dropped, or never had, LLM creds for this org.
	const cpBody = `{"MOLECULE_CP_SHARED_SECRET":"x","MOLECULE_CP_URL":"https://api.moleculesai.app"}`
	serveConfig := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		fmt.Fprint(w, cpBody)
	}
	cp := httptest.NewServer(http.HandlerFunc(serveConfig))
	defer cp.Close()

	// Managed-tenant identity first, then blank out all four LLM keys
	// to reproduce the boot-without-LLM-env failure mode.
	env := [][2]string{
		{"MOLECULE_ORG_ID", "org-managed-1"},
		{"ADMIN_TOKEN", "admin-tok"},
		{"MOLECULE_CP_URL", cp.URL},
		{"MOLECULE_LLM_USAGE_TOKEN", ""},
		{"MOLECULE_LLM_USAGE_URL", ""},
		{"MOLECULE_LLM_BASE_URL", ""},
		{"MOLECULE_LLM_ANTHROPIC_BASE_URL", ""},
	}
	for _, kv := range env {
		t.Setenv(kv[0], kv[1])
	}

	// The refresh is expected to succeed: the stub is reachable and
	// answers the request normally.
	if refreshErr := refreshEnvFromCP(); refreshErr != nil {
		t.Fatalf("refreshEnvFromCP: %v", refreshErr)
	}

	// The gate is what must notice the missing keys.
	switch gateErr := assertManagedTenantHasLLMEnv(); {
	case gateErr == nil:
		t.Fatal("expected MISSING_CP_LLM_ENV error for managed tenant without LLM keys, got nil")
	case !strings.Contains(gateErr.Error(), "MISSING_CP_LLM_ENV"):
		t.Errorf("expected error to contain MISSING_CP_LLM_ENV, got: %v", gateErr)
	}
}
// TestRefreshEnvFromCP_ManagedTenantHappyPath: when the CP returns all
// 4 LLM-proxy keys, the gate must PASS — no MISSING_CP_LLM_ENV for a
// properly-configured managed tenant. It is the counterpart to
// TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys: if THIS test ever
// reports MISSING_CP_LLM_ENV on the byte-correct key set, the
// requiredLLMEnvVars list has drifted from the CP emission again.
// Per Researcher REQUEST_CHANGES TEST ADEQUACY note.
//
// Every delivered key is checked against the value the stub sent, not
// just the first one, so a refresh that leaves a wrong non-empty value
// under any of the four names fails here even though the gate itself
// would accept it.
func TestRefreshEnvFromCP_ManagedTenantHappyPath(t *testing.T) {
	// delivered mirrors the stub payload below key-for-key; the names
	// are byte-matched to the tenant_config.go:140-144 CP emission.
	delivered := []struct{ key, value string }{
		{"MOLECULE_LLM_USAGE_TOKEN", "tok-1"},
		{"MOLECULE_LLM_USAGE_URL", "https://llm.example.com/usage"},
		{"MOLECULE_LLM_BASE_URL", "https://llm.example.com"},
		{"MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://llm.example.com/anthropic"},
	}

	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		// Return ALL 4 LLM-proxy keys.
		fmt.Fprint(w, `{"MOLECULE_LLM_USAGE_TOKEN":"tok-1","MOLECULE_LLM_USAGE_URL":"https://llm.example.com/usage","MOLECULE_LLM_BASE_URL":"https://llm.example.com","MOLECULE_LLM_ANTHROPIC_BASE_URL":"https://llm.example.com/anthropic"}`)
	}))
	defer srv.Close()

	t.Setenv("MOLECULE_ORG_ID", "org-managed-happy")
	t.Setenv("ADMIN_TOKEN", "admin-tok")
	t.Setenv("MOLECULE_CP_URL", srv.URL)
	// Pre-clear so a populated value can only have come from the refresh.
	for _, kv := range delivered {
		t.Setenv(kv.key, "")
	}

	if err := refreshEnvFromCP(); err != nil {
		t.Fatalf("refreshEnvFromCP: %v", err)
	}

	// Sanity: the refresh applied each key with the value the CP sent.
	for _, kv := range delivered {
		if got := os.Getenv(kv.key); got != kv.value {
			t.Errorf("refresh did not apply %s: got %q, want %q", kv.key, got, kv.value)
		}
	}

	// The boot assertion must pass — no MISSING_CP_LLM_ENV.
	if err := assertManagedTenantHasLLMEnv(); err != nil {
		t.Errorf("managed happy path must not MISSING_CP_LLM_ENV, got: %v", err)
	}
}
// TestRefreshEnvFromCP_ManagedTenantPartialEnv covers the "one key
// dropped" case: the CP delivers 3 of the 4 LLM-proxy keys. The gate
// must still refuse, and its error must identify the absent key by
// name. Per the Researcher REQUEST_CHANGES TEST ADEQUACY note, this is
// the more typical production failure than losing every key at once.
func TestRefreshEnvFromCP_ManagedTenantPartialEnv(t *testing.T) {
	// Payload carries three keys; MOLECULE_LLM_ANTHROPIC_BASE_URL is
	// the one left out.
	const cpBody = `{"MOLECULE_LLM_USAGE_TOKEN":"tok-1","MOLECULE_LLM_USAGE_URL":"https://llm.example.com/usage","MOLECULE_LLM_BASE_URL":"https://llm.example.com"}`
	serveConfig := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		fmt.Fprint(w, cpBody)
	}
	cp := httptest.NewServer(http.HandlerFunc(serveConfig))
	defer cp.Close()

	// All four LLM keys start blank, so after the refresh only the
	// three returned by the CP are populated and the fourth stays empty.
	env := [][2]string{
		{"MOLECULE_ORG_ID", "org-managed-partial"},
		{"ADMIN_TOKEN", "admin-tok"},
		{"MOLECULE_CP_URL", cp.URL},
		{"MOLECULE_LLM_USAGE_TOKEN", ""},
		{"MOLECULE_LLM_USAGE_URL", ""},
		{"MOLECULE_LLM_BASE_URL", ""},
		{"MOLECULE_LLM_ANTHROPIC_BASE_URL", ""},
	}
	for _, kv := range env {
		t.Setenv(kv[0], kv[1])
	}

	if refreshErr := refreshEnvFromCP(); refreshErr != nil {
		t.Fatalf("refreshEnvFromCP: %v", refreshErr)
	}

	gateErr := assertManagedTenantHasLLMEnv()
	if gateErr == nil {
		t.Fatal("expected MISSING_CP_LLM_ENV for partial env (3 of 4 keys), got nil")
	}

	msg := gateErr.Error()
	if hasCode := strings.Contains(msg, "MISSING_CP_LLM_ENV"); !hasCode {
		t.Errorf("expected error to contain MISSING_CP_LLM_ENV, got: %v", gateErr)
	}
	if namesKey := strings.Contains(msg, "MOLECULE_LLM_ANTHROPIC_BASE_URL"); !namesKey {
		t.Errorf("expected error to name the missing key MOLECULE_LLM_ANTHROPIC_BASE_URL, got: %v", gateErr)
	}
}
// TestAssertManagedTenantHasLLMEnv_NotManagedIsNoop pins the
// "managed only" contract of the assertion: with no org ID and no admin
// token (self-hosted), absent LLM keys must not produce an error, so
// local development is never blocked by the gate.
func TestAssertManagedTenantHasLLMEnv_NotManagedIsNoop(t *testing.T) {
	// Identity vars and all four LLM keys are blanked alike.
	blanked := []string{
		"MOLECULE_ORG_ID",
		"ADMIN_TOKEN",
		"MOLECULE_LLM_USAGE_TOKEN",
		"MOLECULE_LLM_USAGE_URL",
		"MOLECULE_LLM_BASE_URL",
		"MOLECULE_LLM_ANTHROPIC_BASE_URL",
	}
	for _, name := range blanked {
		t.Setenv(name, "")
	}

	gateErr := assertManagedTenantHasLLMEnv()
	if gateErr == nil {
		return
	}
	t.Errorf("self-hosted (not managed) must not block, got: %v", gateErr)
}
// TestRefreshEnvFromCP_CPUnreachableDoesNotFailBoot: network errors must
// return non-nil BUT main.go treats that as warn-and-continue. We assert
// the function returns an error (not a panic) so the caller can log.
+10
View File
@@ -56,6 +56,16 @@ func main() {
log.Printf("CP env refresh: %v (continuing with baked-in env)", err)
}
// Managed-tenant boot assertion (cp#469 — tenant proxy-env delivery).
// If we're a managed SaaS tenant (orgID + adminToken set), all required
// LLM proxy env vars must be present after refresh. Missing keys block
// the tenant from booting with broken LLM creds — silent-fail is worse
// than a loud refusal. Self-hosted (no orgID/adminToken) short-circuits
// inside the assertion, so this never fires for dev.
if err := assertManagedTenantHasLLMEnv(); err != nil {
log.Fatalf("Managed tenant boot assertion: %v", err)
}
// Secrets encryption. In MOLECULE_ENV=prod, boot refuses to start
// without a valid SECRETS_ENCRYPTION_KEY (fail-secure — Top-5 #5).
// In any other environment, missing keys just log a warning and
@@ -407,15 +407,6 @@ func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) e
// matching (the wsauth errors are typed for the invalid case).
var errInvalidCallerToken = errors.New("missing caller auth token")
// canvasUserMessage holds the user message extracted from an A2A canvas
// request body so that it can be broadcast to other sessions.
//
// Every field is tagged omitempty, so a tag-aware JSON encoder leaves
// zero-valued fields out of the output.
type canvasUserMessage struct {
// Message is encoded under "message". Presumably the user-authored
// text — confirm against the extraction code.
Message string `json:"message,omitempty"`
// Parts is encoded under "parts" as a list of free-form objects; the
// per-part schema is not fixed by this type.
Parts []map[string]interface{} `json:"parts,omitempty"`
// MessageID is encoded under "messageId".
MessageID string `json:"messageId,omitempty"`
// Attachments is encoded under "attachments" as a list of free-form
// objects; the per-attachment schema is not fixed by this type.
Attachments []map[string]interface{} `json:"attachments,omitempty"`
}
// extractCanvasUserMessage parses an A2A JSON-RPC request body and extracts
// the user-authored text and attachments from a canvas-initiated message/send.
// Returns nil when the body is not a canvas user message (empty, malformed,