From 586d567a48167354972800f0c47aa5fd1fd3164a Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sat, 2 May 2026 23:01:59 -0700 Subject: [PATCH] fix(workspace-server): log silent yaml.Unmarshal + coexistence test (#256, #257) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-ups from PR #2543's multi-model code review (audit #253). 1. **Log silent yaml.Unmarshal errors (#256).** When a malformed config.yaml made `yaml.Unmarshal(data, &raw)` fail, the affected template silently disappeared from /templates with no trace — operator could not distinguish "excluded due to parse error" from "never existed." That widened a real foot-gun once PR #2543 added structured top-level `providers:` (a string-shaped top-level `providers:` decoded into `[]providerRegistryEntry` would fail and drop the whole entry). Now logs `templates list: skip : yaml.Unmarshal: ` and continues with the rest. 2. **Coexistence test (#257 part 1).** PR #2543 covered the structured registry and slug list in isolation. claude-code-default in production ships BOTH: top-level `providers:` (structured registry, 2 entries) AND `runtime_config.providers:` (slug list, 3 entries). New `TestTemplatesList_BothProviderShapesCoexist` mirrors that layout, asserts both shapes surface independently with no cross-talk (e.g. a slug-only entry like `anthropic-api` does NOT synthesize a stub in the structured registry), and pins the JSON wire-shape for both fields side-by-side. 3. **`base_url: null` decoding assertion (#257 part 3).** Adds an explicit `got[0].BaseURL == ""` check in the existing `TestTemplatesList_SurfacesProviderRegistry` test, locking in the `string` (not `*string`) type. A future change to `*string` would surface as JSON `null` and break canvas's "no base_url = use provider defaults" branch — caught loudly by this assertion. Tests: 11 TestTemplatesList_* now green, including the new MalformedYAMLLogsAndSkips and BothProviderShapesCoexist. The remaining piece of #257 — renaming `Providers []string` JSON tag to `provider_slugs` — requires coordinated canvas updates across 4 files and is intentionally deferred to a separate PR (no canvas churn while user is mid-test). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/handlers/templates.go | 8 + .../internal/handlers/templates_test.go | 199 ++++++++++++++++++ 2 files changed, 207 insertions(+) diff --git a/workspace-server/internal/handlers/templates.go b/workspace-server/internal/handlers/templates.go index d50bb1fe..d99d1219 100644 --- a/workspace-server/internal/handlers/templates.go +++ b/workspace-server/internal/handlers/templates.go @@ -158,6 +158,14 @@ func (h *TemplatesHandler) List(c *gin.Context) { } `yaml:"runtime_config"` } if err := yaml.Unmarshal(data, &raw); err != nil { + // Without this log a malformed config.yaml causes the + // template to silently disappear from /templates with no + // trace — the operator can't tell "excluded due to parse + // error" from "never existed." That matters more now that + // templates ship richer YAML shapes (top-level providers + // registry, models[] with required_env, etc.) where a + // type-shape mismatch on one field drops the whole entry. + log.Printf("templates list: skip %s: yaml.Unmarshal: %v", id, err) return } diff --git a/workspace-server/internal/handlers/templates_test.go b/workspace-server/internal/handlers/templates_test.go index 20505214..5088f579 100644 --- a/workspace-server/internal/handlers/templates_test.go +++ b/workspace-server/internal/handlers/templates_test.go @@ -1,8 +1,10 @@ package handlers import ( + "bytes" "database/sql" "encoding/json" + "log" "net/http" "net/http/httptest" "os" @@ -350,6 +352,15 @@ skills: [] if !reflect.DeepEqual(got[0].AuthEnv, []string{"CLAUDE_CODE_OAUTH_TOKEN"}) { t.Errorf("ProviderRegistry[0].AuthEnv: want [CLAUDE_CODE_OAUTH_TOKEN], got %v", got[0].AuthEnv) } + // `base_url: null` in YAML → empty string for a plain `string` field + // (yaml.v3 default). Pinning this so a future change to `*string` + // (which would decode to nil instead and surface differently in JSON) + // is caught loudly. The canvas treats "" the same as "no base_url" + // (uses provider defaults); a `*string` change would emit a JSON + // `null` and break that branch. + if got[0].BaseURL != "" { + t.Errorf("ProviderRegistry[0].BaseURL: want empty string for `null` YAML, got %q", got[0].BaseURL) + } // Field plumbing on the second (third-party) entry — base_url is the // distinguishing signal for compat providers; canvas uses it to render // the "via Anthropic-compat endpoint" badge. @@ -377,6 +388,110 @@ skills: [] // `providers:` block (hermes today, langgraph, etc.) must NOT emit // `provider_registry: null`, which would break canvas's array-typed // parser (Array.isArray check returns false for null). +// TestTemplatesList_BothProviderShapesCoexist pins the real production +// shape: claude-code-default ships BOTH a top-level `providers:` block +// (structured registry) AND a `runtime_config.providers:` slug list +// (canvas Config tab dropdown). Both must surface independently — +// `provider_registry` on one field, `providers` on the other — with no +// cross-talk or struct-tag collision. +// +// PR #2543 introduced the structured field; reviewer noted the two +// fields' coexistence was only tested in isolation. This locks it in +// against the production layout so a future struct refactor that +// accidentally aliases the two YAML keys (or, e.g., moves the registry +// under `runtime_config:`) would fail loudly. +func TestTemplatesList_BothProviderShapesCoexist(t *testing.T) { + setupTestDB(t) + setupTestRedis(t) + + tmpDir := t.TempDir() + tmplDir := filepath.Join(tmpDir, "claude-code-default") + if err := os.MkdirAll(tmplDir, 0755); err != nil { + t.Fatalf("mkdir: %v", err) + } + // Mirrors workspace-configs-templates/claude-code-default/config.yaml: + // top-level structured `providers:` (auth_mode + auth_env) + nested + // `runtime_config.providers:` slug list. + configYaml := `name: Claude Code +runtime: claude-code +providers: + - name: anthropic-oauth + auth_mode: oauth + auth_env: [CLAUDE_CODE_OAUTH_TOKEN] + - name: minimax + auth_mode: third_party_anthropic_compat + base_url: https://api.minimax.io/anthropic + auth_env: [MINIMAX_API_KEY] +runtime_config: + model: claude-sonnet-4-6 + providers: + - anthropic-oauth + - anthropic-api + - minimax +skills: [] +` + if err := os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte(configYaml), 0644); err != nil { + t.Fatalf("write: %v", err) + } + + handler := NewTemplatesHandler(tmpDir, nil) + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest("GET", "/templates", nil) + handler.List(c) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", w.Code) + } + var resp []templateSummary + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatalf("parse: %v", err) + } + if len(resp) != 1 { + t.Fatalf("expected 1 template, got %d", len(resp)) + } + got := resp[0] + + // Slug list (runtime_config.providers) — independent of structured + // registry. Order preserved. + wantSlugs := []string{"anthropic-oauth", "anthropic-api", "minimax"} + if !reflect.DeepEqual(got.Providers, wantSlugs) { + t.Errorf("Providers (slug list): want %v, got %v", wantSlugs, got.Providers) + } + + // Structured registry (top-level providers) — fully populated, also + // in declaration order. Crucially, the slug list above does NOT + // bleed into here even though one slug (`anthropic-api`) is NOT in + // the structured registry — they really are two distinct YAML paths. + if len(got.ProviderRegistry) != 2 { + t.Fatalf("ProviderRegistry: want 2 entries (top-level only), got %d: %+v", len(got.ProviderRegistry), got.ProviderRegistry) + } + if got.ProviderRegistry[0].Name != "anthropic-oauth" || got.ProviderRegistry[0].AuthMode != "oauth" { + t.Errorf("ProviderRegistry[0]: want anthropic-oauth/oauth, got %+v", got.ProviderRegistry[0]) + } + if got.ProviderRegistry[1].Name != "minimax" || got.ProviderRegistry[1].BaseURL != "https://api.minimax.io/anthropic" { + t.Errorf("ProviderRegistry[1]: want minimax with base_url, got %+v", got.ProviderRegistry[1]) + } + + // Cross-shape negative: `anthropic-api` appears in slugs but not in + // the structured registry — make sure our parsing didn't synthesize + // a stub entry for it. + for _, e := range got.ProviderRegistry { + if e.Name == "anthropic-api" { + t.Errorf("ProviderRegistry must not synthesize entries from the slug list — found stray %q", e.Name) + } + } + + // JSON wire shape: both fields present in the same response. + body := w.Body.String() + if !strings.Contains(body, `"providers":["anthropic-oauth","anthropic-api","minimax"]`) { + t.Errorf("response missing slug-list providers field: %s", body) + } + if !strings.Contains(body, `"provider_registry":[{"name":"anthropic-oauth"`) { + t.Errorf("response missing structured provider_registry field: %s", body) + } +} + func TestTemplatesList_OmitsProviderRegistryWhenAbsent(t *testing.T) { setupTestDB(t) setupTestRedis(t) @@ -495,6 +610,90 @@ skills: [] } } +// TestTemplatesList_MalformedYAMLLogsAndSkips pins the diagnostic-on-skip +// behavior. Before, a malformed config.yaml made the affected template +// vanish from /templates with NO trace — operator can't tell it was +// excluded vs never existed. Now the handler logs `templates list: +// skip : yaml.Unmarshal: ` and continues with the rest. +// +// Asserts: +// - bad template is skipped (not present in response) +// - good sibling template still surfaces (one bad apple shouldn't +// poison the whole list) +// - log line names the offending template id (operator can grep) +func TestTemplatesList_MalformedYAMLLogsAndSkips(t *testing.T) { + setupTestDB(t) + setupTestRedis(t) + + tmpDir := t.TempDir() + + // Bad: YAML scalar where a struct is expected. tier expects int; + // supplying a list crashes yaml.Unmarshal cleanly. + badDir := filepath.Join(tmpDir, "bad-template") + if err := os.MkdirAll(badDir, 0755); err != nil { + t.Fatalf("mkdir bad: %v", err) + } + badYaml := `name: Broken +tier: [not, an, int] +runtime: claude-code +` + if err := os.WriteFile(filepath.Join(badDir, "config.yaml"), []byte(badYaml), 0644); err != nil { + t.Fatalf("write bad: %v", err) + } + + // Good sibling — must survive the bad neighbor. + goodDir := filepath.Join(tmpDir, "good-template") + if err := os.MkdirAll(goodDir, 0755); err != nil { + t.Fatalf("mkdir good: %v", err) + } + goodYaml := `name: Good +tier: 1 +runtime: hermes +skills: [] +` + if err := os.WriteFile(filepath.Join(goodDir, "config.yaml"), []byte(goodYaml), 0644); err != nil { + t.Fatalf("write good: %v", err) + } + + // Capture log output so we can assert on the skip line. + var logBuf bytes.Buffer + prevOutput := log.Writer() + log.SetOutput(&logBuf) + defer log.SetOutput(prevOutput) + + handler := NewTemplatesHandler(tmpDir, nil) + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request = httptest.NewRequest("GET", "/templates", nil) + handler.List(c) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", w.Code) + } + var resp []templateSummary + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatalf("parse: %v", err) + } + // Bad template MUST NOT appear; good template MUST appear. + if len(resp) != 1 { + t.Fatalf("expected 1 template (good only, bad skipped), got %d: %+v", len(resp), resp) + } + if resp[0].ID != "good-template" { + t.Errorf("surviving template should be good-template, got %q", resp[0].ID) + } + + // Log line MUST contain the bad template id and the parse error + // signal — without these, an operator looking at logs can't + // correlate "missing from /templates" with "yaml.Unmarshal failed". + logged := logBuf.String() + if !strings.Contains(logged, "bad-template") { + t.Errorf("expected log line to name bad-template, got: %s", logged) + } + if !strings.Contains(logged, "yaml.Unmarshal") { + t.Errorf("expected log line to mention yaml.Unmarshal, got: %s", logged) + } +} + func TestTemplatesList_NonexistentDir(t *testing.T) { setupTestDB(t) setupTestRedis(t)