fix(channels): fall back to empty defaults on unmarshal errors (#1108) #2347
@@ -93,12 +93,12 @@ For "do we have any backend?", use `HasProvisioner()`, never bare `h.provisioner
|
||||
3. **Restart divergence on runtime changes.** Docker re-reads `/configs/config.yaml` from the container before stop, so a changed `runtime:` survives a restart even if the DB isn't synced. EC2 trusts the DB only. If you change the runtime via the Config tab and the handler races the restart, Docker will land on the new runtime, EC2 will land on the old one. **Fix path:** make the Config-tab save explicitly flush to DB before kicking off a restart, not deferred.
|
||||
4. **Console-output asymmetry.** Users debugging a stuck workspace on Docker see `docker logs`; on EC2 they see `GetConsoleOutput`. The two outputs look nothing alike. **Fix path:** expose a unified `GET /workspaces/:id/boot-log` that proxies to whichever backend serves the data. Already partly there via `cp_provisioner.Console`.
|
||||
5. **Template script drift.** `install.sh` and `start.sh` in each template repo do the same high-level work (install hermes-agent, write .env, write config.yaml, start gateway) but must be kept byte-level consistent on the provider-key forwarding block. Easy to forget. Enforced now by `tools/check-template-parity.sh` (see below) — run it in each template repo's CI.
|
||||
6. **Both backends panic when underlying client is nil.** Discovered by the contract-test scaffold landing in this PR: `Provisioner.{Stop,IsRunning}` nil-dereferences the Docker client, and `CPProvisioner.{Stop,IsRunning}` nil-dereferences `httpClient`. The real code always sets these, so this is theoretical in prod — but it means the contract runner can't execute scenarios against zero-value backends. **Fix path:** guard each method with `if p.docker == nil { return false, errNoBackend }` (and equivalent for CP), then flip the `t.Skip` in the contract tests to `t.Run`.
|
||||
6. **Both backends panic when underlying client is nil.** ✅ **Resolved** (`fix/provisioner-nil-guards-1813`). `Provisioner.{Stop,IsRunning}` and `CPProvisioner.{Stop,IsRunning}` now guard against nil clients with `ErrNoBackend`, so the contract-test runner executes scenarios against zero-valued backends without panic.
|
||||
|
||||
## Enforcement
|
||||
|
||||
- **`tools/check-template-parity.sh`** (this repo) — ensures `install.sh` and `start.sh` in a template repo forward identical sets of provider keys. Wire into each template repo's CI as `bash $MONOREPO/tools/check-template-parity.sh install.sh start.sh`.
|
||||
- **Contract tests** (stub) — `workspace-server/internal/provisioner/backend_contract_test.go` defines the behaviors every `provisioner.Provisioner` implementation must satisfy. Fails compile when a method drifts between `Docker` and `CPProvisioner`. Scenario-level runs are `t.Skip`'d today pending drift risk #6 (see above) — compile-time assertions still catch method drift.
|
||||
- **Contract tests** — `workspace-server/internal/provisioner/backend_contract_test.go` defines the behaviors every `provisioner.Provisioner` implementation must satisfy. Fails compile when a method drifts between `Docker` and `CPProvisioner`. Scenario-level runs execute against zero-valued backends since drift risk #6 was resolved (`fix/provisioner-nil-guards-1813`).
|
||||
- **Source-level dispatcher pins** — `workspace_provision_auto_test.go` enforces the SoT pattern documented above:
|
||||
- `TestNoCallSiteCallsDirectProvisionerExceptAuto` — no handler calls `.provisionWorkspace(` or `.provisionWorkspaceCP(` directly outside the dispatcher's allowlist.
|
||||
- `TestNoCallSiteCallsBareStop` — no handler calls `.provisioner.Stop(` or `.cpProv.Stop(` directly outside the dispatcher's allowlist (strips Go comments before substring match so archaeology in code comments doesn't trip the gate).
|
||||
|
||||
@@ -671,6 +671,12 @@ for wid in "${WS_TO_CHECK[@]}"; do
|
||||
else
|
||||
DIAG_FAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('first_failure','unknown'))" 2>/dev/null || echo "unknown")
|
||||
DIAG_DETAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); s=[x for x in d.get('steps',[]) if not x.get('ok')]; step=s[0] if s else {}; print(' — '.join(x for x in [step.get('error',''), step.get('detail','')] if x))" 2>/dev/null || echo "")
|
||||
# #767: always emit the full diagnose JSON so operators see every step's
|
||||
# Detail field even when the Python extraction above fails or the shape
|
||||
# drifts. The burst is bracketed like steps 2 and 4 for grep-friendly CI.
|
||||
log "── DIAGNOSTIC BURST (step 7b — terminal diagnose for $wid) ──"
|
||||
echo "$DIAG_JSON" | python3 -m json.tool 2>/dev/null || echo "$DIAG_JSON"
|
||||
log "── END DIAGNOSTIC ──"
|
||||
fail "Workspace $wid terminal diagnose failed at step '$DIAG_FAIL': $DIAG_DETAIL — check tenant SG has tcp/22 from the configured EIC endpoint SG, MOLECULE_EIC_ENDPOINT_SG_ID is set in Railway, and EIC endpoint health"
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -73,6 +73,7 @@ func (h *ChannelHandler) List(c *gin.Context) {
|
||||
var config map[string]interface{}
|
||||
if err := json.Unmarshal(configJSON, &config); err != nil {
|
||||
log.Printf("Channels: unmarshal config for channel %s: %v", id, err)
|
||||
config = map[string]interface{}{}
|
||||
}
|
||||
// #319: decrypt sensitive fields first so the mask operates on
|
||||
// plaintext (first-4 / last-4 of the real token, not the ciphertext
|
||||
@@ -94,6 +95,7 @@ func (h *ChannelHandler) List(c *gin.Context) {
|
||||
var allowed []string
|
||||
if err := json.Unmarshal(allowedJSON, &allowed); err != nil {
|
||||
log.Printf("Channels: unmarshal allowed_users for channel %s: %v", id, err)
|
||||
allowed = []string{}
|
||||
}
|
||||
|
||||
entry := map[string]interface{}{
|
||||
@@ -540,9 +542,11 @@ func (h *ChannelHandler) Webhook(c *gin.Context) {
|
||||
}
|
||||
if err := json.Unmarshal(configJSON, &row.Config); err != nil {
|
||||
log.Printf("Channels: unmarshal config for webhook row %s: %v", row.ID, err)
|
||||
row.Config = map[string]interface{}{}
|
||||
}
|
||||
if err := json.Unmarshal(allowedJSON, &row.AllowedUsers); err != nil {
|
||||
log.Printf("Channels: unmarshal allowed_users for webhook row %s: %v", row.ID, err)
|
||||
row.AllowedUsers = []string{}
|
||||
}
|
||||
if err := channels.DecryptSensitiveFields(row.Config); err != nil {
|
||||
log.Printf("Channels: decrypt webhook row %s: %v", row.ID, err)
|
||||
|
||||
@@ -116,6 +116,56 @@ func TestChannelHandler_List(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestChannelHandler_List_InvalidJSON_FallsBack(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
handler := NewChannelHandler(newTestChannelManager())
|
||||
|
||||
rows := sqlmock.NewRows([]string{
|
||||
"id", "workspace_id", "channel_type", "channel_config", "enabled",
|
||||
"allowed_users", "last_message_at", "message_count", "created_at", "updated_at",
|
||||
}).AddRow(
|
||||
"ch-bad", "ws-1", "telegram",
|
||||
[]byte(`{not valid json`),
|
||||
true, []byte(`[also not json`), nil, 0, nil, nil,
|
||||
)
|
||||
mock.ExpectQuery("SELECT .* FROM workspace_channels WHERE workspace_id").
|
||||
WithArgs("ws-1").
|
||||
WillReturnRows(rows)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request, _ = http.NewRequest("GET", "/workspaces/ws-1/channels", nil)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != 200 {
|
||||
t.Errorf("expected 200, got %d", w.Code)
|
||||
}
|
||||
|
||||
var result []map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &result)
|
||||
if len(result) != 1 {
|
||||
t.Fatalf("expected 1 channel, got %d", len(result))
|
||||
}
|
||||
|
||||
config, ok := result[0]["config"].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("expected config to be a map, got %T", result[0]["config"])
|
||||
}
|
||||
if len(config) != 0 {
|
||||
t.Errorf("expected empty config after unmarshal fallback, got %v", config)
|
||||
}
|
||||
|
||||
allowed, ok := result[0]["allowed_users"].([]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("expected allowed_users to be a slice, got %T", result[0]["allowed_users"])
|
||||
}
|
||||
if len(allowed) != 0 {
|
||||
t.Errorf("expected empty allowed_users after unmarshal fallback, got %v", allowed)
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== Create ====================
|
||||
|
||||
func TestChannelHandler_Create_Success(t *testing.T) {
|
||||
@@ -546,6 +596,41 @@ func TestChannelHandler_Webhook_UnknownType(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestChannelHandler_Webhook_InvalidJSON_FallsBack verifies that when the DB
|
||||
// row contains invalid JSON for channel_config or allowed_users, the webhook
|
||||
// handler logs the error and falls back to an empty map/slice rather than
|
||||
// leaving the fields nil (which would panic on downstream code that expects
|
||||
// concrete values). With empty config there is no chat_id match, so the
|
||||
// handler returns {"status":"no_channel"}.
|
||||
func TestChannelHandler_Webhook_InvalidJSON_FallsBack(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
handler := NewChannelHandler(newTestChannelManager())
|
||||
|
||||
mock.ExpectQuery(`SELECT id, workspace_id, channel_type, channel_config, enabled, allowed_users FROM workspace_channels WHERE channel_type = .* AND enabled = true`).
|
||||
WithArgs("telegram").
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"id", "workspace_id", "channel_type", "channel_config", "enabled", "allowed_users",
|
||||
}).AddRow("ch-bad", "ws-1", "telegram", []byte(`{bad json`), true, []byte(`[bad json`)))
|
||||
|
||||
body := `{"update_id":1,"message":{"message_id":1,"from":{"id":111,"is_bot":false,"first_name":"Test","username":"testuser"},"chat":{"id":-100123,"title":"Test Group","type":"supergroup"},"date":1700000000,"text":"hello"}}`
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest(http.MethodPost, "/webhooks/telegram", strings.NewReader(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
c.Params = gin.Params{{Key: "type", Value: "telegram"}}
|
||||
|
||||
handler.Webhook(c)
|
||||
|
||||
if w.Code != 200 {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["status"] != "no_channel" {
|
||||
t.Errorf("expected status 'no_channel', got %v", resp["status"])
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== Discover ====================
|
||||
|
||||
func TestChannelHandler_Discover_MissingToken(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user