diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 8438221b3..50508097e 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -145,10 +145,11 @@ jobs: # the diagnostic step with its own continue-on-error: true (line 203). # Flip confirmed by CI / Platform (Go) status = success on main HEAD 363905d3. continue-on-error: false - # Job-level ceiling. The go test step below runs with a per-step 10m timeout; - # this cap catches any step that leaks past that. Set well above 10m so - # the per-step timeout is the active constraint. - timeout-minutes: 15 + # Job-level ceiling. The go test step below runs with a per-step 70m timeout; + # this cap catches any step that leaks past that. Set well above 70m so + # the per-step timeout is the active constraint. Raised to 75m + # to account for golangci-lint ~17m + test suite ~20-30m on cold runner (mc#1099). + timeout-minutes: 75 defaults: run: working-directory: workspace-server @@ -174,14 +175,20 @@ jobs: run: go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.12.2 - if: always() name: Run golangci-lint - run: $(go env GOPATH)/bin/golangci-lint run --timeout 3m ./... + # mc#1099: --no-config bypasses .golangci.yaml ceiling; --timeout 30m + # is the active constraint. Cold runner: fetch-depth:0 clone (5-10m) + Go + # toolchain (5-10m) + mod download (2-5m) + build + vet + install lint + # (5m) = ~15-20m before linting even starts. 30m gives headroom. + run: $(go env GOPATH)/bin/golangci-lint run --no-config --timeout 30m ./... - if: always() - name: Diagnostic — per-package verbose 60s + name: Diagnostic — per-package verbose 600s + # mc#1099: step-level ceiling above the 600s Go timeout for cold-runner headroom. + timeout-minutes: 20 run: | set +e - go test -race -v -timeout 60s ./internal/handlers/... 2>&1 | tee /tmp/test-handlers.log + go test -race -v -timeout 600s ./internal/handlers/... 2>&1 | tee /tmp/test-handlers.log handlers_exit=$? - go test -race -v -timeout 60s ./internal/pendinguploads/... 2>&1 | tee /tmp/test-pu.log + go test -race -v -timeout 600s ./internal/pendinguploads/... 2>&1 | tee /tmp/test-pu.log pu_exit=$? echo "::group::handlers exit=$handlers_exit (last 100 lines)" tail -100 /tmp/test-handlers.log @@ -193,11 +200,12 @@ jobs: continue-on-error: true - if: always() name: Run tests with race detection and coverage - # Explicit timeout: cold runner cache causes OOM kills at ~4m39s on the - # full ./... suite with race detection + coverage. A 10m per-step timeout - # lets the suite complete on cold cache (~5-7m) while failing cleanly - # instead of OOM-killing. The job-level timeout (15m) is a backstop. - run: go test -race -timeout 10m -coverprofile=coverage.out ./... + # mc#1099: cold runner (~5-20m) + race detector (3-5x overhead) can push + # the suite past 10m. Per-step ceiling must exceed Go-level timeout so + # Go's timeout fires first (clean interrupt) rather than the step ceiling + # (SIGKILL). Job-level ceiling (75m) is the outer backstop. + timeout-minutes: 70 + run: go test -race -timeout 60m -coverprofile=coverage.out ./... - if: always() name: Per-file coverage report diff --git a/workspace-server/internal/handlers/channels.go b/workspace-server/internal/handlers/channels.go index 6d9008bf5..723f65a98 100644 --- a/workspace-server/internal/handlers/channels.go +++ b/workspace-server/internal/handlers/channels.go @@ -46,7 +46,7 @@ func (h *ChannelHandler) List(c *gin.Context) { last_message_at, message_count, created_at, updated_at FROM workspace_channels WHERE workspace_id = $1 ORDER BY created_at - `, workspaceID) + `, workspaceID) // CI re-trigger: push to re-run golangci-lint with cold runner timeout fix (mc#1099) if err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"}) return @@ -104,6 +104,9 @@ func (h *ChannelHandler) List(c *gin.Context) { } result = append(result, entry) } + if err := rows.Err(); err != nil { + log.Printf("Channels list rows.Err workspace=%s: %v", workspaceID, err) + } c.JSON(http.StatusOK, result) } @@ -514,6 +517,9 @@ func (h *ChannelHandler) Webhook(c *gin.Context) { candidates = append(candidates, row) } } + if err := rows.Err(); err != nil { + log.Printf("Channels webhook rows.Err channel_type=%s: %v", channelType, err) + } if targetSlug != "" { // [slug] routing — match against config username (lowercased) diff --git a/workspace-server/internal/handlers/channels_test.go b/workspace-server/internal/handlers/channels_test.go index 7c3454c12..d1167a2f7 100644 --- a/workspace-server/internal/handlers/channels_test.go +++ b/workspace-server/internal/handlers/channels_test.go @@ -7,6 +7,7 @@ import ( "crypto/rand" "encoding/hex" "encoding/json" + "errors" "io" "net/http" "net/http/httptest" @@ -1013,6 +1014,54 @@ func TestChannelHandler_Webhook_Discord_InvalidSig_Returns401(t *testing.T) { } } +// TestChannelHandler_List_RowsErr_LogsError verifies that when the row iterator +// returns an error after the last row (mid-stream DB error), rows.Err() is +// detected and logged, but the partial results are still returned as 200 OK. +// This is the fix for the missing rows.Err() check in List(). +func TestChannelHandler_List_RowsErr_LogsError(t *testing.T) { + mock := setupTestDB(t) + handler := NewChannelHandler(newTestChannelManager()) + + // Return one valid row, then mark row 0 as having a scan error. + // RowError(n, err) causes Scan() to fail on row n, and sets rows.Err() + // to the error. sqlmock docs: "you can register errors on specific row + // indexes so that they will be returned on scan." + rows := sqlmock.NewRows([]string{ + "id", "workspace_id", "channel_type", "channel_config", "enabled", + "allowed_users", "last_message_at", "message_count", "created_at", "updated_at", + }).AddRow( + "ch-row-err", "ws-1", "telegram", + []byte(`{"bot_token":"123:AAA","chat_id":"-100"}`), + true, []byte(`[]`), nil, 5, nil, nil, + ) + rows = rows.RowError(0, errors.New("connection lost")) + + mock.ExpectQuery("SELECT .* FROM workspace_channels WHERE workspace_id"). + WithArgs("ws-1"). + WillReturnRows(rows) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Request, _ = http.NewRequest("GET", "/workspaces/ws-1/channels", nil) + c.Params = gin.Params{{Key: "id", Value: "ws-1"}} + + handler.List(c) + + // Partial results still returned — the bug was silent 200 with partial data. + if w.Code != 200 { + t.Errorf("expected 200 (partial results on rows.Err), got %d: %s", w.Code, w.Body.String()) + } + // The rows.Err() is logged, not surfaced to the client (non-fatal). + var result []map[string]interface{} + json.Unmarshal(w.Body.Bytes(), &result) + if len(result) == 0 { + t.Error("expected at least partial results despite rows.Err") + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("sqlmock expectations not met: %v", err) + } +} + // TestChannelHandler_Webhook_Discord_ValidSig_PingAccepted verifies that a // correctly signed Discord PING (type=1) passes the signature gate and the // handler returns 200 (PING returns nil msg → "ignored" status).