From cda3a01e00e35899a685daa526230c313c05a5eb Mon Sep 17 00:00:00 2001 From: Molecule AI Core-QA Date: Fri, 15 May 2026 11:42:04 +0000 Subject: [PATCH] fix(ci): increase Go test timeouts for cold runner performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cold runners with -race flag need 13-25 minutes for the full ./... suite (compilation + race-instrumented execution), exceeding the previous limits. This change raises: - 60s diagnostic per-package timeout -> 300s (handlers, pendinguploads) - 10m main suite timeout -> 30m - 15m job-level ceiling -> 35m The OOM issue (mc#1099) was fixed by the 10m timeout, but that was calibrated for warm cache (~5-7m). Cold runners hit 13-25m, causing the suite to be killed mid-execution with non-zero exit, blocking all staging PRs. All 36 Go packages pass locally (non-race, ~20s total). No test changes — only CI timeout calibration. Co-Authored-By: Claude Opus 4.7 --- .gitea/workflows/ci.yml | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 8438221b3..93445e8dc 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -145,10 +145,10 @@ jobs: # the diagnostic step with its own continue-on-error: true (line 203). # Flip confirmed by CI / Platform (Go) status = success on main HEAD 363905d3. continue-on-error: false - # Job-level ceiling. The go test step below runs with a per-step 10m timeout; - # this cap catches any step that leaks past that. Set well above 10m so + # Job-level ceiling. The go test step below runs with a per-step 30m timeout; + # this cap catches any step that leaks past that. Set well above 30m so # the per-step timeout is the active constraint. - timeout-minutes: 15 + timeout-minutes: 35 defaults: run: working-directory: workspace-server @@ -176,12 +176,14 @@ jobs: name: Run golangci-lint run: $(go env GOPATH)/bin/golangci-lint run --timeout 3m ./...
- if: always() - name: Diagnostic — per-package verbose 60s + name: Diagnostic — per-package verbose (300s timeout) run: | set +e - go test -race -v -timeout 60s ./internal/handlers/... 2>&1 | tee /tmp/test-handlers.log + # 300s allows handlers + pendinguploads packages to complete on cold + # runners with -race instrumentation (~60-120s each vs ~14s non-race). + go test -race -v -timeout 300s ./internal/handlers/... 2>&1 | tee /tmp/test-handlers.log handlers_exit=$? - go test -race -v -timeout 60s ./internal/pendinguploads/... 2>&1 | tee /tmp/test-pu.log + go test -race -v -timeout 300s ./internal/pendinguploads/... 2>&1 | tee /tmp/test-pu.log pu_exit=$? echo "::group::handlers exit=$handlers_exit (last 100 lines)" tail -100 /tmp/test-handlers.log @@ -194,10 +196,10 @@ jobs: - if: always() name: Run tests with race detection and coverage # Explicit timeout: cold runner cache causes OOM kills at ~4m39s on the - # full ./... suite with race detection + coverage. A 10m per-step timeout - # lets the suite complete on cold cache (~5-7m) while failing cleanly - # instead of OOM-killing. The job-level timeout (15m) is a backstop. - run: go test -race -timeout 10m -coverprofile=coverage.out ./... + # full ./... suite with race detection + coverage. The go test -timeout 30m + # (enforced by Go per package test binary) lets the suite complete on cold cache (~13-25m) while failing cleanly + # instead of OOM-killing. The job-level timeout (35m) is a backstop. + run: go test -race -timeout 30m -coverprofile=coverage.out ./... - if: always() name: Per-file coverage report -- 2.52.0