diff --git a/workspace-server/internal/handlers/delegation.go b/workspace-server/internal/handlers/delegation.go index 1bda2c63..0156f864 100644 --- a/workspace-server/internal/handlers/delegation.go +++ b/workspace-server/internal/handlers/delegation.go @@ -342,6 +342,18 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s } } + // When proxyA2ARequest returns an error but we have a non-empty response body + // with a 2xx status code, the agent completed the work successfully — the error + // is a delivery/transport error (e.g., connection reset after response was + // received). Treat as success: the response body is valid and the work is done. + // This prevents "retry storms" where the canvas sees error + Restart-workspace + // suggestion even though the delegation actually completed. + if proxyErr != nil && len(respBody) > 0 && status >= 200 && status < 300 { + log.Printf("Delegation %s: completed with delivery error (status=%d, respBody=%d bytes, proxyErr=%v) — treating as success", + delegationID, status, len(respBody), proxyErr.Error()) + goto handleSuccess + } + if proxyErr != nil { log.Printf("Delegation %s: failed — %s", delegationID, proxyErr.Error()) h.updateDelegationStatus(sourceID, delegationID, "failed", proxyErr.Error()) @@ -361,6 +373,8 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s return } +handleSuccess: + // 202 + {queued: true} means the target was busy and the proxy // enqueued the request for the next drain tick — NOT a completion. // Treat it as such: write a clean 'queued' activity row with no diff --git a/workspace-server/internal/handlers/template_import.go b/workspace-server/internal/handlers/template_import.go index 7b16c17f..9dc5f078 100644 --- a/workspace-server/internal/handlers/template_import.go +++ b/workspace-server/internal/handlers/template_import.go @@ -267,22 +267,16 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) { return } - // Container offline — try ephemeral container to write to volume + // Container offline — write to the config volume via ephemeral container. + // Do NOT fall back to the host-side template dir: the restart handler + // reads from the volume, not the template dir, so a template-dir write + // would silently succeed (200) while the file change is invisible to + // restarted containers (#151). Surface the error so the caller retries + // when Docker is available instead of believing the change persisted. volName := provisioner.ConfigVolumeName(workspaceID) if err := h.writeViaEphemeral(ctx, volName, body.Files); err != nil { - // Last resort: write to host-side template dir - destDir := h.resolveTemplateDir(wsName) - if destDir == "" { - log.Printf("ReplaceFiles: writeViaEphemeral failed and no template dir for %s: %v", workspaceID, err) - c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to write files to workspace"}) - return - } - os.MkdirAll(destDir, 0o755) - if err := writeFiles(destDir, body.Files); err != nil { - c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"}) - return - } - c.JSON(http.StatusOK, gin.H{"status": "replaced", "workspace": workspaceID, "files": len(body.Files), "source": "template"}) + log.Printf("ReplaceFiles: writeViaEphemeral failed for %s (workspace %s offline): %v", wsName, workspaceID, err) + c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace offline — retry after it starts"}) return } diff --git a/workspace-server/internal/handlers/template_import_test.go b/workspace-server/internal/handlers/template_import_test.go index c496f9c5..59e3550d 100644 --- a/workspace-server/internal/handlers/template_import_test.go +++ b/workspace-server/internal/handlers/template_import_test.go @@ -452,3 +452,43 @@ func TestReplaceFiles_PathTraversal(t *testing.T) { t.Errorf("unmet sqlmock expectations: %v", err) } } + +// TestReplaceFiles_OfflineDocker_Returns503 ensures that when the workspace +// container is offline AND the ephemeral Docker write fails (docker unavailable), +// ReplaceFiles returns 503 instead of silently falling back to the template +// directory. A template-dir write would be invisible after restart because +// the restart handler reads from the Docker volume, not the template dir (#151). +func TestReplaceFiles_OfflineDocker_Returns503(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + // nil docker → TemplatesHandler.docker == nil → findContainer returns "" (offline) + // → writeViaEphemeral returns error (docker not available) → handler should + // return 503, NOT fall back to the host-side template dir. + handler := NewTemplatesHandler(t.TempDir(), nil, nil) + + mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`). + WithArgs("ws-offline"). + WillReturnRows(sqlmock.NewRows([]string{"name", "instance_id", "runtime"}).AddRow("Offline Agent", "", "")) + + body := `{"files": {"initial-prompt.md": "updated prompt"}}` + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-offline"}} + c.Request = httptest.NewRequest("PUT", "/workspaces/ws-offline/files", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.ReplaceFiles(c) + + if w.Code != http.StatusServiceUnavailable { + t.Errorf("expected 503, got %d: %s", w.Code, w.Body.String()) + } + + if !strings.Contains(w.Body.String(), "offline") { + t.Errorf("response should mention 'offline': %s", w.Body.String()) + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +}