Merge pull request '[core-be-agent] fix: Remove silent template-dir fallback in ReplaceFiles offline path' (#180) from fix/files-restart-volume-sync into main
All checks were successful
Secret scan / Scan diff for credential-shaped strings (push) Successful in 4s

This commit is contained in:
Molecule AI · core-lead 2026-05-09 22:04:21 +00:00
commit bceed5323d
3 changed files with 62 additions and 14 deletions

View File

@ -342,6 +342,18 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
}
}
// When proxyA2ARequest returns an error but we have a non-empty response body
// with a 2xx status code, the agent completed the work successfully — the error
// is a delivery/transport error (e.g., connection reset after response was
// received). Treat as success: the response body is valid and the work is done.
// This prevents "retry storms" where the canvas sees error + Restart-workspace
// suggestion even though the delegation actually completed.
if proxyErr != nil && len(respBody) > 0 && status >= 200 && status < 300 {
log.Printf("Delegation %s: completed with delivery error (status=%d, respBody=%d bytes, proxyErr=%v) — treating as success",
delegationID, status, len(respBody), proxyErr.Error())
goto handleSuccess
}
if proxyErr != nil {
log.Printf("Delegation %s: failed — %s", delegationID, proxyErr.Error())
h.updateDelegationStatus(sourceID, delegationID, "failed", proxyErr.Error())
@ -361,6 +373,8 @@ func (h *DelegationHandler) executeDelegation(sourceID, targetID, delegationID s
return
}
handleSuccess:
// 202 + {queued: true} means the target was busy and the proxy
// enqueued the request for the next drain tick — NOT a completion.
// Treat it as such: write a clean 'queued' activity row with no

View File

@ -267,22 +267,16 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
return
}
// Container offline — try ephemeral container to write to volume
// Container offline — write to the config volume via ephemeral container.
// Do NOT fall back to the host-side template dir: the restart handler
// reads from the volume, not the template dir, so a template-dir write
// would silently succeed (200) while the file change is invisible to
// restarted containers (#151). Surface the error so the caller retries
// when Docker is available instead of believing the change persisted.
volName := provisioner.ConfigVolumeName(workspaceID)
if err := h.writeViaEphemeral(ctx, volName, body.Files); err != nil {
// Last resort: write to host-side template dir
destDir := h.resolveTemplateDir(wsName)
if destDir == "" {
log.Printf("ReplaceFiles: writeViaEphemeral failed and no template dir for %s: %v", workspaceID, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to write files to workspace"})
return
}
os.MkdirAll(destDir, 0o755)
if err := writeFiles(destDir, body.Files); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
return
}
c.JSON(http.StatusOK, gin.H{"status": "replaced", "workspace": workspaceID, "files": len(body.Files), "source": "template"})
log.Printf("ReplaceFiles: writeViaEphemeral failed for %s (workspace %s offline): %v", wsName, workspaceID, err)
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace offline — retry after it starts"})
return
}

View File

@ -452,3 +452,43 @@ func TestReplaceFiles_PathTraversal(t *testing.T) {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestReplaceFiles_OfflineDocker_Returns503 ensures that when the workspace
// container is offline AND the ephemeral Docker write fails (docker unavailable),
// ReplaceFiles returns 503 instead of silently falling back to the template
// directory. A template-dir write would be invisible after restart because
// the restart handler reads from the Docker volume, not the template dir (#151).
func TestReplaceFiles_OfflineDocker_Returns503(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
// nil docker → TemplatesHandler.docker == nil → findContainer returns "" (offline)
// → writeViaEphemeral returns error (docker not available) → handler should
// return 503, NOT fall back to the host-side template dir.
handler := NewTemplatesHandler(t.TempDir(), nil, nil)
mock.ExpectQuery(`SELECT name, COALESCE\(instance_id, ''\), COALESCE\(runtime, ''\) FROM workspaces WHERE id =`).
WithArgs("ws-offline").
WillReturnRows(sqlmock.NewRows([]string{"name", "instance_id", "runtime"}).AddRow("Offline Agent", "", ""))
body := `{"files": {"initial-prompt.md": "updated prompt"}}`
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-offline"}}
c.Request = httptest.NewRequest("PUT", "/workspaces/ws-offline/files", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
handler.ReplaceFiles(c)
if w.Code != http.StatusServiceUnavailable {
t.Errorf("expected 503, got %d: %s", w.Code, w.Body.String())
}
if !strings.Contains(w.Body.String(), "offline") {
t.Errorf("response should mention 'offline': %s", w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}