From 72f0079c106e21bd339d4389ec2f487726d80e84 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Thu, 30 Apr 2026 21:55:24 -0700 Subject: [PATCH] feat(workspace-server): GET /workspaces/:id returns 410 Gone when status='removed' (#2429) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defense-in-depth at the endpoint level. Previously, GET /workspaces/:id returned 200 OK with `status:"removed"` in the body for deleted workspaces — silent-fail UX hit on the hongmingwang tenant 2026-04-30: the channel bridge / molecule-mcp wheel had a dead workspace_id + token in .env, get_workspace_info returned 200 → caller assumed everything was fine, then every subsequent /registry/* call 401d because tokens were revoked, and operators had no idea their workspace was gone. #2425 fixed the steady-state heartbeat path (escalate to ERROR after 3 consecutive 401s). This change is the startup-time defense — fail loud when the operator first probes the workspace instead of waiting for the heartbeat to sour. The 410 body includes: {error: "workspace removed", id, removed_at, hint: "Regenerate ..."} Audit-trail consumers that need the body shape of a removed workspace (admin views, "show me deleted workspaces" tooling) opt into the legacy 200 + body via ?include_removed=true. Without this opt-in path the audit trail becomes invisible at the API layer. Two new tests pinned: - TestWorkspaceGet_RemovedReturns410 - TestWorkspaceGet_RemovedWithIncludeQueryReturns200 Follow-ups in separate PRs: - Update workspace/a2a_client.py get_workspace_info to surface "removed" specifically rather than collapsing into "not found" - Update channel bridge getWorkspaceInfo (server.ts) to detect 410 → log clear "workspace was deleted, re-onboard" error - Audit canvas/* + admin tooling consumers that may rely on the legacy 200 + status:"removed" shape; switch them to the ?include_removed=true opt-in if needed - Update docs (runtime-mcp.mdx Troubleshooting + external-agents.mdx lifecycle table) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../internal/handlers/workspace.go | 25 ++++ .../internal/handlers/workspace_test.go | 120 ++++++++++++++++++ 2 files changed, 145 insertions(+) diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index c4a3376f..23d67f44 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -14,6 +14,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/Molecule-AI/molecule-monorepo/platform/internal/crypto" "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" @@ -649,6 +650,30 @@ func (h *WorkspaceHandler) Get(c *gin.Context) { return } + // #2429: workspaces with status='removed' return 410 Gone (not 200) + // so callers fail loudly at startup instead of after 60s of revoked- + // token heartbeats. The audit-trail consumers that need the body of + // a removed workspace opt in via ?include_removed=true. + // + // Why a query param and not a header: cheap to set in curl/canvas + // fetch alike, visible in access logs, and works without coupling + // to content negotiation. + if status, _ := ws["status"].(string); status == string(models.StatusRemoved) { + if c.Query("include_removed") != "true" { + var removedAt time.Time + _ = db.DB.QueryRowContext(c.Request.Context(), + `SELECT updated_at FROM workspaces WHERE id = $1`, id, + ).Scan(&removedAt) + c.JSON(http.StatusGone, gin.H{ + "error": "workspace removed", + "id": id, + "removed_at": removedAt, + "hint": "Regenerate workspace + token from the canvas → Tokens tab", + }) + return + } + } + // Strip sensitive fields — GET /workspaces/:id is on the open router. // Any caller with a valid UUID would otherwise read operational data. delete(ws, "budget_limit") diff --git a/workspace-server/internal/handlers/workspace_test.go b/workspace-server/internal/handlers/workspace_test.go index 9149b178..f1093191 100644 --- a/workspace-server/internal/handlers/workspace_test.go +++ b/workspace-server/internal/handlers/workspace_test.go @@ -9,6 +9,7 @@ import ( "os" "path/filepath" "testing" + "time" "github.com/DATA-DOG/go-sqlmock" "github.com/Molecule-AI/molecule-monorepo/platform/internal/models" @@ -97,6 +98,125 @@ func TestWorkspaceGet_NotFound(t *testing.T) { } } +// #2429: GET /workspaces/:id returns 410 Gone when status='removed'. +// Defense-in-depth at the endpoint level — without this, callers +// holding stale workspace_id + token tuples (channel bridge .env, +// captured curl scripts, etc.) get 200 + status:"removed" and have +// no idea their tokens are revoked until the heartbeat fails 60s +// later. 410 makes startup fail loud instead. +func TestWorkspaceGet_RemovedReturns410(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + + id := "cccccccc-0010-0000-0000-000000000000" + removedAt := time.Date(2026, 4, 30, 12, 0, 0, 0, time.UTC) + + columns := []string{ + "id", "name", "role", "tier", "status", "agent_card", "url", + "parent_id", "active_tasks", "max_concurrent_tasks", "last_error_rate", "last_sample_error", + "uptime_seconds", "current_task", "runtime", "workspace_dir", "x", "y", "collapsed", + "budget_limit", "monthly_spend", + } + mock.ExpectQuery("SELECT w.id, w.name"). + WithArgs(id). + WillReturnRows(sqlmock.NewRows(columns). + AddRow(id, "Old Agent", "worker", 1, string(models.StatusRemoved), []byte(`null`), + "", nil, 0, 1, 0.0, "", 0, "", "langgraph", + "", 0.0, 0.0, false, + nil, 0)) + mock.ExpectQuery(`SELECT updated_at FROM workspaces`). + WithArgs(id). + WillReturnRows(sqlmock.NewRows([]string{"updated_at"}).AddRow(removedAt)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: id}} + c.Request = httptest.NewRequest("GET", "/workspaces/"+id, nil) + + handler.Get(c) + + if w.Code != http.StatusGone { + t.Fatalf("expected 410 Gone, got %d: %s", w.Code, w.Body.String()) + } + + var resp map[string]interface{} + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatalf("failed to parse 410 body: %v", err) + } + if resp["error"] != "workspace removed" { + t.Errorf("expected error 'workspace removed', got %v", resp["error"]) + } + if resp["id"] != id { + t.Errorf("expected id %q, got %v", id, resp["id"]) + } + if _, ok := resp["removed_at"]; !ok { + t.Errorf("expected removed_at in 410 body, got: %v", resp) + } + if _, ok := resp["hint"]; !ok { + t.Errorf("expected hint in 410 body, got: %v", resp) + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// Audit-trail consumers (admin views, "show me deleted workspaces" +// tooling) opt into the legacy 200 + body shape via +// ?include_removed=true. Without this opt-in path the audit trail +// becomes invisible at the API layer. +func TestWorkspaceGet_RemovedWithIncludeQueryReturns200(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + + id := "cccccccc-0011-0000-0000-000000000000" + + columns := []string{ + "id", "name", "role", "tier", "status", "agent_card", "url", + "parent_id", "active_tasks", "max_concurrent_tasks", "last_error_rate", "last_sample_error", + "uptime_seconds", "current_task", "runtime", "workspace_dir", "x", "y", "collapsed", + "budget_limit", "monthly_spend", + } + mock.ExpectQuery("SELECT w.id, w.name"). + WithArgs(id). + WillReturnRows(sqlmock.NewRows(columns). + AddRow(id, "Audit Agent", "worker", 1, string(models.StatusRemoved), []byte(`null`), + "", nil, 0, 1, 0.0, "", 0, "", "langgraph", + "", 0.0, 0.0, false, + nil, 0)) + // last_outbound_at follow-up query (existing path) + mock.ExpectQuery(`SELECT last_outbound_at FROM workspaces`). + WithArgs(id). + WillReturnRows(sqlmock.NewRows([]string{"last_outbound_at"}).AddRow(nil)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: id}} + c.Request = httptest.NewRequest("GET", "/workspaces/"+id+"?include_removed=true", nil) + + handler.Get(c) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200 OK with ?include_removed=true, got %d: %s", w.Code, w.Body.String()) + } + + var resp map[string]interface{} + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatalf("failed to parse response: %v", err) + } + if resp["status"] != string(models.StatusRemoved) { + t.Errorf("expected status 'removed' in body, got %v", resp["status"]) + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + func TestWorkspaceGet_DBError(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t)