feat(workspace-server): GET /workspaces/:id returns 410 Gone when status='removed' (#2429)

Defense-in-depth at the endpoint level. Previously, GET /workspaces/:id
returned 200 OK with `status:"removed"` in the body for deleted
workspaces — silent-fail UX hit on the hongmingwang tenant 2026-04-30:
the channel bridge / molecule-mcp wheel had a dead workspace_id + token
in .env, get_workspace_info returned 200 → caller assumed everything
was fine, then every subsequent /registry/* call 401d because tokens
were revoked, and operators had no idea their workspace was gone.

#2425 fixed the steady-state heartbeat path (escalate to ERROR after
3 consecutive 401s). This change is the startup-time defense — fail
loud when the operator first probes the workspace instead of waiting
for the heartbeat to sour.

The 410 body includes:
  {error: "workspace removed", id, removed_at, hint: "Regenerate ..."}

Audit-trail consumers that need the body shape of a removed workspace
(admin views, "show me deleted workspaces" tooling) opt into the
legacy 200 + body via ?include_removed=true. Without this opt-in path
the audit trail becomes invisible at the API layer.

Two new tests pinned:
  - TestWorkspaceGet_RemovedReturns410
  - TestWorkspaceGet_RemovedWithIncludeQueryReturns200

Follow-ups in separate PRs:
  - Update workspace/a2a_client.py get_workspace_info to surface
    "removed" specifically rather than collapsing into "not found"
  - Update channel bridge getWorkspaceInfo (server.ts) to detect 410
    → log clear "workspace was deleted, re-onboard" error
  - Audit canvas/* + admin tooling consumers that may rely on the
    legacy 200 + status:"removed" shape; switch them to the
    ?include_removed=true opt-in if needed
  - Update docs (runtime-mcp.mdx Troubleshooting + external-agents.mdx
    lifecycle table)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-04-30 21:55:24 -07:00
parent 08d082d466
commit 72f0079c10
2 changed files with 145 additions and 0 deletions

View File

@ -14,6 +14,7 @@ import (
"os"
"path/filepath"
"strings"
"time"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/crypto"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
@ -649,6 +650,30 @@ func (h *WorkspaceHandler) Get(c *gin.Context) {
return
}
// #2429: workspaces with status='removed' return 410 Gone (not 200)
// so callers fail loudly at startup instead of after 60s of revoked-
// token heartbeats. The audit-trail consumers that need the body of
// a removed workspace opt in via ?include_removed=true.
//
// Why a query param and not a header: cheap to set in curl/canvas
// fetch alike, visible in access logs, and works without coupling
// to content negotiation.
if status, _ := ws["status"].(string); status == string(models.StatusRemoved) {
if c.Query("include_removed") != "true" {
var removedAt time.Time
_ = db.DB.QueryRowContext(c.Request.Context(),
`SELECT updated_at FROM workspaces WHERE id = $1`, id,
).Scan(&removedAt)
c.JSON(http.StatusGone, gin.H{
"error": "workspace removed",
"id": id,
"removed_at": removedAt,
"hint": "Regenerate workspace + token from the canvas → Tokens tab",
})
return
}
}
// Strip sensitive fields — GET /workspaces/:id is on the open router.
// Any caller with a valid UUID would otherwise read operational data.
delete(ws, "budget_limit")

View File

@ -9,6 +9,7 @@ import (
"os"
"path/filepath"
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"github.com/Molecule-AI/molecule-monorepo/platform/internal/models"
@ -97,6 +98,125 @@ func TestWorkspaceGet_NotFound(t *testing.T) {
}
}
// #2429: GET /workspaces/:id returns 410 Gone when status='removed'.
// Defense-in-depth at the endpoint level — without this, callers
// holding stale workspace_id + token tuples (channel bridge .env,
// captured curl scripts, etc.) get 200 + status:"removed" and have
// no idea their tokens are revoked until the heartbeat fails 60s
// later. 410 makes startup fail loud instead.
func TestWorkspaceGet_RemovedReturns410(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
id := "cccccccc-0010-0000-0000-000000000000"
removedAt := time.Date(2026, 4, 30, 12, 0, 0, 0, time.UTC)
columns := []string{
"id", "name", "role", "tier", "status", "agent_card", "url",
"parent_id", "active_tasks", "max_concurrent_tasks", "last_error_rate", "last_sample_error",
"uptime_seconds", "current_task", "runtime", "workspace_dir", "x", "y", "collapsed",
"budget_limit", "monthly_spend",
}
mock.ExpectQuery("SELECT w.id, w.name").
WithArgs(id).
WillReturnRows(sqlmock.NewRows(columns).
AddRow(id, "Old Agent", "worker", 1, string(models.StatusRemoved), []byte(`null`),
"", nil, 0, 1, 0.0, "", 0, "", "langgraph",
"", 0.0, 0.0, false,
nil, 0))
mock.ExpectQuery(`SELECT updated_at FROM workspaces`).
WithArgs(id).
WillReturnRows(sqlmock.NewRows([]string{"updated_at"}).AddRow(removedAt))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: id}}
c.Request = httptest.NewRequest("GET", "/workspaces/"+id, nil)
handler.Get(c)
if w.Code != http.StatusGone {
t.Fatalf("expected 410 Gone, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("failed to parse 410 body: %v", err)
}
if resp["error"] != "workspace removed" {
t.Errorf("expected error 'workspace removed', got %v", resp["error"])
}
if resp["id"] != id {
t.Errorf("expected id %q, got %v", id, resp["id"])
}
if _, ok := resp["removed_at"]; !ok {
t.Errorf("expected removed_at in 410 body, got: %v", resp)
}
if _, ok := resp["hint"]; !ok {
t.Errorf("expected hint in 410 body, got: %v", resp)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// Audit-trail consumers (admin views, "show me deleted workspaces"
// tooling) opt into the legacy 200 + body shape via
// ?include_removed=true. Without this opt-in path the audit trail
// becomes invisible at the API layer.
func TestWorkspaceGet_RemovedWithIncludeQueryReturns200(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
id := "cccccccc-0011-0000-0000-000000000000"
columns := []string{
"id", "name", "role", "tier", "status", "agent_card", "url",
"parent_id", "active_tasks", "max_concurrent_tasks", "last_error_rate", "last_sample_error",
"uptime_seconds", "current_task", "runtime", "workspace_dir", "x", "y", "collapsed",
"budget_limit", "monthly_spend",
}
mock.ExpectQuery("SELECT w.id, w.name").
WithArgs(id).
WillReturnRows(sqlmock.NewRows(columns).
AddRow(id, "Audit Agent", "worker", 1, string(models.StatusRemoved), []byte(`null`),
"", nil, 0, 1, 0.0, "", 0, "", "langgraph",
"", 0.0, 0.0, false,
nil, 0))
// last_outbound_at follow-up query (existing path)
mock.ExpectQuery(`SELECT last_outbound_at FROM workspaces`).
WithArgs(id).
WillReturnRows(sqlmock.NewRows([]string{"last_outbound_at"}).AddRow(nil))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: id}}
c.Request = httptest.NewRequest("GET", "/workspaces/"+id+"?include_removed=true", nil)
handler.Get(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200 OK with ?include_removed=true, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("failed to parse response: %v", err)
}
if resp["status"] != string(models.StatusRemoved) {
t.Errorf("expected status 'removed' in body, got %v", resp["status"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
func TestWorkspaceGet_DBError(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)