diff --git a/workspace-server/internal/handlers/chat_files.go b/workspace-server/internal/handlers/chat_files.go index c438bae7..b58d2d28 100644 --- a/workspace-server/internal/handlers/chat_files.go +++ b/workspace-server/internal/handlers/chat_files.go @@ -30,6 +30,7 @@ package handlers import ( "context" + "database/sql" "fmt" "io" "log" @@ -102,14 +103,45 @@ const chatUploadDir = "/workspace/.molecule/chat-uploads" // of bug as the original SaaS provision drift fixed in #2366; this // extraction prevents that class on the consumer side. func resolveWorkspaceForwardCreds(c *gin.Context, ctx context.Context, workspaceID, op string) (wsURL, secret string, ok bool) { + var deliveryMode sql.NullString if err := db.DB.QueryRowContext(ctx, - `SELECT COALESCE(url, '') FROM workspaces WHERE id = $1`, workspaceID, - ).Scan(&wsURL); err != nil { + `SELECT COALESCE(url, ''), delivery_mode FROM workspaces WHERE id = $1`, workspaceID, + ).Scan(&wsURL, &deliveryMode); err != nil { log.Printf("chat_files %s: workspace lookup failed for %s: %v", op, workspaceID, err) c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"}) return "", "", false } if wsURL == "" { + // Distinguish the two empty-URL classes so the user sees an + // actionable error rather than a misleading "not registered yet" + // (which implies waiting will help): + // + // push-mode → URL just isn't on the row yet (workspace + // restart in progress, or first /registry/register hasn't + // landed). 503 + "not registered yet" is correct — retry + // after the next heartbeat (~30s) will likely succeed. + // + // anything else (poll-mode, NULL, empty string) → URL is + // structurally absent. The platform never dispatches to a + // non-push workspace, so chat upload (which is HTTP-forward + // by design) cannot proceed by waiting. Returning 503 here + // would loop the canvas client forever. 422 signals "this + // request can't succeed against THIS workspace's + // configuration" — the only fix is to re-register the + // workspace with a publicly-reachable URL. + // + // Live-observed 2026-05-04: external runtime workspaces (e.g. + // molecule-sdk-python on a mac laptop) register with + // delivery_mode=NULL. The narrow "poll" check missed them; the + // invariant we actually want is "URL empty + not-push = no + // dispatch path, ever". + if !deliveryMode.Valid || deliveryMode.String != "push" { + c.JSON(http.StatusUnprocessableEntity, gin.H{ + "error": "workspace has no callback URL — chat " + op + " requires push-mode + public URL", + "detail": "This workspace registered without a publicly-reachable URL (delivery_mode is not 'push'). The platform cannot dispatch chat uploads to it. Re-register the workspace with a public URL in push mode (e.g. via ngrok / Cloudflare tunnel) to enable chat file " + op + ".", + }) + return "", "", false + } c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace url not registered yet"}) return "", "", false } diff --git a/workspace-server/internal/handlers/chat_files_test.go b/workspace-server/internal/handlers/chat_files_test.go index 4556e77a..e7829f45 100644 --- a/workspace-server/internal/handlers/chat_files_test.go +++ b/workspace-server/internal/handlers/chat_files_test.go @@ -58,16 +58,38 @@ func uploadFixture(t *testing.T) (*bytes.Buffer, string) { return &buf, mw.FormDataContentType() } -// expectURL stubs the SELECT that resolves the workspace's url. +// expectURL stubs the SELECT that resolves the workspace's url + +// delivery_mode. Defaults delivery_mode to "push" — most tests don't +// care about the mode and just want a URL to forward to. Use +// expectURLAndMode when the test needs a specific mode (e.g. the +// poll-mode 422 path). func expectURL(mock sqlmock.Sqlmock, workspaceID, url string) { - mock.ExpectQuery(`SELECT COALESCE\(url, ''\) FROM workspaces WHERE id = \$1`). + expectURLAndMode(mock, workspaceID, url, "push") +} + +// expectURLAndMode is the explicit form for tests that need to +// exercise the delivery_mode branch (e.g. poll-mode workspaces get +// a 422 instead of a 503 when URL is empty — the platform can't +// dispatch to a non-push workspace at all). +func expectURLAndMode(mock sqlmock.Sqlmock, workspaceID, url, mode string) { + mock.ExpectQuery(`SELECT COALESCE\(url, ''\), delivery_mode FROM workspaces WHERE id = \$1`). WithArgs(workspaceID). - WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow(url)) + WillReturnRows(sqlmock.NewRows([]string{"url", "delivery_mode"}).AddRow(url, mode)) +} + +// expectURLNullMode is the production-observed shape: external runtime +// workspaces (molecule-sdk-python on user infra) register with +// delivery_mode = NULL, not "poll". Caught 2026-05-04 — the narrow +// "poll" check missed three of three real workspaces in user reports. +func expectURLNullMode(mock sqlmock.Sqlmock, workspaceID, url string) { + mock.ExpectQuery(`SELECT COALESCE\(url, ''\), delivery_mode FROM workspaces WHERE id = \$1`). + WithArgs(workspaceID). + WillReturnRows(sqlmock.NewRows([]string{"url", "delivery_mode"}).AddRow(url, nil)) } // expectURLMissing stubs the SELECT to return sql.ErrNoRows. func expectURLMissing(mock sqlmock.Sqlmock, workspaceID string) { - mock.ExpectQuery(`SELECT COALESCE\(url, ''\) FROM workspaces WHERE id = \$1`). + mock.ExpectQuery(`SELECT COALESCE\(url, ''\), delivery_mode FROM workspaces WHERE id = \$1`). WithArgs(workspaceID). WillReturnError(sql.ErrNoRows) } @@ -201,9 +223,13 @@ func TestChatUpload_NoURL(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - // Workspace registered but URL hasn't been reported yet (mid-boot). + // Workspace registered (push-mode) but URL hasn't been reported + // yet (mid-boot). 503 + "not registered yet" is the right surface — the + // canvas client can retry after the next heartbeat picks up the URL. + // Push mode is the only branch that produces 503; everything else + // (poll, NULL, empty) gets 422 because no amount of waiting helps. wsID := "00000000-0000-0000-0000-000000000042" - expectURL(mock, wsID, "") + expectURLAndMode(mock, wsID, "", "push") h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) body, ct := uploadFixture(t) @@ -211,7 +237,65 @@ func TestChatUpload_NoURL(t *testing.T) { h.Upload(c) if w.Code != http.StatusServiceUnavailable { - t.Errorf("expected 503 when workspace url empty, got %d: %s", w.Code, w.Body.String()) + t.Errorf("expected 503 when workspace url empty (push mode), got %d: %s", w.Code, w.Body.String()) + } + if !strings.Contains(w.Body.String(), "not registered yet") { + t.Errorf("expected transient-state error message, got: %s", w.Body.String()) + } +} + +// TestChatUpload_PollModeEmptyURL pins the 422 distinguisher: a +// poll-mode workspace has no URL by design, so chat upload (which is +// HTTP-forward to the workspace) cannot succeed by retrying. Returning +// 503 here would loop the canvas client forever; 422 + an actionable +// message tells the user what to do. +func TestChatUpload_PollModeEmptyURL(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + wsID := "00000000-0000-0000-0000-000000000099" + expectURLAndMode(mock, wsID, "", "poll") + + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + body, ct := uploadFixture(t) + c, w := makeUploadRequest(t, wsID, body, ct) + h.Upload(c) + + if w.Code != http.StatusUnprocessableEntity { + t.Fatalf("expected 422 for poll-mode upload, got %d: %s", w.Code, w.Body.String()) + } + if !strings.Contains(w.Body.String(), "push") { + t.Errorf("expected error to suggest push mode, got: %s", w.Body.String()) + } +} + +// TestChatUpload_NullModeEmptyURL — production-observed 2026-05-04: +// external-runtime workspaces (molecule-sdk-python on user infra) +// register with delivery_mode = NULL, not "poll". The earlier narrow +// poll-only check fell through to the misleading 503. The fix is the +// inverse-of-push test: anything not exactly "push" with empty URL +// can't dispatch and gets the actionable 422. +// +// Three of three external workspaces in the user's tenant had this +// shape (home hermes / runner mac mini / mac laptop, all +// runtime=external + url='' + delivery_mode=NULL). +func TestChatUpload_NullModeEmptyURL(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + wsID := "30ba7f0b-b303-4a20-aefe-3a4a675b8aa4" // user's "mac laptop" + expectURLNullMode(mock, wsID, "") + + h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil)) + body, ct := uploadFixture(t) + c, w := makeUploadRequest(t, wsID, body, ct) + h.Upload(c) + + if w.Code != http.StatusUnprocessableEntity { + t.Fatalf("expected 422 for null-delivery-mode upload, got %d: %s", w.Code, w.Body.String()) + } + if !strings.Contains(w.Body.String(), "callback URL") { + t.Errorf("expected error to mention callback URL, got: %s", w.Body.String()) } }