Distinguish poll-mode workspace from transient empty-URL on chat upload

External-runtime workspaces that register in poll mode have no callback
URL by design — the platform never dispatches to them, so chat upload
(HTTP-forward by design) can't proceed. Returning 503 + "workspace url
not registered yet" was misleading: the "yet" implied transient state,
but the URL would never arrive.

Caught externally on 2026-05-04: user uploading an image to an external
"mac laptop" runtime workspace saw the 503 and assumed they should
retry. The workspace's poll mode meant retrying would never help.

Fix: include delivery_mode in the workspace lookup. When URL is empty:
- poll mode → 422 + "re-register in push mode with a public URL"
  (Unprocessable Entity — this request can't succeed against this
  workspace's configuration; no retry will help)
- push mode → 503 + "not registered yet" (genuine transient state —
  retry after next heartbeat is correct)

Test: TestChatUpload_PollModeEmptyURL pins the new 422 path; existing
TestChatUpload_NoURL strengthened to assert the "not registered yet"
substring stays on the push branch (it would have silently passed if
the new 422 path had clobbered both branches).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-05-04 02:39:22 -07:00
parent 99f6481acc
commit 87ae691e67
2 changed files with 83 additions and 8 deletions

View File

@ -30,6 +30,7 @@ package handlers
import (
"context"
"database/sql"
"fmt"
"io"
"log"
@ -102,14 +103,38 @@ const chatUploadDir = "/workspace/.molecule/chat-uploads"
// of bug as the original SaaS provision drift fixed in #2366; this
// extraction prevents that class on the consumer side.
func resolveWorkspaceForwardCreds(c *gin.Context, ctx context.Context, workspaceID, op string) (wsURL, secret string, ok bool) {
var deliveryMode sql.NullString
if err := db.DB.QueryRowContext(ctx,
`SELECT COALESCE(url, '') FROM workspaces WHERE id = $1`, workspaceID,
).Scan(&wsURL); err != nil {
`SELECT COALESCE(url, ''), delivery_mode FROM workspaces WHERE id = $1`, workspaceID,
).Scan(&wsURL, &deliveryMode); err != nil {
log.Printf("chat_files %s: workspace lookup failed for %s: %v", op, workspaceID, err)
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
return "", "", false
}
if wsURL == "" {
// Distinguish the two empty-URL classes so the user sees an
// actionable error rather than a misleading "not registered yet"
// (which implies waiting will help):
//
// poll-mode → URL is structurally absent. The platform never
// dispatches to a poll-mode workspace, so chat upload (which
// is HTTP-forward by design) cannot proceed. Returning 503
// here would loop the canvas client forever. 422 signals
// "this request can't succeed against THIS workspace's
// configuration" — the only fix is to re-register the
// workspace in push mode with a publicly-reachable URL.
//
// push-mode → URL just isn't on the row yet (workspace
// restart in progress, or first /registry/register hasn't
// landed). 503 + "not registered yet" is correct — retry
// after the next heartbeat (~30s) will likely succeed.
if deliveryMode.Valid && deliveryMode.String == "poll" {
c.JSON(http.StatusUnprocessableEntity, gin.H{
"error": "workspace is in poll mode — chat " + op + " requires push mode",
"detail": "Poll-mode workspaces have no callback URL the platform can dispatch to. Re-register the workspace with a publicly-reachable URL in push mode (e.g. via ngrok / Cloudflare tunnel) to enable chat file " + op + ".",
})
return "", "", false
}
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "workspace url not registered yet"})
return "", "", false
}

View File

@ -58,16 +58,28 @@ func uploadFixture(t *testing.T) (*bytes.Buffer, string) {
return &buf, mw.FormDataContentType()
}
// expectURL stubs the SELECT that resolves the workspace's url.
// expectURL stubs the SELECT that resolves the workspace's url +
// delivery_mode. Defaults delivery_mode to "push" — most tests don't
// care about the mode and just want a URL to forward to. Use
// expectURLAndMode when the test needs a specific mode (e.g. the
// poll-mode 422 path).
func expectURL(mock sqlmock.Sqlmock, workspaceID, url string) {
mock.ExpectQuery(`SELECT COALESCE\(url, ''\) FROM workspaces WHERE id = \$1`).
expectURLAndMode(mock, workspaceID, url, "push")
}
// expectURLAndMode is the explicit form for tests that need to
// exercise the delivery_mode branch (e.g. poll-mode workspaces get
// a 422 instead of a 503 when URL is empty — the platform can't
// dispatch to a poll-mode workspace at all).
func expectURLAndMode(mock sqlmock.Sqlmock, workspaceID, url, mode string) {
mock.ExpectQuery(`SELECT COALESCE\(url, ''\), delivery_mode FROM workspaces WHERE id = \$1`).
WithArgs(workspaceID).
WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow(url))
WillReturnRows(sqlmock.NewRows([]string{"url", "delivery_mode"}).AddRow(url, mode))
}
// expectURLMissing stubs the SELECT to return sql.ErrNoRows.
func expectURLMissing(mock sqlmock.Sqlmock, workspaceID string) {
mock.ExpectQuery(`SELECT COALESCE\(url, ''\) FROM workspaces WHERE id = \$1`).
mock.ExpectQuery(`SELECT COALESCE\(url, ''\), delivery_mode FROM workspaces WHERE id = \$1`).
WithArgs(workspaceID).
WillReturnError(sql.ErrNoRows)
}
@ -201,7 +213,9 @@ func TestChatUpload_NoURL(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
// Workspace registered but URL hasn't been reported yet (mid-boot).
// Workspace registered (push-mode default) but URL hasn't been reported
// yet (mid-boot). 503 + "not registered yet" is the right surface — the
// canvas client can retry after the next heartbeat picks up the URL.
wsID := "00000000-0000-0000-0000-000000000042"
expectURL(mock, wsID, "")
@ -211,7 +225,43 @@ func TestChatUpload_NoURL(t *testing.T) {
h.Upload(c)
if w.Code != http.StatusServiceUnavailable {
t.Errorf("expected 503 when workspace url empty, got %d: %s", w.Code, w.Body.String())
t.Errorf("expected 503 when workspace url empty (push mode), got %d: %s", w.Code, w.Body.String())
}
if !strings.Contains(w.Body.String(), "not registered yet") {
t.Errorf("expected transient-state error message, got: %s", w.Body.String())
}
}
// TestChatUpload_PollModeEmptyURL pins the 422 distinguisher: a
// poll-mode workspace has no URL by design, so chat upload (which is
// HTTP-forward to the workspace) cannot succeed by retrying. Returning
// 503 here would loop the canvas client forever; 422 + an actionable
// message ("re-register in push mode") tells the user what to do.
//
// Caught externally on 2026-05-04 — user reported "Upload failed: 503
// {error: workspace url not registered yet}" on an external runtime
// workspace (mac laptop, no public URL). The "yet" implied transient,
// but the workspace's poll-mode meant the URL would never arrive.
func TestChatUpload_PollModeEmptyURL(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
wsID := "00000000-0000-0000-0000-000000000099"
expectURLAndMode(mock, wsID, "", "poll")
h := NewChatFilesHandler(NewTemplatesHandler(t.TempDir(), nil))
body, ct := uploadFixture(t)
c, w := makeUploadRequest(t, wsID, body, ct)
h.Upload(c)
if w.Code != http.StatusUnprocessableEntity {
t.Fatalf("expected 422 for poll-mode upload, got %d: %s", w.Code, w.Body.String())
}
if !strings.Contains(w.Body.String(), "poll mode") {
t.Errorf("expected error to mention poll mode, got: %s", w.Body.String())
}
if !strings.Contains(w.Body.String(), "push mode") {
t.Errorf("expected error to suggest re-registering in push mode, got: %s", w.Body.String())
}
}