diff --git a/workspace-server/internal/handlers/terminal_diagnose.go b/workspace-server/internal/handlers/terminal_diagnose.go index e40f6e19..b78c8955 100644 --- a/workspace-server/internal/handlers/terminal_diagnose.go +++ b/workspace-server/internal/handlers/terminal_diagnose.go @@ -1,19 +1,48 @@ package handlers import ( + "bytes" "context" "fmt" "net/http" "os" "os/exec" "strings" + "sync" "time" "github.com/Molecule-AI/molecule-monorepo/platform/internal/db" "github.com/Molecule-AI/molecule-monorepo/platform/internal/provisioner" + "github.com/Molecule-AI/molecule-monorepo/platform/internal/wsauth" "github.com/gin-gonic/gin" ) +// syncBuf is a goroutine-safe writer that wraps bytes.Buffer with a mutex. +// Used to capture subprocess stderr without racing the os/exec stderr-copy +// goroutine: ``cmd.Stderr = io.Writer`` spawns a background goroutine that +// reads from the subprocess's stderr fd and calls Write on our writer, so +// reading the buffer from another goroutine (e.g., on wait-for-port +// timeout while the tunnel may still be writing) without synchronization +// is a data race that ``go test -race`` would flag. ``strings.Builder`` +// and bare ``bytes.Buffer`` aren't goroutine-safe; this tiny shim is the +// cheapest fix. +type syncBuf struct { + mu sync.Mutex + b bytes.Buffer +} + +func (s *syncBuf) Write(p []byte) (int, error) { + s.mu.Lock() + defer s.mu.Unlock() + return s.b.Write(p) +} + +func (s *syncBuf) String() string { + s.mu.Lock() + defer s.mu.Unlock() + return s.b.String() +} + // HandleDiagnose handles GET /workspaces/:id/terminal/diagnose. It runs the // same per-step pipeline as HandleConnect (ssh-keygen → EIC send-key → tunnel // → ssh) but non-interactively, captures the first failing step and its @@ -45,6 +74,29 @@ func (h *TerminalHandler) HandleDiagnose(c *gin.Context) { ctx, cancel := context.WithTimeout(c.Request.Context(), 30*time.Second) defer cancel() + // KI-005 hierarchy check — same shape as HandleConnect. Without this, + // an org-level token holder can probe any workspace in their tenant by + // guessing the UUID, learning its diagnostic state (which IAM call + // fails, what sshd says) even when they don't own it. Per-workspace + // bearer tokens are already URL-bound by WorkspaceAuth, so the gap is + // org tokens — same vector KI-005 closed for /terminal (#1609). + callerID := c.GetHeader("X-Workspace-ID") + if callerID != "" && callerID != workspaceID { + tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")) + if tok != "" { + if err := wsauth.ValidateToken(ctx, db.DB, callerID, tok); err != nil { + if c.GetString("org_token_id") == "" { + c.JSON(http.StatusUnauthorized, gin.H{"error": "invalid token for claimed workspace"}) + return + } + } + } + if !canCommunicateCheck(callerID, workspaceID) { + c.JSON(http.StatusForbidden, gin.H{"error": "not authorized to diagnose this workspace's terminal"}) + return + } + } + var instanceID string _ = db.DB.QueryRowContext(ctx, `SELECT COALESCE(instance_id, '') FROM workspaces WHERE id = $1`, @@ -155,8 +207,8 @@ func (h *TerminalHandler) diagnoseRemote(ctx context.Context, workspaceID, insta pubKey, err := os.ReadFile(keyPath + ".pub") if err != nil { - return stop("ssh-keygen", diagnoseStep{ - Name: "ssh-keygen", + return stop("read-pubkey", diagnoseStep{ + Name: "read-pubkey", Error: fmt.Sprintf("read pubkey: %v", err), }) } @@ -201,7 +253,7 @@ func (h *TerminalHandler) diagnoseRemote(ctx context.Context, workspaceID, insta t0 = time.Now() tunnel := openTunnelCmd(opts) tunnel.Env = os.Environ() - var tunnelStderr strings.Builder + var tunnelStderr syncBuf tunnel.Stderr = &tunnelStderr if err := tunnel.Start(); err != nil { return stop("open-tunnel", diagnoseStep{ diff --git a/workspace-server/internal/handlers/terminal_diagnose_test.go b/workspace-server/internal/handlers/terminal_diagnose_test.go index 5cf672fe..15b94945 100644 --- a/workspace-server/internal/handlers/terminal_diagnose_test.go +++ b/workspace-server/internal/handlers/terminal_diagnose_test.go @@ -6,6 +6,7 @@ import ( "errors" "net/http/httptest" "os/exec" + "strconv" "testing" "github.com/DATA-DOG/go-sqlmock" @@ -111,6 +112,53 @@ func TestHandleDiagnose_RoutesToLocal(t *testing.T) { } } +// TestHandleDiagnose_KI005_RejectsCrossWorkspace — the diagnostic endpoint +// has the same cross-workspace info-leak surface as /terminal had before +// #1609. Without KI-005, an org-level token holder could probe any +// workspace in their tenant by guessing the UUID, learning which IAM call +// fails or which sshd error fires. This test pins that HandleDiagnose +// applies the same hierarchy guard as HandleConnect (parity: ws-attacker +// claiming X-Workspace-ID against /workspaces/ws-victim/terminal/diagnose +// must 403, never reaching the SELECT COALESCE for instance_id). +func TestHandleDiagnose_KI005_RejectsCrossWorkspace(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + + // Stub CanCommunicate to deny. Reset after — same pattern as the + // HandleConnect KI-005 tests. + prev := canCommunicateCheck + canCommunicateCheck = func(callerID, targetID string) bool { return false } + defer func() { canCommunicateCheck = prev }() + + // Token validation: caller's bearer is bound to ws-attacker. + mock.ExpectQuery(`SELECT t\.id, t\.workspace_id\s+FROM workspace_auth_tokens t`). + WithArgs(sqlmock.AnyArg()). + WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id"}).AddRow("tok-1", "ws-attacker")) + mock.ExpectExec(`UPDATE workspace_auth_tokens SET last_used_at`). + WithArgs(sqlmock.AnyArg()). + WillReturnResult(sqlmock.NewResult(0, 1)) + + h := NewTerminalHandler(nil) + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-victim"}} + c.Request = httptest.NewRequest("GET", "/workspaces/ws-victim/terminal/diagnose", nil) + c.Request.Header.Set("X-Workspace-ID", "ws-attacker") + c.Request.Header.Set("Authorization", "Bearer attacker-token") + + h.HandleDiagnose(c) + + if w.Code != 403 { + t.Errorf("cross-workspace diagnose: got %d, want 403 (%s)", w.Code, w.Body.String()) + } + // Critically: the SELECT COALESCE for instance_id must NOT have run — + // no expectation was set for it. ExpectationsWereMet ensures we + // rejected before reaching the DB lookup. + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations (rejection should fire before instance_id lookup): %v", err) + } +} + // TestDiagnoseRemote_StopsAtSSHProbe — full happy path through send-key, // pick-port, open-tunnel, wait-for-port, then stub the ssh probe to fail. // Confirms first_failure surfaces the actual ssh stderr ("Permission @@ -144,7 +192,7 @@ func TestDiagnoseRemote_StopsAtSSHProbe(t *testing.T) { // fall back to a portable busy-wait). return exec.Command("sh", "-c", `port="$1"; while true; do nc -l "$port" >/dev/null 2>&1 || true; done`, - "sh", numToString(o.LocalPort)) + "sh", strconv.Itoa(o.LocalPort)) } defer func() { openTunnelCmd = prevTun }() @@ -197,26 +245,3 @@ func TestDiagnoseRemote_StopsAtSSHProbe(t *testing.T) { } } -// numToString is a tiny helper to avoid pulling fmt into the test for one -// integer-to-string call. Same observable behavior as strconv.Itoa. -func numToString(n int) string { - if n == 0 { - return "0" - } - var buf [20]byte - i := len(buf) - neg := n < 0 - if neg { - n = -n - } - for n > 0 { - i-- - buf[i] = byte('0' + n%10) - n /= 10 - } - if neg { - i-- - buf[i] = '-' - } - return string(buf[i:]) -}