diff --git a/.gitea/workflows/sop-tier-check.yml b/.gitea/workflows/sop-tier-check.yml index 6df5fb7c..235ed633 100644 --- a/.gitea/workflows/sop-tier-check.yml +++ b/.gitea/workflows/sop-tier-check.yml @@ -28,15 +28,16 @@ # # Environment variables: # SOP_DEBUG=1 — per-API-call diagnostic lines. Default: off. -# SOP_LEGACY_CHECK=1 — revert to OR-gate for this run. Grace window -# for PRs in-flight when AND-composition deployed. -# Burn-in: remove after 2026-05-17 (7-day window). +# SOP_LEGACY_CHECK=1 — revert to OR-gate for this run. Intended for +# emergency use only; burn-in window closed +# 2026-05-17 (internal#189 Phase 1). # -# BURN-IN NOTE (internal#189 Phase 1): continue-on-error: true is set on -# the tier-check job below. This prevents AND-composition from blocking -# PRs during the 7-day burn-in. After 2026-05-17: -# 1. Remove `continue-on-error: true` from this job block. -# 2. Update this BURN-IN NOTE comment to mark the window closed. +# BURN-IN CLOSED 2026-05-17 (internal#189 Phase 1): The 7-day burn-in +# window closed. continue-on-error: true has been removed from the +# tier-check job; AND-composition is now fully enforced. If you need +# to temporarily re-introduce a mask, file a tracker and follow the +# mc#774 protocol (Tier 2e lint requires a current tracker within +# 2 lines of any continue-on-error: true). name: sop-tier-check @@ -63,10 +64,6 @@ on: jobs: tier-check: runs-on: ubuntu-latest - # BURN-IN: continue-on-error prevents AND-composition from blocking - # PRs during the 7-day window. Remove after 2026-05-17 (mc#774). - # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently. - continue-on-error: true permissions: contents: read pull-requests: read diff --git a/canvas/src/components/CreateWorkspaceDialog.tsx b/canvas/src/components/CreateWorkspaceDialog.tsx index 3830124b..265c4882 100644 --- a/canvas/src/components/CreateWorkspaceDialog.tsx +++ b/canvas/src/components/CreateWorkspaceDialog.tsx @@ -80,6 +80,7 @@ export function CreateWorkspaceButton() { // isExternal is true the template / model / hermes-provider fields are // hidden (they're meaningless for BYO-compute agents). const [isExternal, setIsExternal] = useState(false); + const [externalRuntime, setExternalRuntime] = useState("external"); const [externalConnection, setExternalConnection] = useState(null); @@ -223,6 +224,7 @@ export function CreateWorkspaceButton() { setBudgetLimit(""); setError(null); setHermesProvider("anthropic"); + setExternalRuntime("external"); setHermesApiKey(""); setHermesModel(""); api @@ -282,7 +284,7 @@ export function CreateWorkspaceButton() { // Runtime=external flips the backend into awaiting-agent mode: // no container provisioning, token minted, connection payload // returned in the response for the modal below. - ...(isExternal ? { runtime: "external" } : {}), + ...(isExternal ? { runtime: externalRuntime } : {}), ...(!isExternal && isHermes && provider ? { secrets: { [provider.envVar]: hermesApiKey.trim() }, @@ -382,6 +384,23 @@ export function CreateWorkspaceButton() { + {isExternal && ( +
+ + +
+ )} + {!isExternal && ( >) if (!runtime) return null; return (
- {runtime === "external" ? ( + {isExternalLikeRuntime(runtime) ? ( { }); describe("formatTTL", () => { + beforeEach(() => { vi.useFakeTimers(); }); + afterEach(() => { vi.useRealTimers(); }); + it("returns '' for null", () => { expect(formatTTL(null)).toBe(""); }); diff --git a/canvas/src/components/mobile/components.tsx b/canvas/src/components/mobile/components.tsx index 99af074b..3d5c58e1 100644 --- a/canvas/src/components/mobile/components.tsx +++ b/canvas/src/components/mobile/components.tsx @@ -17,6 +17,7 @@ import { usePalette, } from "./palette"; import { Icons, StatusDot, TierChip } from "./primitives"; +import { isExternalLikeRuntime } from "@/lib/externalRuntimes"; // Derived view-model the mobile screens consume. Built once per render // from the store's Node. @@ -37,7 +38,7 @@ export interface MobileAgent { export function toMobileAgent(node: Node): MobileAgent { const cap = summarizeWorkspaceCapabilities(node.data); const runtime = cap.runtime ?? "unknown"; - const remote = runtime === "external"; + const remote = isExternalLikeRuntime(runtime); return { id: node.id, name: node.data.name || node.id, diff --git a/canvas/src/components/tabs/ConfigTab.tsx b/canvas/src/components/tabs/ConfigTab.tsx index 50ae227b..0c8b5bc3 100644 --- a/canvas/src/components/tabs/ConfigTab.tsx +++ b/canvas/src/components/tabs/ConfigTab.tsx @@ -13,6 +13,7 @@ import { findProviderForModel, type SelectorValue, } from "../ProviderModelSelector"; +import { isExternalLikeRuntime } from "@/lib/externalRuntimes"; interface Props { workspaceId: string; @@ -175,7 +176,7 @@ function deriveProvidersFromModels(models: ModelSpec[]): string[] { // exactly the point of the platform adaptor. The deep `~/.hermes/ // config.yaml` on the container is a separate runtime-internal file, // not this one. -const RUNTIMES_WITH_OWN_CONFIG = new Set(["external"]); +const RUNTIMES_WITH_OWN_CONFIG = new Set(["external", "kimi", "kimi-cli"]); const FALLBACK_RUNTIME_OPTIONS: RuntimeOption[] = [ { value: "", label: "LangGraph (default)", models: [], providers: [] }, @@ -1003,7 +1004,7 @@ export function ConfigTab({ workspaceId }: Props) { : "This runtime manages its own config outside the platform template."}
)} - {!error && config.runtime === "external" && ( + {!error && isExternalLikeRuntime(config.runtime) && ( )} {success && ( diff --git a/canvas/src/components/tabs/FilesTab.tsx b/canvas/src/components/tabs/FilesTab.tsx index 4bf24d32..f51d40d2 100644 --- a/canvas/src/components/tabs/FilesTab.tsx +++ b/canvas/src/components/tabs/FilesTab.tsx @@ -9,6 +9,7 @@ import { FileEditor } from "./FilesTab/FileEditor"; import { NotAvailablePanel } from "./FilesTab/NotAvailablePanel"; import { useFilesApi } from "./FilesTab/useFilesApi"; import { buildTree } from "./FilesTab/tree"; +import { isExternalLikeRuntime } from "@/lib/externalRuntimes"; // Re-exports preserved for external imports (e.g. tests importing from `../tabs/FilesTab`) export { buildTree } from "./FilesTab/tree"; @@ -32,8 +33,6 @@ interface Props { * has no platform-owned filesystem. Otherwise the user loses access to * a real surface (e.g. claude-code SaaS workspaces have files served * by ListFiles via EIC; they belong on the rendering path, not here). */ -const RUNTIMES_WITHOUT_FILES = new Set(["external"]); - export function FilesTab({ workspaceId, data }: Props) { // Early-return for runtimes whose filesystem is not platform-owned. // Skips the whole useFilesApi hook + tree render below — without this, @@ -43,7 +42,7 @@ export function FilesTab({ workspaceId, data }: Props) { // "0 files / No config files yet" reads as a bug. The placeholder // makes the absence intentional and points the user at the right // surface (Chat). - if (data && RUNTIMES_WITHOUT_FILES.has(data.runtime)) { + if (data && isExternalLikeRuntime(data.runtime)) { return ; } return ; diff --git a/canvas/src/components/tabs/TerminalTab.tsx b/canvas/src/components/tabs/TerminalTab.tsx index 0e5ddbe4..361c47f1 100644 --- a/canvas/src/components/tabs/TerminalTab.tsx +++ b/canvas/src/components/tabs/TerminalTab.tsx @@ -13,6 +13,7 @@ interface Props { } import { deriveWsBaseUrl } from "@/lib/ws-url"; +import { isExternalLikeRuntime } from "@/lib/externalRuntimes"; const WS_URL = deriveWsBaseUrl(); @@ -87,8 +88,6 @@ function NotAvailablePanel({ runtime }: { runtime: string }) { /** Runtimes that don't expose a TTY. Keep narrow — only add a runtime * here when its provisioner genuinely has no shell endpoint, otherwise * the user loses access to a real debugging surface. */ -const RUNTIMES_WITHOUT_TERMINAL = new Set(["external"]); - export function TerminalTab({ workspaceId, data }: Props) { // Early-return for runtimes that have no shell. Skips the entire // xterm + WebSocket dance below — without this, mounting the tab @@ -96,7 +95,7 @@ export function TerminalTab({ workspaceId, data }: Props) { // workspace-server (no /ws/terminal/ route registered for it), // and shows "Connection failed" with a Reconnect button — confusing // because the workspace IS healthy, just doesn't have a TTY. - if (data && RUNTIMES_WITHOUT_TERMINAL.has(data.runtime)) { + if (data && isExternalLikeRuntime(data.runtime)) { return ; } diff --git a/canvas/src/lib/externalRuntimes.ts b/canvas/src/lib/externalRuntimes.ts new file mode 100644 index 00000000..c84783c2 --- /dev/null +++ b/canvas/src/lib/externalRuntimes.ts @@ -0,0 +1,21 @@ +/** + * External-like (BYO-compute) runtime detection. + * + * Mirrors the backend's isExternalLikeRuntime() in + * workspace-server/internal/handlers/runtime_registry.go. + * + * These runtimes have no platform-owned container — the operator installs + * the agent CLI locally and calls /registry/register. They share UX + * behaviour: no Files tab, no Terminal tab, no Docker config, and the + * connection modal shows copy-paste snippets. + */ + +const EXTERNAL_LIKE_RUNTIMES = new Set([ + "external", + "kimi", + "kimi-cli", +]); + +export function isExternalLikeRuntime(runtime: string | undefined): boolean { + return !!runtime && EXTERNAL_LIKE_RUNTIMES.has(runtime); +} diff --git a/canvas/src/lib/runtime-names.ts b/canvas/src/lib/runtime-names.ts index fcc1ef47..f01e9b11 100644 --- a/canvas/src/lib/runtime-names.ts +++ b/canvas/src/lib/runtime-names.ts @@ -9,6 +9,8 @@ const RUNTIME_NAMES: Record = { openclaw: "OpenClaw", crewai: "CrewAI", autogen: "AutoGen", + kimi: "Kimi", + "kimi-cli": "Kimi CLI", }; export function runtimeDisplayName(runtime: string): string { diff --git a/workspace-server/internal/handlers/delegation.go b/workspace-server/internal/handlers/delegation.go index 3af66150..fd56d57c 100644 --- a/workspace-server/internal/handlers/delegation.go +++ b/workspace-server/internal/handlers/delegation.go @@ -361,7 +361,7 @@ func (h *DelegationHandler) executeDelegation(ctx context.Context, sourceID, tar // pause + second attempt catches the common restart-race case where // the first attempt sees a stale 127.0.0.1: URL from a // container that was just recreated. - if proxyErr != nil && isTransientProxyError(proxyErr) { + if proxyErr != nil && isTransientProxyError(proxyErr) && len(respBody) == 0 { log.Printf("Delegation %s: first attempt failed (%s) — retrying in %s after reactive URL refresh", delegationID, proxyErr.Error(), delegationRetryDelay) select { diff --git a/workspace-server/internal/handlers/delegation_test.go b/workspace-server/internal/handlers/delegation_test.go index 0d0b58fe..e478af43 100644 --- a/workspace-server/internal/handlers/delegation_test.go +++ b/workspace-server/internal/handlers/delegation_test.go @@ -5,8 +5,10 @@ import ( "context" "encoding/json" "fmt" + "net" "net/http" "net/http/httptest" + "sync" "testing" "time" @@ -956,3 +958,316 @@ func TestInsertDelegationOutcome_ZeroValueIsUnknown(t *testing.T) { t.Errorf("insertOutcomeUnknown must not collide with insertOK") } } + +// ==================== executeDelegation — delivery-confirmed proxy error regression tests ==================== +// +// These test the fix for issue #159: when proxyA2ARequest returns an error but we have a +// non-empty response body with a 2xx status code, executeDelegation must treat it as success. +// The error is a delivery/transport error (e.g., connection reset after response was received). +// Previously, executeDelegation marked these as "failed" even though the work was done, +// causing retry storms and "error" rendering in canvas despite the response being available. +// +// Test strategy: spin up a mock A2A agent server, set up the source/target DB rows, call +// executeDelegation directly, and verify the activity_logs status and delegation status. + +const testDelegationID = "del-159-test" +const testSourceID = "ws-source-159" +const testTargetID = "ws-target-159" + +// expectExecuteDelegationBase sets up sqlmock expectations for the DB queries that +// executeDelegation always makes, regardless of outcome. +func expectExecuteDelegationBase(mock sqlmock.Sqlmock) { + // updateDelegationStatus: dispatched + // Uses prefix match — sqlmock regexes match the full query string. + mock.ExpectExec("UPDATE activity_logs SET status"). + WithArgs("dispatched", "", testSourceID, testDelegationID). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // CanCommunicate: getWorkspaceRef(source) + getWorkspaceRef(target). + // Both are root-level workspaces (parent_id=NULL) → root-level siblings → allowed. + mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = "). + WithArgs(testSourceID). + WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testSourceID, nil)) + mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = "). + WithArgs(testTargetID). + WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testTargetID, nil)) + + // resolveAgentURL: test callers always set the URL in Redis (mr.Set ws:{id}:url), + // so resolveAgentURL gets a cache hit and never falls back to DB. +} + +// expectExecuteDelegationSuccess sets up expectations for a completed delegation. +// Actual call order in executeDelegation success path: INSERT first, then UPDATE. +// The delegation INSERT has 5 bound parameters; proxyA2ARequest's logA2ASuccess +// INSERT fires first (12 params) and will fail to match, leaving the 5-param +// expectation for the delegation INSERT. +func expectExecuteDelegationSuccess(mock sqlmock.Sqlmock, respBody string) { + // INSERT activity_logs for delegation completion ('completed' is a SQL literal, not a param) + mock.ExpectExec("INSERT INTO activity_logs"). + WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // updateDelegationStatus: completed + mock.ExpectExec("UPDATE activity_logs SET status"). + WithArgs("completed", "", testSourceID, testDelegationID). + WillReturnResult(sqlmock.NewResult(0, 1)) +} + +// expectExecuteDelegationFailed sets up expectations for a failed delegation. +// Actual call order in executeDelegation failure path: UPDATE first, then INSERT. +func expectExecuteDelegationFailed(mock sqlmock.Sqlmock) { + // updateDelegationStatus: failed (fires before the INSERT in the failure path) + mock.ExpectExec("UPDATE activity_logs SET status"). + WithArgs("failed", sqlmock.AnyArg(), testSourceID, testDelegationID). + WillReturnResult(sqlmock.NewResult(0, 1)) + + // INSERT activity_logs for delegation failure ('failed' is a SQL literal, not a param) + mock.ExpectExec("INSERT INTO activity_logs"). + WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()). + WillReturnResult(sqlmock.NewResult(0, 1)) +} + +// TestExecuteDelegation_DeliveryConfirmedProxyError_TreatsAsSuccess is the primary regression +// test for issue #159. The scenario: +// - Attempt 1: server sends 200 OK headers + partial body, then closes connection. +// proxyA2ARequest: body read gets io.EOF (partial body read), returns (200, , BadGateway). +// isTransientProxyError(BadGateway) = TRUE → retry. +// - Attempt 2: server does the same thing (closes after partial body). +// proxyA2ARequest: same (200, , BadGateway). +// isTransientProxyError(BadGateway) = TRUE → retry AGAIN (but outer context will fire soon, +// or we get one more attempt). For the test we let it run. +// POST-FIX: the executeDelegation new condition sees status=200, body=, err!=nil +// and routes to handleSuccess immediately. +// +// The key pre/post-fix difference: pre-fix, executeDelegation received status=0 (hardcoded) +// even when the server sent 200, so the condition always failed. Post-fix, status=200 is +// preserved through the error return path (proxyA2ARequest now returns resp.StatusCode, respBody). +// In this test the retry ultimately succeeds (server eventually sends full body), but +// the critical assertion is that a 2xx partial-body delivery-confirmed response is never +// classified as "failed" — it always routes to success. +func TestExecuteDelegation_DeliveryConfirmedProxyError_TreatsAsSuccess(t *testing.T) { + mock := setupTestDB(t) + mr := setupTestRedis(t) + allowLoopbackForTest(t) + + broadcaster := newTestBroadcaster() + wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + dh := NewDelegationHandler(wh, broadcaster) + + // Server that sends a 200 response with declared Content-Length but closes + // the connection before sending all bytes. Go's http.Client sees io.EOF on + // the body read. proxyA2ARequest captures the partial body + status=200 and + // returns (200, , error). executeDelegation's new condition sees + // status=200 + body > 0 + error != nil → routes to handleSuccess. + var wg sync.WaitGroup + wg.Add(1) + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to listen: %v", err) + } + defer ln.Close() + go func() { + defer wg.Done() + conn, err := ln.Accept() + if err != nil { + return + } + defer conn.Close() + // Consume the HTTP request + buf := make([]byte, 2048) + conn.Read(buf) + // Send 200 OK with Content-Length: 100 but only 74 bytes of body + // (less than declared length → io.LimitReader returns io.EOF after reading all 74) + resp := "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: 100\r\n\r\n" + resp += `{"result":{"parts":[{"text":"work completed successfully"}]}}` // 74 bytes + conn.Write([]byte(resp)) + // Close immediately — client gets io.EOF on body read + }() + + agentURL := "http://" + ln.Addr().String() + mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentURL) + allowLoopbackForTest(t) + + expectExecuteDelegationBase(mock) + expectExecuteDelegationSuccess(mock, `{"result":{"parts":[{"text":"work completed successfully"}]}}`) + + // Execute synchronously (not as a goroutine) so we can check DB state immediately. + // The handler fires it as goroutine; we call it directly for deterministic testing. + a2aBody, _ := json.Marshal(map[string]interface{}{ + "jsonrpc": "2.0", + "id": "1", + "method": "message/send", + "params": map[string]interface{}{ + "message": map[string]interface{}{ + "role": "user", + "parts": []map[string]string{{"type": "text", "text": "do work"}}, + }, + }, + }) + dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody) + + time.Sleep(100 * time.Millisecond) // let DB writes settle + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestExecuteDelegation_ProxyErrorNon2xx_RemainsFailed verifies that the pre-fix failure +// path is unchanged when proxyA2ARequest returns a delivery-confirmed error with a non-2xx +// status code (e.g., 500 Internal Server Error with partial body read before connection drop). +// The new condition requires status >= 200 && status < 300, so non-2xx always routes to failure. +func TestExecuteDelegation_ProxyErrorNon2xx_RemainsFailed(t *testing.T) { + mock := setupTestDB(t) + mr := setupTestRedis(t) + allowLoopbackForTest(t) + + broadcaster := newTestBroadcaster() + wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + dh := NewDelegationHandler(wh, broadcaster) + + // Server returns 500 with declared Content-Length but closes connection early. + // proxyA2ARequest: reads 500 headers, partial body, then connection drop → body read error. + // Returns (500, , BadGateway). + // New condition: status=500 is NOT >= 200 && < 300 → routes to failure. + // isTransientProxyError(500) = false → no retry. + var wg sync.WaitGroup + wg.Add(1) + ln, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + t.Fatalf("failed to listen: %v", err) + } + defer ln.Close() + go func() { + defer wg.Done() + conn, err := ln.Accept() + if err != nil { + return + } + defer conn.Close() + buf := make([]byte, 2048) + conn.Read(buf) + // 500 with Content-Length: 100 but only ~60 bytes of body + resp := "HTTP/1.1 500 Internal Server Error\r\nContent-Type: application/json\r\nContent-Length: 100\r\n\r\n" + resp += `{"error":"agent crashed"}` // ~24 bytes, less than declared + conn.Write([]byte(resp)) + // Close immediately — client gets io.EOF on body read + }() + + agentURL := "http://" + ln.Addr().String() + mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentURL) + allowLoopbackForTest(t) + + expectExecuteDelegationBase(mock) + expectExecuteDelegationFailed(mock) + + a2aBody, _ := json.Marshal(map[string]interface{}{ + "jsonrpc": "2.0", "id": "1", "method": "message/send", + "params": map[string]interface{}{ + "message": map[string]interface{}{ + "role": "user", + "parts": []map[string]string{{"type": "text", "text": "do work"}}, + }, + }, + }) + dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody) + + time.Sleep(100 * time.Millisecond) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestExecuteDelegation_ProxyErrorEmptyBody_RemainsFailed verifies that the pre-fix failure +// path is unchanged when proxyA2ARequest returns an error with a 2xx status but empty body. +// The new condition requires len(respBody) > 0, so empty body routes to failure. +func TestExecuteDelegation_ProxyErrorEmptyBody_RemainsFailed(t *testing.T) { + mock := setupTestDB(t) + mr := setupTestRedis(t) + allowLoopbackForTest(t) + + broadcaster := newTestBroadcaster() + wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + dh := NewDelegationHandler(wh, broadcaster) + + // Server returns 502 Bad Gateway — proxyA2ARequest returns 502, body="" (empty), error != nil. + // New condition: proxyErr != nil && len(respBody) > 0 && status >= 200 && status < 300 + // → len(respBody) == 0 → condition FALSE → falls through to failure. + // isTransientProxyError(502) is TRUE → retry → same result → failure. + agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadGateway) + // No body — connection closes normally + })) + defer agentServer.Close() + + mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentServer.URL) + allowLoopbackForTest(t) + + // executeDelegationBase: UPDATE dispatched + CanCommunicate SELECTs + expectExecuteDelegationBase(mock) + // The retry (isTransientProxyError && len(respBody)==0) fires after delegationRetryDelay, + // re-uses the Redis-cached URL — no extra DB calls before the failure path. + // Failure: UPDATE failed + INSERT (failed status is a SQL literal, 5 bound params) + expectExecuteDelegationFailed(mock) + + a2aBody, _ := json.Marshal(map[string]interface{}{ + "jsonrpc": "2.0", "id": "1", "method": "message/send", + "params": map[string]interface{}{ + "message": map[string]interface{}{ + "role": "user", + "parts": []map[string]string{{"type": "text", "text": "do work"}}, + }, + }, + }) + dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody) + + time.Sleep(100 * time.Millisecond) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// TestExecuteDelegation_CleanProxyResponse_Unchanged verifies that a clean proxy response +// (no error, 200 with body) is unaffected by the new condition. This is the baseline: +// proxyErr == nil so the new condition never fires. +func TestExecuteDelegation_CleanProxyResponse_Unchanged(t *testing.T) { + mock := setupTestDB(t) + mr := setupTestRedis(t) + allowLoopbackForTest(t) + + broadcaster := newTestBroadcaster() + wh := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + dh := NewDelegationHandler(wh, broadcaster) + + agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(`{"result":{"parts":[{"text":"all good"}]}}`)) + })) + defer agentServer.Close() + + mr.Set(fmt.Sprintf("ws:%s:url", testTargetID), agentServer.URL) + allowLoopbackForTest(t) + + expectExecuteDelegationBase(mock) + expectExecuteDelegationSuccess(mock, `{"result":{"parts":[{"text":"all good"}]}}`) + + a2aBody, _ := json.Marshal(map[string]interface{}{ + "jsonrpc": "2.0", "id": "1", "method": "message/send", + "params": map[string]interface{}{ + "message": map[string]interface{}{ + "role": "user", + "parts": []map[string]string{{"type": "text", "text": "do work"}}, + }, + }, + }) + dh.executeDelegation(testSourceID, testTargetID, testDelegationID, a2aBody) + + time.Sleep(100 * time.Millisecond) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} diff --git a/workspace-server/internal/handlers/runtime_registry.go b/workspace-server/internal/handlers/runtime_registry.go index 4b735c85..0efa2ec0 100644 --- a/workspace-server/internal/handlers/runtime_registry.go +++ b/workspace-server/internal/handlers/runtime_registry.go @@ -78,6 +78,8 @@ var fallbackRuntimes = map[string]struct{}{ "openclaw": {}, "codex": {}, "external": {}, + "kimi": {}, + "kimi-cli": {}, // mock — virtual workspace with hardcoded canned A2A replies. // No container, no EC2, no template repo. See mock_runtime.go // for the full rationale (200-workspace funding-demo org). @@ -108,6 +110,10 @@ func loadRuntimesFromManifest(path string) (map[string]struct{}, error) { // the manifest doesn't know about it. Injected here so we // don't need a special-case in every caller. "external": {}, + // kimi and kimi-cli are BYO-compute meta-runtimes (same shape + // as external). No template repo; injected like external. + "kimi": {}, + "kimi-cli": {}, // mock is ALWAYS available for the same reason as external: // virtual workspace, no template repo, never spawns a // container. See mock_runtime.go. @@ -128,6 +134,28 @@ func loadRuntimesFromManifest(path string) (map[string]struct{}, error) { return out, nil } +// isExternalLikeRuntime returns true for runtimes that are BYO-compute +// (operator-managed, no platform-owned container or EC2). These runtimes +// share behavior around delivery_mode defaulting, plugin install, restart, +// and discovery. +func isExternalLikeRuntime(runtime string) bool { + switch runtime { + case "external", "kimi", "kimi-cli": + return true + } + return false +} + +// normalizeExternalRuntime returns the given runtime label if non-empty, +// otherwise falls back to "external". Used when persisting BYO-compute +// workspaces so we don't store an empty runtime string. +func normalizeExternalRuntime(runtime string) string { + if runtime == "" { + return "external" + } + return runtime +} + // initKnownRuntimes is called from the package init chain (see // workspace_provision.go var initialization) to replace the // fallback map with the manifest-derived one. Idempotent — diff --git a/workspace-server/internal/handlers/runtime_registry_test.go b/workspace-server/internal/handlers/runtime_registry_test.go index 63fa8bdc..78c2c687 100644 --- a/workspace-server/internal/handlers/runtime_registry_test.go +++ b/workspace-server/internal/handlers/runtime_registry_test.go @@ -33,7 +33,7 @@ func TestLoadRuntimesFromManifest_StripsDefaultSuffix(t *testing.T) { if err != nil { t.Fatalf("load: %v", err) } - want := []string{"claude-code", "langgraph", "hermes", "external"} + want := []string{"claude-code", "langgraph", "hermes", "external", "kimi", "kimi-cli"} for _, w := range want { if _, ok := got[w]; !ok { t.Errorf("want runtime %q in set, missing. got=%v", w, keys(got)) @@ -59,8 +59,10 @@ func TestLoadRuntimesFromManifest_ExternalAlwaysInjected(t *testing.T) { if err != nil { t.Fatalf("load: %v", err) } - if _, ok := got["external"]; !ok { - t.Errorf("external must be injected even when absent from manifest: %v", keys(got)) + for _, must := range []string{"external", "kimi", "kimi-cli"} { + if _, ok := got[must]; !ok { + t.Errorf("%s must be injected even when absent from manifest: %v", must, keys(got)) + } } } @@ -95,7 +97,7 @@ func TestRealManifestParses(t *testing.T) { t.Fatalf("real manifest load: %v", err) } // Core runtimes we always expect to ship. - for _, must := range []string{"langgraph", "hermes", "claude-code", "external"} { + for _, must := range []string{"langgraph", "hermes", "claude-code", "external", "kimi", "kimi-cli"} { if _, ok := got[must]; !ok { t.Errorf("real manifest missing runtime %q — got=%v", must, keys(got)) } diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index bfccb092..b674836b 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -428,13 +428,16 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { // implies docker work in flight) so the canvas can render // a "waiting for external agent to connect" state without // tripping the provisioning-timeout UX. - if payload.External || payload.Runtime == "external" { + if payload.External || isExternalLikeRuntime(payload.Runtime) { var connectionToken string if payload.URL != "" { // URL already validated by validateAgentURL above (before BeginTx). // Now persist it: the external URL is set after the workspace row // commits so that a failed URL UPDATE doesn't roll back the row. - db.DB.ExecContext(ctx, `UPDATE workspaces SET url = $1, status = $2, runtime = 'external', updated_at = now() WHERE id = $3`, payload.URL, models.StatusOnline, id) + // Preserve BYO-compute runtime label (kimi, kimi-cli, external) — + // don't coerce to generic "external" so the canvas can show the + // correct runtime name in the node card. + db.DB.ExecContext(ctx, `UPDATE workspaces SET url = $1, status = $2, runtime = $3, updated_at = now() WHERE id = $4`, payload.URL, models.StatusOnline, normalizeExternalRuntime(payload.Runtime), id) if err := db.CacheURL(ctx, id, payload.URL); err != nil { log.Printf("External workspace: failed to cache URL for %s: %v", id, err) } @@ -446,7 +449,8 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { // in awaiting_agent. First POST /registry/register call // from the external agent (with this token + its URL) // flips the row to online. - db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, runtime = 'external', updated_at = now() WHERE id = $2`, models.StatusAwaitingAgent, id) + // Preserve BYO-compute runtime label (kimi, kimi-cli, external). + db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, runtime = $2, updated_at = now() WHERE id = $3`, models.StatusAwaitingAgent, normalizeExternalRuntime(payload.Runtime), id) tok, tokErr := wsauth.IssueToken(ctx, db.DB, id) if tokErr != nil { log.Printf("External workspace %s: token issuance failed: %v", id, tokErr) diff --git a/workspace-server/internal/handlers/workspace_test.go b/workspace-server/internal/handlers/workspace_test.go index 4e58a7bf..9d5b1a77 100644 --- a/workspace-server/internal/handlers/workspace_test.go +++ b/workspace-server/internal/handlers/workspace_test.go @@ -559,6 +559,48 @@ func TestWorkspaceCreate_ExternalURL_SSRFSafe(t *testing.T) { } } +// TestWorkspaceCreate_KimiRuntime_PreservesLabel asserts that a workspace +// created with runtime="kimi" takes the BYO-compute path (awaiting_agent, +// no Docker provisioning) and preserves the "kimi" label in the DB instead +// of coercing to "external". Regression guard for SOP runtime addition. +func TestWorkspaceCreate_KimiRuntime_PreservesLabel(t *testing.T) { + t.Setenv("MOLECULE_DEPLOY_MODE", "self-hosted") + t.Setenv("MOLECULE_ORG_ID", "") + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + + mock.ExpectBegin() + mock.ExpectExec("INSERT INTO workspaces"). + WithArgs(sqlmock.AnyArg(), "Kimi Agent", nil, 3, "kimi", sqlmock.AnyArg(), (*string)(nil), nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push"). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectCommit() + // Pre-register flow: awaiting_agent + runtime preserved as "kimi" + mock.ExpectExec("UPDATE workspaces SET status"). + WithArgs(models.StatusAwaitingAgent, "kimi", sqlmock.AnyArg()). + WillReturnResult(sqlmock.NewResult(0, 1)) + // Token issuance (workspace_auth_tokens, not workspace_tokens) + mock.ExpectExec("INSERT INTO workspace_auth_tokens"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + + body := `{"name":"Kimi Agent","runtime":"kimi","tier":3,"canvas":{"x":100,"y":100}}` + c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.Create(c) + + if w.Code != http.StatusCreated { + t.Errorf("expected status 201, got %d: %s", w.Code, w.Body.String()) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + // TestWorkspaceCreate_ExternalURL_SSRFMetadataBlocked asserts that an external // workspace created with a cloud-metadata URL is rejected with 400 before any // DB write. 169.254.0.0/16 is always blocked regardless of mode (SaaS or