From f7e29763245507e2c2dc0d1c9a4b6bab5ad3f6a9 Mon Sep 17 00:00:00 2001 From: claude-ceo-assistant Date: Sat, 23 May 2026 23:33:21 -0700 Subject: [PATCH] chore: retire unmaintained workspace runtimes --- README.md | 18 ++--- README.zh-CN.md | 16 ++-- canvas/src/components/tabs/ConfigTab.tsx | 12 +-- .../preflight-resolveRuntime.test.ts | 32 +++----- .../src/lib/__tests__/runtime-names.test.ts | 9 +-- canvas/src/lib/deploy-preflight.ts | 6 +- canvas/src/lib/runtime-names.ts | 6 +- canvas/src/lib/runtimeProfiles.ts | 2 +- docs/agent-runtime/cli-runtime.md | 45 +++-------- docs/agent-runtime/workspace-runtime.md | 8 +- docs/architecture/molecule-technical-doc.md | 10 +-- docs/architecture/overview.md | 2 +- docs/index.md | 6 +- docs/quickstart.md | 4 +- scripts/build-images.sh | 2 +- scripts/refresh-workspace-images.sh | 2 +- scripts/test-all-adapters.sh | 54 ++++---------- tests/e2e/lib/model_slug.sh | 10 --- tests/e2e/test_chat_upload_e2e.sh | 6 +- tests/e2e/test_comprehensive_e2e.sh | 32 ++++---- tests/e2e/test_model_slug.sh | 20 +++-- tests/e2e/test_notify_attachments_e2e.sh | 6 +- tests/e2e/test_priority_runtimes_e2e.sh | 74 +++---------------- tests/harness/seed.sh | 4 +- workspace-server/internal/bundle/importer.go | 4 +- .../internal/handlers/a2a_proxy.go | 2 +- .../internal/handlers/a2a_proxy_helpers.go | 2 +- .../internal/handlers/a2a_proxy_test.go | 14 ++-- .../internal/handlers/discovery.go | 16 ++-- .../internal/handlers/discovery_test.go | 34 ++++----- .../handlers/handlers_additional_test.go | 16 ++-- .../internal/handlers/handlers_test.go | 12 +-- .../internal/handlers/org_import.go | 2 +- .../handlers/plugins_install_eic_test.go | 1 - .../internal/handlers/restart_template.go | 4 +- .../internal/handlers/template_files_eic.go | 7 +- .../handlers/template_files_eic_test.go | 3 +- .../internal/handlers/workspace.go | 20 ++--- .../handlers/workspace_budget_test.go | 8 +- .../internal/handlers/workspace_provision.go | 2 +- 
.../internal/handlers/workspace_restart.go | 10 +-- .../internal/handlers/workspace_test.go | 32 ++++---- .../internal/provisioner/localbuild.go | 2 +- .../internal/provisioner/localbuild_test.go | 6 +- .../internal/provisioner/provisioner.go | 16 ++-- .../internal/provisioner/registry.go | 8 +- .../internal/provisioner/registry_test.go | 4 +- .../internal/registry/healthsweep.go | 6 +- .../internal/registry/healthsweep_test.go | 6 +- .../internal/registry/hibernation.go | 2 +- .../internal/registry/provisiontimeout.go | 4 +- .../registry/provisiontimeout_test.go | 23 +++--- workspace-server/internal/router/router.go | 2 +- .../migrations/011_workspace_runtime.sql | 2 +- 54 files changed, 256 insertions(+), 400 deletions(-) diff --git a/README.md b/README.md index de443a58..d35531ee 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Molecule AI is the most powerful way to govern an AI agent organization in produ It combines the parts that are usually scattered across demos, internal glue code, and framework-specific tooling into one product: - one org-native control plane for teams, roles, hierarchy, and lifecycle -- one runtime layer that lets **eight** agent runtimes — LangGraph, DeepAgents, Claude Code, CrewAI, AutoGen, **Hermes**, **Gemini CLI**, and OpenClaw — run side by side behind one workspace contract +- one runtime layer that lets **four** maintained agent runtimes — Claude Code, Codex, **Hermes**, and OpenClaw — run side by side behind one workspace contract - one memory model that keeps recall, sharing, and skill evolution aligned with organizational boundaries (Memory v2 backed by pgvector for semantic recall) - one operational surface for observing, pausing, restarting, inspecting, and improving live workspaces @@ -75,7 +75,7 @@ You do not wire collaboration paths by hand. Hierarchy defines the default commu ### 3. 
Runtime choice stops being a dead-end decision -LangGraph, DeepAgents, Claude Code, CrewAI, AutoGen, Hermes, Gemini CLI, and OpenClaw can all plug into the same workspace abstraction. Teams can standardize governance without forcing every group onto one runtime. +Claude Code, Codex, Hermes, and OpenClaw can all plug into the same workspace abstraction. Teams can standardize governance without forcing every group onto one runtime. ### 4. Memory is treated like infrastructure @@ -112,13 +112,9 @@ Molecule AI is not trying to replace the frameworks below. It is the system that | Runtime / architecture | Status in current repo | Native strength | What Molecule AI adds | |---|---|---|---| -| **LangGraph** | Shipping on `main` | Graph control, tool use, Python extensibility | Canvas orchestration, hierarchy routing, A2A, memory scopes, operational lifecycle | -| **DeepAgents** | Shipping on `main` | Deeper planning and decomposition | Same workspace contract, team topology, activity stream, restart behavior | | **Claude Code** | Shipping on `main` | Real coding workflows, CLI-native continuity | Secure workspace abstraction, A2A delegation, org boundaries, shared control plane | -| **CrewAI** | Shipping on `main` | Role-based crews | Persistent workspace identity, policy consistency, shared canvas and registry | -| **AutoGen** | Shipping on `main` | Assistant/tool orchestration | Standardized deployment, hierarchy-aware collaboration, shared ops plane | +| **Codex** | Shipping on `main` | OpenAI Codex CLI workflows | Secure workspace abstraction, A2A delegation, org boundaries, shared control plane | | **Hermes 4** | Shipping on `main` | Hybrid reasoning, native tools, json_schema (NousResearch/hermes-agent) | Option B upstream hook, A2A bridge to OpenAI-compat API, multi-provider provider derivation | -| **Gemini CLI** | Shipping on `main` | Google Gemini CLI continuity | Workspace lifecycle, A2A, hierarchy-aware collaboration, shared ops plane | | **OpenClaw** | 
Shipping on `main` | CLI-native runtime with its own session model | Workspace lifecycle, templates, activity logs, topology-aware collaboration | | **NemoClaw** | WIP on `feat/nemoclaw-t4-docker` | NVIDIA-oriented runtime path | Planned to join the same abstraction once merged; not yet part of `main` | @@ -209,7 +205,7 @@ The result is not just “an agent that learns.” It is **an organization that ### Runtime - standalone workspace-template images that install `molecule-ai-workspace-runtime` from the Gitea package registry; thin AMI in production (us-east-2) -- adapter-driven execution across **8 runtimes** (Claude Code, Hermes, Gemini CLI, LangGraph, DeepAgents, CrewAI, AutoGen, OpenClaw) +- adapter-driven execution across **4 maintained runtimes** (Claude Code, Codex, Hermes, OpenClaw) - Agent Card registration - **Memory v2 backed by pgvector** — per-tenant plugin sidecar serving HMA namespaces with FTS + semantic recall - plugin-mounted shared rules/skills @@ -245,7 +241,7 @@ The result is not just “an agent that learns.” It is **an organization that Molecule AI is especially strong when you need to run: - AI engineering teams with PM / Dev Lead / QA / Research / Ops roles -- mixed runtime organizations where one team prefers LangGraph and another prefers Claude Code +- mixed runtime organizations where one team prefers Hermes and another prefers Claude Code - long-lived agent organizations that need memory boundaries and reusable procedures - internal platforms that want to expose agent teams as structured infrastructure, not ad hoc scripts @@ -260,7 +256,7 @@ Canvas (Next.js 15, warm-paper :3000) <--HTTP / WS--> Platform (Go 1.25 :8080) +------------------------- shows ------------------------> workspaces, teams, tasks, traces, events Workspace Runtime (Python ≥3.11, image with adapters) - - 8 adapters: LangGraph / DeepAgents / Claude Code / CrewAI / AutoGen / Hermes / Gemini CLI / OpenClaw + - 4 adapters: Claude Code / Codex / Hermes / OpenClaw - Agent Card 
+ A2A server (typed-SSOT response path, RFC #2967) - heartbeat + activity + Memory v2 (pgvector semantic recall via per-tenant plugin sidecar) - skills + plugins + hot reload @@ -328,7 +324,7 @@ Then open `http://localhost:3000`: ## Current Scope -The current `main` branch ships the core platform, Canvas v4 (warm-paper themed), Memory v2 (pgvector semantic recall), the typed-SSOT A2A response path (RFC #2967), **eight production adapters** (Claude Code, Hermes, Gemini CLI, LangGraph, DeepAgents, CrewAI, AutoGen, OpenClaw), skill lifecycle, and operational surfaces. +The current `main` branch ships the core platform, Canvas v4 (warm-paper themed), Memory v2 (pgvector semantic recall), the typed-SSOT A2A response path (RFC #2967), **four maintained production adapters** (Claude Code, Codex, Hermes, OpenClaw), skill lifecycle, and operational surfaces. The companion private repo [`molecule-controlplane`](https://git.moleculesai.app/molecule-ai/molecule-controlplane) provides the SaaS surface — multi-tenant orchestration on EC2 + Neon + Cloudflare Tunnels, KMS envelope encryption, WorkOS auth, Stripe billing, and a `tenant_resources` audit table with a 30-min reconciler. diff --git a/README.zh-CN.md b/README.zh-CN.md index 12bcf57a..85189eb9 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -52,7 +52,7 @@ Molecule AI 是目前最强的 AI Agent 组织治理方案之一,用来把 age 它把过去分散在 demo、内部胶水代码和各类 framework 私有工具里的关键能力,收敛成一个产品: - 一套组织原生 control plane,管理团队、角色、层级和生命周期 -- 一套 runtime abstraction,让 **8 个** agent runtime —— LangGraph、DeepAgents、Claude Code、CrewAI、AutoGen、**Hermes**、**Gemini CLI**、OpenClaw —— 共用一套 workspace 契约 +- 一套 runtime abstraction,让 **4 个**维护中的 agent runtime —— Claude Code、Codex、**Hermes**、OpenClaw —— 共用一套 workspace 契约 - 一套与组织边界对齐的 memory 模型,把 recall、sharing 和 skill evolution 放进同一体系(Memory v2 由 pgvector 支撑语义召回) - 一套面向线上 workspace 的运维面,统一完成观测、暂停、重启、检查和持续改进 @@ -74,7 +74,7 @@ Molecule AI 填的就是这个空白。 ### 3. 
Runtime 选择不再是死路 -LangGraph、DeepAgents、Claude Code、CrewAI、AutoGen、Hermes、Gemini CLI、OpenClaw 都可以挂到同一个 workspace abstraction 下。团队可以统一治理方式,而不必统一到底层 runtime。 +Claude Code、Codex、Hermes、OpenClaw 都可以挂到同一个 workspace abstraction 下。团队可以统一治理方式,而不必统一到底层 runtime。 ### 4. Memory 被当成基础设施来做 @@ -111,13 +111,9 @@ Molecule AI 并不是要替代下面这些 framework,而是把它们纳入更 | Runtime / 架构 | 当前仓库状态 | 原生优势 | Molecule AI 额外补上的能力 | |---|---|---|---| -| **LangGraph** | `main` 已支持 | 图控制强、工具调用成熟、Python 扩展性好 | Canvas orchestration、层级路由、A2A、memory scope、operational lifecycle | -| **DeepAgents** | `main` 已支持 | 规划和任务拆解更强 | 同一套 workspace contract、团队拓扑、activity、restart 行为 | | **Claude Code** | `main` 已支持 | 真实编码工作流、CLI-native continuity | 安全 workspace 抽象、A2A delegation、组织边界、共享 control plane | -| **CrewAI** | `main` 已支持 | 角色型 crew 模式清晰 | 持久 workspace 身份、统一策略、共享 Canvas 和 registry | -| **AutoGen** | `main` 已支持 | assistant/tool orchestration | 统一部署、层级协作、共享运维平面 | +| **Codex** | `main` 已支持 | OpenAI Codex CLI 工作流 | 安全 workspace 抽象、A2A delegation、组织边界、共享 control plane | | **Hermes 4** | `main` 已支持 | 混合推理、原生工具调用、json_schema 输出(NousResearch/hermes-agent) | Option B 上游 hook、A2A 桥接 OpenAI 兼容 API、多 provider 自动派生 | -| **Gemini CLI** | `main` 已支持 | Google Gemini CLI 持续会话 | workspace 生命周期、A2A、层级感知协作、共享运维平面 | | **OpenClaw** | `main` 已支持 | CLI-native runtime,自有 session 模型 | workspace 生命周期、templates、activity logs、拓扑感知协作 | | **NemoClaw** | `feat/nemoclaw-t4-docker` 分支 WIP | NVIDIA 方向 runtime 路线 | 计划并入同一抽象层,但当前还不是 `main` 已合并能力 | @@ -208,7 +204,7 @@ Molecule AI 并不是要替代下面这些 framework,而是把它们纳入更 ### Runtime - 统一 `workspace/` 镜像;生产环境采用 thin AMI(us-east-2) -- adapter 驱动执行,覆盖 **8 个 runtime**(Claude Code、Hermes、Gemini CLI、LangGraph、DeepAgents、CrewAI、AutoGen、OpenClaw) +- adapter 驱动执行,覆盖 **4 个维护中的 runtime**(Claude Code、Codex、Hermes、OpenClaw) - Agent Card 注册 - **Memory v2 由 pgvector 支撑** —— 每个 tenant 一个 plugin sidecar,承载 HMA namespace、FTS 与语义召回 - plugin 挂载共享 rules/skills @@ -259,7 +255,7 @@ Canvas (Next.js 15, warm-paper :3000) <--HTTP / WS--> 
Platform (Go 1.25 :8080) +------------------------- 展示 ------------------------> workspaces, teams, tasks, traces, events Workspace Runtime (Python ≥3.11,含 adapter 集合的镜像) - - 8 个 adapter: LangGraph / DeepAgents / Claude Code / CrewAI / AutoGen / Hermes / Gemini CLI / OpenClaw + - 4 个 adapter: Claude Code / Codex / Hermes / OpenClaw - Agent Card + A2A server(typed-SSOT 响应路径,RFC #2967) - heartbeat + activity + Memory v2(pgvector 语义召回,per-tenant plugin sidecar) - skills + plugins + hot reload @@ -321,7 +317,7 @@ npm run dev ## 当前范围说明 -当前 `main` 已经包含核心平台、Canvas v4(warm-paper 主题)、Memory v2(pgvector 语义召回)、typed-SSOT A2A 响应路径(RFC #2967)、**8 个正式 adapter**(Claude Code、Hermes、Gemini CLI、LangGraph、DeepAgents、CrewAI、AutoGen、OpenClaw)、skill lifecycle,以及主要运维面。 +当前 `main` 已经包含核心平台、Canvas v4(warm-paper 主题)、Memory v2(pgvector 语义召回)、typed-SSOT A2A 响应路径(RFC #2967)、**4 个维护中的正式 adapter**(Claude Code、Codex、Hermes、OpenClaw)、skill lifecycle,以及主要运维面。 配套的私有仓库 [`molecule-controlplane`](https://git.moleculesai.app/molecule-ai/molecule-controlplane) 提供 SaaS 层 —— 多租户编排(EC2 + Neon + Cloudflare Tunnels)、KMS 信封加密、WorkOS 鉴权、Stripe 计费,以及 `tenant_resources` 审计表加 30 分钟 reconciler。 diff --git a/canvas/src/components/tabs/ConfigTab.tsx b/canvas/src/components/tabs/ConfigTab.tsx index 1f4e51af..2cd5fc0d 100644 --- a/canvas/src/components/tabs/ConfigTab.tsx +++ b/canvas/src/components/tabs/ConfigTab.tsx @@ -301,16 +301,13 @@ export function deriveProvidersFromModels(models: ModelSpec[]): string[] { // config.yaml` on the container is a separate runtime-internal file, // not this one. 
const RUNTIMES_WITH_OWN_CONFIG = new Set(["external", "kimi", "kimi-cli", "openclaw"]); +const SUPPORTED_RUNTIME_VALUES = new Set(["claude-code", "codex", "openclaw", "hermes"]); const FALLBACK_RUNTIME_OPTIONS: RuntimeOption[] = [ - { value: "", label: "LangGraph (default)", models: [], providers: [] }, { value: "claude-code", label: "Claude Code", models: [], providers: [] }, - { value: "crewai", label: "CrewAI", models: [], providers: [] }, - { value: "autogen", label: "AutoGen", models: [], providers: [] }, - { value: "deepagents", label: "DeepAgents", models: [], providers: [] }, + { value: "codex", label: "Codex", models: [], providers: [] }, { value: "openclaw", label: "OpenClaw", models: [], providers: [] }, { value: "hermes", label: "Hermes", models: [], providers: [] }, - { value: "gemini-cli", label: "Gemini CLI", models: [], providers: [] }, ]; export function ConfigTab({ workspaceId }: Props) { @@ -499,10 +496,9 @@ export function ConfigTab({ workspaceId }: Props) { .then((rows) => { if (cancelled || !Array.isArray(rows)) return; const byRuntime = new Map(); - byRuntime.set("", { value: "", label: "LangGraph (default)", models: [], providers: [] }); for (const r of rows) { const v = (r.runtime || "").trim(); - if (!v || v === "langgraph") continue; + if (!SUPPORTED_RUNTIME_VALUES.has(v)) continue; // Last template wins if two templates share a runtime — rare, and the // one with the richer models list is probably newer. 
const existing = byRuntime.get(v); @@ -512,7 +508,7 @@ export function ConfigTab({ workspaceId }: Props) { byRuntime.set(v, { value: v, label: r.name || v, models, providers }); } } - if (byRuntime.size > 1) setRuntimeOptions(Array.from(byRuntime.values())); + if (byRuntime.size > 0) setRuntimeOptions(Array.from(byRuntime.values())); }) .catch(() => { /* keep fallback */ }); return () => { cancelled = true; }; diff --git a/canvas/src/lib/__tests__/preflight-resolveRuntime.test.ts b/canvas/src/lib/__tests__/preflight-resolveRuntime.test.ts index 609e756d..7eeefb02 100644 --- a/canvas/src/lib/__tests__/preflight-resolveRuntime.test.ts +++ b/canvas/src/lib/__tests__/preflight-resolveRuntime.test.ts @@ -12,47 +12,35 @@ import { resolveRuntime } from "../deploy-preflight"; describe("resolveRuntime", () => { describe("explicit runtime-map entries", () => { - it('maps "langgraph" to "langgraph"', () => { - expect(resolveRuntime("langgraph")).toBe("langgraph"); - }); - it('maps "claude-code-default" to "claude-code"', () => { expect(resolveRuntime("claude-code-default")).toBe("claude-code"); }); + it('maps "codex" to "codex"', () => { + expect(resolveRuntime("codex")).toBe("codex"); + }); + + it('maps "hermes" to "hermes"', () => { + expect(resolveRuntime("hermes")).toBe("hermes"); + }); + it('maps "openclaw" to "openclaw"', () => { expect(resolveRuntime("openclaw")).toBe("openclaw"); }); - - it('maps "deepagents" to "deepagents"', () => { - expect(resolveRuntime("deepagents")).toBe("deepagents"); - }); - - it('maps "crewai" to "crewai"', () => { - expect(resolveRuntime("crewai")).toBe("crewai"); - }); - - it('maps "autogen" to "autogen"', () => { - expect(resolveRuntime("autogen")).toBe("autogen"); - }); }); describe("identity fallback for modern template ids", () => { - it("returns the id unchanged when not in the map", () => { - expect(resolveRuntime("hermes")).toBe("hermes"); - }); - it("strips trailing -default suffix as fallback", () => { 
expect(resolveRuntime("hermes-default")).toBe("hermes"); }); it("strips -default only when it is the suffix", () => { // "default-something" should NOT strip - expect(resolveRuntime("default-langgraph")).toBe("default-langgraph"); + expect(resolveRuntime("default-custom")).toBe("default-custom"); }); it("returns the id unchanged when id has no -default suffix", () => { - expect(resolveRuntime("gemini-cli")).toBe("gemini-cli"); + expect(resolveRuntime("custom-runtime")).toBe("custom-runtime"); }); it("handles custom template ids from community templates", () => { diff --git a/canvas/src/lib/__tests__/runtime-names.test.ts b/canvas/src/lib/__tests__/runtime-names.test.ts index ed9fbe37..739102e1 100644 --- a/canvas/src/lib/__tests__/runtime-names.test.ts +++ b/canvas/src/lib/__tests__/runtime-names.test.ts @@ -13,11 +13,9 @@ import { runtimeDisplayName } from "../runtime-names"; describe("runtimeDisplayName", () => { it.each([ ["claude-code", "Claude Code"], - ["langgraph", "LangGraph"], - ["deepagents", "DeepAgents"], + ["codex", "Codex"], + ["hermes", "Hermes"], ["openclaw", "OpenClaw"], - ["crewai", "CrewAI"], - ["autogen", "AutoGen"], ])("known runtime %q maps to %q", (input, expected) => { expect(runtimeDisplayName(input)).toBe(expected); }); @@ -25,7 +23,6 @@ describe("runtimeDisplayName", () => { it("unknown runtime falls back to the input string verbatim", () => { // A future runtime not yet in the lookup map should render with // its own id — better than a generic placeholder for ops debugging. - expect(runtimeDisplayName("hermes")).toBe("hermes"); expect(runtimeDisplayName("custom-runtime-9000")).toBe( "custom-runtime-9000", ); @@ -43,6 +40,6 @@ describe("runtimeDisplayName", () => { // the input "for safety" doesn't silently change behavior — the // upstream slug is already normalized lowercase. 
expect(runtimeDisplayName("Claude-Code")).toBe("Claude-Code"); - expect(runtimeDisplayName("LANGGRAPH")).toBe("LANGGRAPH"); + expect(runtimeDisplayName("CODEX")).toBe("CODEX"); }); }); diff --git a/canvas/src/lib/deploy-preflight.ts b/canvas/src/lib/deploy-preflight.ts index f2821d35..90f64892 100644 --- a/canvas/src/lib/deploy-preflight.ts +++ b/canvas/src/lib/deploy-preflight.ts @@ -63,12 +63,10 @@ export interface Template extends TemplateLike { * id needs a non-identity mapping. */ export function resolveRuntime(templateId: string): string { const runtimeMap: Record = { - langgraph: "langgraph", "claude-code-default": "claude-code", + codex: "codex", + hermes: "hermes", openclaw: "openclaw", - deepagents: "deepagents", - crewai: "crewai", - autogen: "autogen", }; return runtimeMap[templateId] ?? templateId.replace(/-default$/, ""); } diff --git a/canvas/src/lib/runtime-names.ts b/canvas/src/lib/runtime-names.ts index f01e9b11..bf6ed978 100644 --- a/canvas/src/lib/runtime-names.ts +++ b/canvas/src/lib/runtime-names.ts @@ -4,11 +4,9 @@ const RUNTIME_NAMES: Record = { "claude-code": "Claude Code", - langgraph: "LangGraph", - deepagents: "DeepAgents", + codex: "Codex", + hermes: "Hermes", openclaw: "OpenClaw", - crewai: "CrewAI", - autogen: "AutoGen", kimi: "Kimi", "kimi-cli": "Kimi CLI", }; diff --git a/canvas/src/lib/runtimeProfiles.ts b/canvas/src/lib/runtimeProfiles.ts index ae33bf26..1f8c228c 100644 --- a/canvas/src/lib/runtimeProfiles.ts +++ b/canvas/src/lib/runtimeProfiles.ts @@ -49,7 +49,7 @@ export interface RuntimeProfile { } /** The floor every runtime inherits unless it overrides. Calibrated for - * docker-local fast runtimes (claude-code, langgraph, crewai) where cold + * docker-local fast runtimes (claude-code, codex, openclaw) where cold * boot is 30-90s. 
*/ export const DEFAULT_RUNTIME_PROFILE: Required< Pick diff --git a/docs/agent-runtime/cli-runtime.md b/docs/agent-runtime/cli-runtime.md index d2b31059..a9359b7b 100644 --- a/docs/agent-runtime/cli-runtime.md +++ b/docs/agent-runtime/cli-runtime.md @@ -2,14 +2,14 @@ ## Overview -The workspace runtime uses a **pluggable adapter architecture** — each agent infrastructure (Claude Code, OpenClaw, LangGraph, CrewAI, AutoGen, etc.) has its own adapter that bridges the A2A protocol to the infra's native interface. +The workspace runtime uses a **pluggable adapter architecture** — each maintained agent infrastructure (Claude Code, Codex, Hermes, OpenClaw) has its own adapter that bridges the A2A protocol to the infra's native interface. Adapters live in `workspace/adapters//` and are auto-discovered at startup. Each adapter implements `BaseAdapter` (from `adapters/base.py`) with `setup()` and `create_executor()` methods. The runtime is selected via `config.yaml`: ```yaml -runtime: claude-code # or: langgraph, openclaw, deepagents, crewai, autogen +runtime: claude-code # or: codex, hermes, openclaw runtime_config: model: sonnet auth_token_file: .auth-token @@ -18,7 +18,7 @@ runtime_config: ## How It Works -The unified `workspace-template` Docker image includes both Python (LangGraph) and Node.js (CLI runtimes). At startup, `main.py` checks the `runtime` field in `config.yaml`, discovers the matching adapter in `adapters//`, calls `adapter.setup(config)` then `adapter.create_executor(config)` to get an `AgentExecutor` that handles A2A requests. +The unified runtime checks the `runtime` field in `config.yaml`, discovers the matching adapter, calls `adapter.setup(config)` then `adapter.create_executor(config)` to get an `AgentExecutor` that handles A2A requests. 
``` A2A request arrives @@ -28,7 +28,7 @@ AgentExecutor.execute(context, event_queue) | - extracts user message from A2A parts | - extracts conversation history from params.metadata.history | - sets current_task on heartbeat (shows on canvas card) - | - invokes the runtime (LangGraph graph, CLI subprocess, etc.) + | - invokes the runtime adapter v Response → A2A event queue → JSON-RPC response ``` @@ -37,9 +37,9 @@ Response → A2A event queue → JSON-RPC response Chat sessions in the Canvas UI send prior messages (up to 20) via `params.metadata.history` in each A2A `message/send` request. Executors extract this history: -- **LangGraph/DeepAgents**: Prepends history as `("human", text)` / `("ai", text)` tuples to the LangGraph message list -- **CrewAI/AutoGen**: Prepends history as a text prefix in the task description (`"Conversation so far:\n..."`) - **Claude Code**: Uses `--resume ` for native session continuity (history not needed) +- **Codex**: Uses the Codex runtime's native session state +- **Hermes**: Uses Hermes' agent runtime session handling - **OpenClaw**: Uses `--session-id` for native session continuity ### Current Task Reporting @@ -48,10 +48,6 @@ All executors update the workspace's `current_task` via the heartbeat during exe ## Built-in Adapters -### LangGraph (`runtime: langgraph`) — Default - -Full Python agent with LangGraph ReAct pattern. Supports skills, tools, plugins, peer coordination, and team routing. - ### Claude Code (`runtime: claude-code`) ```yaml @@ -71,35 +67,18 @@ The SDK uses the same Claude Code engine under the hood — plugins, CLAUDE.md d **Important:** Claude Code refuses to run as root with `--dangerously-skip-permissions`. The Dockerfile creates a non-root `agent` user. -### CrewAI (`runtime: crewai`) - -Role-based multi-agent framework. Creates a CrewAI Agent + Task + Crew per request with A2A delegation tools (`delegate_to_peer`, `list_available_peers`). 
+### Codex (`runtime: codex`) ```yaml -runtime: crewai -model: openrouter:google/gemini-2.5-flash +runtime: codex +model: openai/gpt-5.3-codex ``` -**Auth:** Uses `OPENROUTER_API_KEY` or `OPENAI_API_KEY` env var. - -### AutoGen (`runtime: autogen`) - -Microsoft AutoGen AssistantAgent with tool use. Creates an `AssistantAgent` per request with A2A delegation tools. +### Hermes (`runtime: hermes`) ```yaml -runtime: autogen -model: openai:gpt-4.1-mini -``` - -**Auth:** Uses `OPENAI_API_KEY` env var. - -### DeepAgents (`runtime: deepagents`) - -LangGraph-based agent with deep planning capabilities. Uses the same `LangGraphA2AExecutor` as the default runtime but with a specialized agent setup including delegation, memory, and search tools. - -```yaml -runtime: deepagents -model: openrouter:google/gemini-2.5-flash +runtime: hermes +model: openai/gpt-4o ``` ### OpenClaw (`runtime: openclaw`) diff --git a/docs/agent-runtime/workspace-runtime.md b/docs/agent-runtime/workspace-runtime.md index 38c5eae8..700f211c 100644 --- a/docs/agent-runtime/workspace-runtime.md +++ b/docs/agent-runtime/workspace-runtime.md @@ -4,13 +4,11 @@ The `workspace/` directory is Molecule AI's unified runtime image. Every provisi ## Runtime Matrix In Current `main` -Current `main` ships six adapters: +Current `main` ships four maintained adapters: -- `langgraph` -- `deepagents` - `claude-code` -- `crewai` -- `autogen` +- `codex` +- `hermes` - `openclaw` This is the merged runtime surface today. Branch-level experiments such as NemoClaw are separate and should be treated as roadmap/WIP, not merged support. 
diff --git a/docs/architecture/molecule-technical-doc.md b/docs/architecture/molecule-technical-doc.md index 48e074b6..cb4410db 100644 --- a/docs/architecture/molecule-technical-doc.md +++ b/docs/architecture/molecule-technical-doc.md @@ -511,7 +511,7 @@ description: "" version: "1.0.0" tier: 2 # 1=sandboxed, 2=standard, 3=privileged, 4=full-host model: "anthropic:claude-sonnet-4-6" # provider:model syntax -runtime: "langgraph" # langgraph | deepagents | claude-code | crewai | autogen | openclaw +runtime: "claude-code" # claude-code | codex | hermes | openclaw runtime_config: # Runtime-specific settings command: "claude" # For CLI runtimes args: [] @@ -565,15 +565,13 @@ compliance: max_task_duration_seconds: 300 ``` -### Six Runtime Adapters +### Four Runtime Adapters | Adapter | Core Strength | Image Tag | |---------|--------------|-----------| -| **LangGraph** | Graph-based state machine, tool use, streaming | `workspace-template:langgraph` | -| **DeepAgents** | Deep planning, multi-step task decomposition | `workspace-template:deepagents` | | **Claude Code** | Native coding workflows, CLI continuity, OAuth auth | `workspace-template:claude-code` | -| **CrewAI** | Role-based crews, structured task orchestration | `workspace-template:crewai` | -| **AutoGen** | Multi-agent conversations, explicit strategies | `workspace-template:autogen` | +| **Codex** | OpenAI Codex coding workflows | `workspace-template:codex` | +| **Hermes** | Hermes agent runtime | `workspace-template:hermes` | | **OpenClaw** | CLI-native runtime, own session model | `workspace-template:openclaw` | **Branch-level WIP**: NemoClaw (NVIDIA T4 + Docker socket) on `feat/nemoclaw-t4-docker`. 
diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md index f56d8144..7634e2e7 100644 --- a/docs/architecture/overview.md +++ b/docs/architecture/overview.md @@ -44,7 +44,7 @@ All three call `onWorkspaceOffline`, which broadcasts `WORKSPACE_OFFLINE` and ca ### Template Resolution (Workspace Create) -Runtime detection happens **before** the DB insert: if `payload.Runtime` is empty and a template is specified, the handler reads `runtime:` from `configsDir/template/config.yaml` first. If still empty, it defaults to `"langgraph"`. This ensures the correct runtime (e.g. `claude-code`) is persisted in the DB and used for container image selection. +Runtime detection happens **before** the DB insert: if `payload.Runtime` is empty and a template is specified, the handler reads `runtime:` from `configsDir/template/config.yaml` first. If still empty, it defaults to `"claude-code"`. This ensures the correct runtime is persisted in the DB and used for container image selection. When the requested template does not exist, the Create handler falls back in order: diff --git a/docs/index.md b/docs/index.md index cbaf7788..2bbe5037 100644 --- a/docs/index.md +++ b/docs/index.md @@ -24,7 +24,7 @@ features: details: Build agent organizations as nested workspaces on a live React Flow canvas with drag-to-nest hierarchy, template deployment, bundles, and real-time updates. icon: "🗺️" - title: Runtime Compatibility - details: Current main ships adapters for LangGraph, DeepAgents, Claude Code, CrewAI, AutoGen, and OpenClaw under one workspace contract and A2A surface. + details: Current main ships adapters for Claude Code, Codex, Hermes, and OpenClaw under one workspace contract and A2A surface. icon: "⚙️" - title: Hierarchical Memory details: HMA-style LOCAL, TEAM, and GLOBAL scopes backed by the v2 memory plugin (per-tenant pgvector sidecar with FTS + semantic recall). 
@@ -49,13 +49,13 @@ features: |---|---| | **Canvas** | Empty-state deployment, onboarding guide, 10-tab side panel, template palette, bundle import/export, drag-to-nest teams, search, activity and trace views | | **Platform** | Workspace CRUD, registry, A2A proxy, team expansion, approvals, secrets, global secrets, memory APIs, files API, terminal, viewport persistence, WebSocket fanout | -| **Runtime** | One workspace image with six shipping adapters on `main`: LangGraph, DeepAgents, Claude Code, CrewAI, AutoGen, OpenClaw | +| **Runtime** | One workspace image with four shipping adapters on `main`: Claude Code, Codex, Hermes, OpenClaw | | **Memory** | v2 plugin (pgvector + FTS) serving scoped agent memories under per-workspace namespaces; key/value workspace memory; session-search recall | | **Skills** | Local skill packages, plugin-mounted shared skills/rules, audit/install/publish CLI helpers, hot reload | ## Compatibility Note -`main` currently ships six runtime adapters. `NemoClaw` appears in branch-level work (`feat/nemoclaw-t4-docker`) and is not documented here as merged `main` functionality. +`main` currently ships four runtime adapters: Claude Code, Codex, Hermes, and OpenClaw. `NemoClaw` appears in branch-level work (`feat/nemoclaw-t4-docker`) and is not documented here as merged `main` functionality. ## Recommended Reading diff --git a/docs/quickstart.md b/docs/quickstart.md index e8e16a6c..ff2a0457 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -238,7 +238,7 @@ No inbound firewall rules needed — the agent initiates the outbound WebSocket ## What To Try Next - **Expand to a team:** right-click a workspace and choose `Expand to Team`. -- **Switch runtime:** use `Config -> Runtime` to move between LangGraph, DeepAgents, Claude Code, CrewAI, AutoGen, and OpenClaw. +- **Switch runtime:** use `Config -> Runtime` to move between Claude Code, Codex, Hermes, and OpenClaw. 
- **Inspect operations:** check `Activity`, `Traces`, `Events`, and `Terminal`. - **Use global keys:** configure one provider once in `Secrets & API Keys -> Global`. - **Import a template:** use the template palette or `POST /templates/import`. @@ -268,7 +268,7 @@ Browser --> Canvas (Next.js :3000) | v Provisioned workspaces - (LangGraph / Claude Code / CrewAI / AutoGen / etc.) + (Claude Code / Codex / Hermes / OpenClaw) ``` For the full system model, see [Architecture](./architecture/architecture.md). diff --git a/scripts/build-images.sh b/scripts/build-images.sh index 095eae24..89e8ae4f 100755 --- a/scripts/build-images.sh +++ b/scripts/build-images.sh @@ -8,7 +8,7 @@ cd "$(dirname "$0")/../workspace-template" echo "=== Building base image ===" docker build -t workspace-template:base -t workspace-template:latest . -for adapter in langgraph claude_code openclaw deepagents crewai autogen; do +for adapter in claude_code codex hermes openclaw; do DOCKERFILE="adapters/${adapter}/Dockerfile" if [ ! 
-f "$DOCKERFILE" ]; then echo "Skipping $adapter (no Dockerfile)" diff --git a/scripts/refresh-workspace-images.sh b/scripts/refresh-workspace-images.sh index 82b18dc7..56d0e0ee 100755 --- a/scripts/refresh-workspace-images.sh +++ b/scripts/refresh-workspace-images.sh @@ -32,7 +32,7 @@ log() { echo -e "${GREEN}[refresh]${NC} $1" >&2; } warn() { echo -e "${YELLOW}[refresh]${NC} $1" >&2; } err() { echo -e "${RED}[refresh]${NC} $1" >&2; } -ALL_RUNTIMES=(claude-code langgraph crewai autogen deepagents hermes gemini-cli openclaw) +ALL_RUNTIMES=(claude-code codex hermes openclaw) RUNTIMES=("${ALL_RUNTIMES[@]}") RECREATE=true diff --git a/scripts/test-all-adapters.sh b/scripts/test-all-adapters.sh index 9468932b..7fefb13b 100755 --- a/scripts/test-all-adapters.sh +++ b/scripts/test-all-adapters.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# E2E test: All 6 adapters — create one agent per runtime, test A2A between all +# E2E test: all maintained adapters — create one agent per runtime, test A2A set -euo pipefail PLATFORM="${1:-http://localhost:8080}" @@ -52,12 +52,12 @@ a2a_send() { } echo "============================================" -echo " All-Adapters E2E Test (6 runtimes)" +echo " All-Adapters E2E Test (4 runtimes)" echo "============================================" echo "" # --- Create workspaces --- -echo "--- Step 1: Create 6 workspaces ---" +echo "--- Step 1: Create 4 workspaces ---" R=$(curl -s -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \ -d '{"name":"Alice-Claude","role":"claude-code test","tier":2,"template":"claude-code-default"}') @@ -65,9 +65,9 @@ ALICE=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id' check "Create Alice (claude-code)" "provisioning" "$R" R=$(curl -s -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \ - -d '{"name":"Bob-LangGraph","role":"langgraph test","tier":2,"template":"langgraph"}') + -d '{"name":"Bob-Codex","role":"codex test","tier":2,"template":"codex"}') BOB=$(echo 
"$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") -check "Create Bob (langgraph)" "provisioning" "$R" +check "Create Bob (codex)" "provisioning" "$R" R=$(curl -s -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \ -d '{"name":"Carol-OpenClaw","role":"openclaw test","tier":2,"template":"openclaw"}') @@ -75,29 +75,19 @@ CAROL=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id' check "Create Carol (openclaw)" "provisioning" "$R" R=$(curl -s -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \ - -d '{"name":"Dave-DeepAgents","role":"deepagents test","tier":2,"template":"deepagents"}') + -d '{"name":"Dave-Hermes","role":"hermes test","tier":2,"template":"hermes"}') DAVE=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") -check "Create Dave (deepagents)" "provisioning" "$R" - -R=$(curl -s -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \ - -d '{"name":"Eve-CrewAI","role":"crewai test","tier":2,"template":"crewai"}') -EVE=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") -check "Create Eve (crewai)" "provisioning" "$R" - -R=$(curl -s -X POST "$PLATFORM/workspaces" -H 'Content-Type: application/json' \ - -d '{"name":"Frank-AutoGen","role":"autogen test","tier":2,"template":"autogen"}') -FRANK=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])") -check "Create Frank (autogen)" "provisioning" "$R" +check "Create Dave (hermes)" "provisioning" "$R" # --- Set API keys (skip Claude which uses OAuth) --- echo "" echo "--- Step 2: Set API keys ---" -for ID in $BOB $CAROL $DAVE $EVE $FRANK; do +for ID in $BOB $CAROL $DAVE; do curl -s -X POST "$PLATFORM/workspaces/$ID/secrets" \ -H 'Content-Type: application/json' \ -d "{\"key\":\"OPENAI_API_KEY\",\"value\":\"$OPENAI_KEY\"}" > /dev/null done -echo "Set OPENAI_API_KEY on 5 agents" +echo "Set OPENAI_API_KEY on 3 agents" # Auto-restart happens automatically 
when secrets are set echo "Secrets trigger auto-restart — waiting for agents to come back..." @@ -105,13 +95,11 @@ sleep 15 # --- Wait for all online --- echo "" -echo "--- Step 3: Wait for agents (OpenClaw ~3min, CrewAI/AutoGen/DeepAgents ~2min) ---" +echo "--- Step 3: Wait for agents (OpenClaw ~3min, Hermes may take longer) ---" wait_online "$ALICE" "Alice-Claude" 20 && check "Alice online" "ok" "ok" || check "Alice online" "online" "timeout" -wait_online "$BOB" "Bob-LangGraph" 60 && check "Bob online" "ok" "ok" || check "Bob online" "online" "timeout" -wait_online "$DAVE" "Dave-DeepAgents" 120 && check "Dave online" "ok" "ok" || check "Dave online" "online" "timeout" -wait_online "$EVE" "Eve-CrewAI" 120 && check "Eve online" "ok" "ok" || check "Eve online" "online" "timeout" -wait_online "$FRANK" "Frank-AutoGen" 120 && check "Frank online" "ok" "ok" || check "Frank online" "online" "timeout" +wait_online "$BOB" "Bob-Codex" 60 && check "Bob online" "ok" "ok" || check "Bob online" "online" "timeout" +wait_online "$DAVE" "Dave-Hermes" 180 && check "Dave online" "ok" "ok" || check "Dave online" "online" "timeout" wait_online "$CAROL" "Carol-OpenClaw" 360 && check "Carol online" "ok" "ok" || check "Carol online" "online" "timeout" # --- Test A2A messages --- @@ -123,7 +111,7 @@ RESP=$(a2a_send "$ALICE" "say hello in one word") echo " -> $RESP" check "Alice responds" "hello" "$RESP" -echo " Talking to Bob (LangGraph)..." +echo " Talking to Bob (Codex)..." RESP=$(a2a_send "$BOB" "say hello in one word") echo " -> $RESP" check "Bob responds" "hello" "$RESP" @@ -133,21 +121,11 @@ RESP=$(a2a_send "$CAROL" "say hello in one word") echo " -> $RESP" check "Carol responds" "hello" "$RESP" -echo " Talking to Dave (DeepAgents)..." +echo " Talking to Dave (Hermes)..." RESP=$(a2a_send "$DAVE" "say hello in one word") echo " -> $RESP" check "Dave responds" "hello" "$RESP" -echo " Talking to Eve (CrewAI)..." 
-RESP=$(a2a_send "$EVE" "say hello in one word") -echo " -> $RESP" -check "Eve responds" "hello" "$RESP" - -echo " Talking to Frank (AutoGen)..." -RESP=$(a2a_send "$FRANK" "say hello in one word") -echo " -> $RESP" -check "Frank responds" "hello" "$RESP" - # --- Peer discovery --- echo "" echo "--- Step 5: Peer discovery ---" @@ -157,7 +135,7 @@ peers = json.load(sys.stdin) print(f'{len(peers)} peers: {\" \".join(p.get(\"name\",\"\") for p in peers)}') " 2>/dev/null) echo " Alice sees: $R" -check "Alice sees 5 peers" "5 peers" "$R" +check "Alice sees 3 peers" "3 peers" "$R" # --- Isolation --- echo "" @@ -168,7 +146,7 @@ check "No ws-* dirs on host" "0" "$HOST_WS" # --- Cleanup --- echo "" echo "--- Step 7: Cleanup ---" -for ID in $ALICE $BOB $CAROL $DAVE $EVE $FRANK; do +for ID in $ALICE $BOB $CAROL $DAVE; do curl -s -X DELETE "$PLATFORM/workspaces/$ID" > /dev/null 2>&1 done check "Cleanup" "ok" "ok" diff --git a/tests/e2e/lib/model_slug.sh b/tests/e2e/lib/model_slug.sh index 555ce2eb..dc56b033 100755 --- a/tests/e2e/lib/model_slug.sh +++ b/tests/e2e/lib/model_slug.sh @@ -10,15 +10,6 @@ # "gpt-4o" falls through to Anthropic # default + 401, see PR #1714.) # -# langgraph → "openai:gpt-4o" (colon-form: langchain init_chat_model -# requires ":". -# Slash-form was misinterpreted as -# OpenRouter routing → fell through -# without auth, surfaced 2026-05-03 -# after the a2a-sdk v1 contract bugs -# PR #2558+#2563+#2567 cleared the -# masking layers.) 
-# # claude-code → auth-aware: # E2E_MINIMAX_API_KEY → "MiniMax-M2" # E2E_ANTHROPIC_API_KEY → "claude-sonnet-4-6" @@ -51,7 +42,6 @@ pick_model_slug() { fi case "$runtime" in hermes) printf 'openai/gpt-4o' ;; - langgraph) printf 'openai:gpt-4o' ;; claude-code) if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then printf 'MiniMax-M2' diff --git a/tests/e2e/test_chat_upload_e2e.sh b/tests/e2e/test_chat_upload_e2e.sh index fd45516a..75de4034 100755 --- a/tests/e2e/test_chat_upload_e2e.sh +++ b/tests/e2e/test_chat_upload_e2e.sh @@ -15,7 +15,7 @@ # Required env: # BASE default http://localhost:8080 # override to https://..staging... -# WORKSPACE_RUNTIME default langgraph (any internal runtime) +# WORKSPACE_RUNTIME default claude-code (any maintained internal runtime) # # Exit codes: # 0 upload + read-back round-trip succeeded @@ -26,7 +26,7 @@ set -uo pipefail BASE="${BASE:-http://localhost:8080}" -RUNTIME="${WORKSPACE_RUNTIME:-langgraph}" +RUNTIME="${WORKSPACE_RUNTIME:-claude-code}" PARENT="" PARENT_TOK="" @@ -49,7 +49,7 @@ trap cleanup EXIT INT TERM echo "[1/5] POST /workspaces (runtime=$RUNTIME)..." 
P_RESP=$(curl -sS -X POST "$BASE/workspaces" \ -H "Content-Type: application/json" \ - -d "{\"name\":\"e2e-chat-upload\",\"runtime\":\"$RUNTIME\",\"tier\":2}") + -d "{\"name\":\"e2e-chat-upload\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"sonnet\"}") PARENT=$(echo "$P_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null) [ -n "$PARENT" ] || { echo " ✗ workspace create failed: $P_RESP"; exit 1; } echo " ✓ workspace=$PARENT" diff --git a/tests/e2e/test_comprehensive_e2e.sh b/tests/e2e/test_comprehensive_e2e.sh index 9d4247ca..619c9799 100755 --- a/tests/e2e/test_comprehensive_e2e.sh +++ b/tests/e2e/test_comprehensive_e2e.sh @@ -137,14 +137,14 @@ check "Create claude-code workspace" '"status":"provisioning"' "$R" RT_CC_ID=$(echo "$R" | jq_extract "['id']") R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \ - -d '{"name":"RT LangGraph","role":"Test","tier":2,"runtime":"langgraph"}') -check "Create langgraph workspace" '"status":"provisioning"' "$R" -RT_LG_ID=$(echo "$R" | jq_extract "['id']") + -d '{"name":"RT Codex","role":"Test","tier":2,"runtime":"codex"}') +check "Create codex workspace" '"status":"provisioning"' "$R" +RT_CX_ID=$(echo "$R" | jq_extract "['id']") R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \ - -d '{"name":"RT CrewAI","role":"Test","tier":2,"runtime":"crewai"}') -check "Create crewai workspace" '"status":"provisioning"' "$R" -RT_CR_ID=$(echo "$R" | jq_extract "['id']") + -d '{"name":"RT Hermes","role":"Test","tier":2,"runtime":"hermes"}') +check "Create hermes workspace" '"status":"provisioning"' "$R" +RT_HM_ID=$(echo "$R" | jq_extract "['id']") # Wait for containers to start (poll up to 30s for first one to appear) if command -v docker &>/dev/null; then @@ -174,8 +174,8 @@ if command -v docker &>/dev/null; then } _check_image "$RT_CC_ID" "claude-code" "claude-code uses claude-code image" - _check_image "$RT_LG_ID" "langgraph" "langgraph uses langgraph 
image" - _check_image "$RT_CR_ID" "crewai" "crewai uses crewai image" + _check_image "$RT_CX_ID" "codex" "codex uses codex image" + _check_image "$RT_HM_ID" "hermes" "hermes uses hermes image" else echo " SKIP: Docker not available — cannot verify container images" SKIP=$((SKIP + 3)) @@ -183,7 +183,7 @@ fi # Verify runtime in agent card after registration sleep 5 -for rt_id in $RT_CC_ID $RT_LG_ID $RT_CR_ID; do +for rt_id in $RT_CC_ID $RT_CX_ID $RT_HM_ID; do # Register so we can check agent card curl -s -X POST "$BASE/registry/register" -H "Content-Type: application/json" \ -d "{\"id\":\"$rt_id\",\"url\":\"http://localhost:19999\",\"agent_card\":{\"name\":\"Test\",\"skills\":[]}}" > /dev/null 2>&1 @@ -204,20 +204,20 @@ fi # Verify runtime change persists on restart (if provisioner supports ExecRead) # Write a new runtime to config, restart, check image changes -R=$(curl -s -X PUT "$BASE/workspaces/$RT_LG_ID/files/config.yaml" \ +R=$(curl -s -X PUT "$BASE/workspaces/$RT_CX_ID/files/config.yaml" \ -H "Content-Type: application/json" \ - -d '{"content":"name: RT LangGraph\nruntime: deepagents\nmodel: openai:gpt-4.1-mini\ntier: 2\n"}') + -d '{"content":"name: RT Codex\nruntime: openclaw\nmodel: openai:gpt-4.1-mini\ntier: 2\n"}') if echo "$R" | grep -qF "saved"; then - curl -s -X POST "$BASE/workspaces/$RT_LG_ID/restart" > /dev/null 2>&1 + curl -s -X POST "$BASE/workspaces/$RT_CX_ID/restart" > /dev/null 2>&1 # Poll up to 30s for the new container image to appear (restart can take a while) if command -v docker &>/dev/null; then - short_id="${RT_LG_ID:0:12}" + short_id="${RT_CX_ID:0:12}" for _ in 1 2 3 4 5 6; do sleep 5 actual=$(docker inspect "ws-${short_id}" --format '{{.Config.Image}}' 2>/dev/null || echo "") - if echo "$actual" | grep -qF "deepagents"; then break; fi + if echo "$actual" | grep -qF "openclaw"; then break; fi done - _check_image "$RT_LG_ID" "deepagents" "Runtime change langgraph→deepagents on restart" + _check_image "$RT_CX_ID" "openclaw" "Runtime change 
codex to openclaw on restart" else echo " SKIP: Docker not available" SKIP=$((SKIP + 1)) @@ -228,7 +228,7 @@ else fi # Clean up runtime test workspaces -for rt_id in $RT_CC_ID $RT_LG_ID $RT_CR_ID; do +for rt_id in $RT_CC_ID $RT_CX_ID $RT_HM_ID; do curl -s -X DELETE "$BASE/workspaces/$rt_id?confirm=true" > /dev/null 2>&1 sleep 0.3 done diff --git a/tests/e2e/test_model_slug.sh b/tests/e2e/test_model_slug.sh index 9984642e..ecfb2134 100755 --- a/tests/e2e/test_model_slug.sh +++ b/tests/e2e/test_model_slug.sh @@ -2,12 +2,10 @@ # Regression test for tests/e2e/lib/model_slug.sh. # # PR #2571 fixed a synth-E2E masking bug where MODEL_SLUG was hardcoded -# to "openai/gpt-4o" (slash-form) but langgraph's init_chat_model needs -# "openai:gpt-4o" (colon-form). Fix shipped as a per-runtime case -# statement. Without this regression test, dropping any branch of the -# case (or flipping a slug format) would silently revert behavior — the -# E2E only fails as "Could not resolve authentication method" at the -# very first message, after a successful tenant + workspace provision. +# to "openai/gpt-4o" (slash-form). Without this regression test, dropping +# any branch of the case (or flipping a slug format) would silently revert +# behavior — the E2E only fails as "Could not resolve authentication method" +# at the very first message, after a successful tenant + workspace provision. # # Each branch must FAIL the test if the dispatch behavior changes, not # just produce some non-empty string. 
@@ -47,7 +45,7 @@ echo # ── Per-runtime branches (the load-bearing ones for synth-E2E) ── run_test "hermes → slash-form (derive-provider.sh contract)" hermes "openai/gpt-4o" -run_test "langgraph → colon-form (init_chat_model contract)" langgraph "openai:gpt-4o" +run_test "codex → slash-form fallback" codex "openai/gpt-4o" run_test "claude-code → OAuth/default alias" claude-code "sonnet" got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; E2E_MINIMAX_API_KEY="mx-test" pick_model_slug claude-code) @@ -74,8 +72,8 @@ echo echo "Test: pick_model_slug — E2E_MODEL_SLUG override" echo -got=$(E2E_MODEL_SLUG="anthropic:claude-opus-4-7" pick_model_slug langgraph) -assert_eq "override beats langgraph default" "$got" "anthropic:claude-opus-4-7" +got=$(E2E_MODEL_SLUG="anthropic:claude-opus-4-7" pick_model_slug codex) +assert_eq "override beats codex default" "$got" "anthropic:claude-opus-4-7" got=$(E2E_MODEL_SLUG="custom/whatever" pick_model_slug hermes) assert_eq "override beats hermes default" "$got" "custom/whatever" @@ -88,8 +86,8 @@ assert_eq "override beats claude-code default" "$got" "some-b # it because changing this behavior (e.g. via -v test) would silently # break the dispatch when an operator passes "" to clear an inherited # env var. -got=$(E2E_MODEL_SLUG="" pick_model_slug langgraph) -assert_eq "empty-string override falls through to dispatch" "$got" "openai:gpt-4o" +got=$(E2E_MODEL_SLUG="" pick_model_slug codex) +assert_eq "empty-string override falls through to dispatch" "$got" "openai/gpt-4o" echo echo "─────────────────────────────────────────────────" diff --git a/tests/e2e/test_notify_attachments_e2e.sh b/tests/e2e/test_notify_attachments_e2e.sh index 7aec8915..7388a943 100755 --- a/tests/e2e/test_notify_attachments_e2e.sh +++ b/tests/e2e/test_notify_attachments_e2e.sh @@ -94,10 +94,10 @@ done # model is required at the Create boundary (CTO 2026-05-22 SSOT — see # feedback_workspace_model_required_no_platform_default_dynamic_credential_intake). 
-# Body had no runtime → defaults to langgraph; pass the langgraph-compatible -# default that the deleted DefaultModel("") would have returned. +# Body has no runtime → defaults to claude-code; pass the matching model +# that the workspace-creation contract now requires. R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \ - -d '{"name":"Notify E2E","tier":1,"model":"anthropic:claude-opus-4-7"}') + -d '{"name":"Notify E2E","tier":1,"model":"sonnet"}') WSID=$(echo "$R" | python3 -c 'import json,sys;print(json.load(sys.stdin)["id"])' 2>/dev/null || true) [ -n "$WSID" ] || { echo "Failed to create workspace: $R"; exit 1; } echo "Created workspace $WSID" diff --git a/tests/e2e/test_priority_runtimes_e2e.sh b/tests/e2e/test_priority_runtimes_e2e.sh index a9b873e0..d8683ece 100755 --- a/tests/e2e/test_priority_runtimes_e2e.sh +++ b/tests/e2e/test_priority_runtimes_e2e.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# E2E test: every runtime template (8 total) works end-to-end. +# E2E test: every maintained runtime template works end-to-end. # # Self-contained happy-path smoke per runtime. Provisions a fresh # workspace, waits for status=online, sends a real A2A message, and @@ -7,13 +7,12 @@ # extraction (and ongoing template work) can't silently break any # runtime. # -# Runtimes covered: claude-code, hermes, langgraph, crewai, autogen, -# deepagents, openclaw, gemini-cli. claude-code + hermes have unique +# Runtimes covered: claude-code, codex, hermes, openclaw. +# claude-code + hermes have unique # provisioning quirks (claude-code OAuth, hermes 15-min cold-boot) # and stay first-class with their own run_ functions; the -# 5 OpenAI-backed runtimes share run_openai_runtime; gemini-cli has -# its own block (Google AI key). Each phase skips cleanly if its -# prerequisite secret is missing. +# OpenAI-backed runtimes share run_openai_runtime. Each phase skips cleanly +# if its prerequisite secret is missing. # # What this proves: # 1. 
Provisioning + container boot works for each runtime. @@ -296,9 +295,8 @@ print(json.dumps({ #################################################################### # Secondary runtimes — same provision/online/A2A loop, parametrized. #################################################################### -# These 5 templates (langgraph, crewai, autogen, deepagents, openclaw) -# all use OpenAI as their LLM provider in the default config and don't -# need the hermes-specific HERMES_* secret block. Skip if no key. +# Codex and OpenClaw use OpenAI as their LLM provider in this smoke and +# don't need the hermes-specific HERMES_* secret block. Skip if no key. # claude-code + hermes stay first-class above because each has unique # provisioning quirks (claude-code OAuth, hermes cold-boot tolerance); # refactoring them into this generic loop would lose those guards. @@ -361,67 +359,17 @@ print(json.dumps({ fi } -run_langgraph() { run_openai_runtime "langgraph" "langgraph"; } -run_crewai() { run_openai_runtime "crewai" "crewai"; } -run_autogen() { run_openai_runtime "autogen" "autogen"; } -run_deepagents() { run_openai_runtime "deepagents" "deepagents"; } +run_codex() { run_openai_runtime "codex" "codex"; } run_openclaw() { run_openai_runtime "openclaw" "openclaw"; } -# gemini-cli wants a Google API key, not OpenAI. Skip if absent. -run_gemini_cli() { - echo "" - echo "=== gemini-cli happy path ===" - if [ -z "${E2E_GEMINI_API_KEY:-}" ]; then - skip "E2E_GEMINI_API_KEY not set (gemini-cli needs Google AI key)" - return 0 - fi - local secrets - secrets=$(python3 -c " -import json, os -print(json.dumps({'GEMINI_API_KEY': os.environ['E2E_GEMINI_API_KEY']})) -") - local resp wsid - # model required (CTO 2026-05-22 SSOT) — gemini-cli routes via the gemini provider. 
- resp=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \ - -d "{\"name\":\"Priority E2E (gemini-cli)\",\"runtime\":\"gemini-cli\",\"model\":\"gemini-2.0-flash\",\"tier\":1,\"secrets\":$secrets}") - wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true - if [ -z "$wsid" ]; then fail "create gemini-cli workspace" "$resp"; return 0; fi - CREATED_WSIDS+=("$wsid") - echo " workspace=$wsid" - local final - final=$(wait_for_status "$wsid" "online failed" 240) || true - if [ "$final" != "online" ]; then - fail "gemini-cli workspace reaches online" "final status: $final" - return 0 - fi - pass "gemini-cli workspace reaches online" - local token; token=$(e2e_mint_test_token "$wsid") - if [ -z "$token" ]; then fail "mint gemini-cli test token" "no token"; return 0; fi - local reply - if reply=$(send_test_prompt "$wsid" "$token"); then - if echo "$reply" | grep -q "PONG"; then - pass "gemini-cli reply contains PONG" - else - pass "gemini-cli reply non-empty (first 80 chars: ${reply:0:80})" - fi - assert_activity_logged "gemini-cli" "$wsid" "$token" - else - fail "gemini-cli reply" "${reply:-}" - fi -} - -WANT="${E2E_RUNTIMES:-claude-code hermes}" +WANT="${E2E_RUNTIMES:-claude-code codex hermes openclaw}" for r in $WANT; do case "$r" in claude-code) run_claude_code ;; + codex) run_codex ;; hermes) run_hermes ;; - langgraph) run_langgraph ;; - crewai) run_crewai ;; - autogen) run_autogen ;; - deepagents) run_deepagents ;; openclaw) run_openclaw ;; - gemini-cli) run_gemini_cli ;; - all) run_claude_code; run_hermes; run_langgraph; run_crewai; run_autogen; run_deepagents; run_openclaw; run_gemini_cli ;; + all) run_claude_code; run_codex; run_hermes; run_openclaw ;; *) echo "unknown runtime in E2E_RUNTIMES: $r" >&2; exit 2 ;; esac done diff --git a/tests/harness/seed.sh b/tests/harness/seed.sh index fdcbd672..5c8f2eec 100755 --- a/tests/harness/seed.sh +++ b/tests/harness/seed.sh @@ -27,9 +27,9 @@ 
create_workspace() { local tenant="$1" name="$2" tier="$3" parent="${4:-}" local body if [ -n "$parent" ]; then - body="{\"name\":\"$name\",\"tier\":$tier,\"parent_id\":\"$parent\",\"runtime\":\"langgraph\"}" + body="{\"name\":\"$name\",\"tier\":$tier,\"parent_id\":\"$parent\",\"runtime\":\"claude-code\",\"model\":\"sonnet\"}" else - body="{\"name\":\"$name\",\"tier\":$tier,\"runtime\":\"langgraph\"}" + body="{\"name\":\"$name\",\"tier\":$tier,\"runtime\":\"claude-code\",\"model\":\"sonnet\"}" fi local id if [ "$tenant" = "alpha" ]; then diff --git a/workspace-server/internal/bundle/importer.go b/workspace-server/internal/bundle/importer.go index f61c7a98..a5ba9f1b 100644 --- a/workspace-server/internal/bundle/importer.go +++ b/workspace-server/internal/bundle/importer.go @@ -60,8 +60,8 @@ func Import( // Build config files in memory for the provisioner configFiles := buildBundleConfigFiles(b) - // Extract runtime from config.yaml in the bundle - bundleRuntime := "langgraph" + // Extract runtime from config.yaml in the bundle. 
+ bundleRuntime := "claude-code" if configYaml, ok := b.Prompts["config.yaml"]; ok { for _, line := range strings.Split(configYaml, "\n") { line = strings.TrimSpace(line) diff --git a/workspace-server/internal/handlers/a2a_proxy.go b/workspace-server/internal/handlers/a2a_proxy.go index f01bd94c..5a468eee 100644 --- a/workspace-server/internal/handlers/a2a_proxy.go +++ b/workspace-server/internal/handlers/a2a_proxy.go @@ -705,7 +705,7 @@ func (h *WorkspaceHandler) resolveAgentURL(ctx context.Context, workspaceID stri if strings.HasPrefix(agentURL, "http://127.0.0.1:") && h.provisioner != nil && platformInDocker { var wsRuntime string if err := db.DB.QueryRowContext(ctx, - `SELECT COALESCE(runtime, 'langgraph') FROM workspaces WHERE id = $1`, + `SELECT COALESCE(runtime, 'claude-code') FROM workspaces WHERE id = $1`, workspaceID, ).Scan(&wsRuntime); err != nil { log.Printf("ProxyA2A: runtime lookup before Docker URL rewrite failed for %s: %v", workspaceID, err) diff --git a/workspace-server/internal/handlers/a2a_proxy_helpers.go b/workspace-server/internal/handlers/a2a_proxy_helpers.go index 6b82b639..3c8a05e4 100644 --- a/workspace-server/internal/handlers/a2a_proxy_helpers.go +++ b/workspace-server/internal/handlers/a2a_proxy_helpers.go @@ -169,7 +169,7 @@ func (h *WorkspaceHandler) handleA2ADispatchError(ctx context.Context, workspace // canvas-chat-to-dead-workspace incident traces to exactly this gap. 
func (h *WorkspaceHandler) maybeMarkContainerDead(ctx context.Context, workspaceID string) bool { var wsRuntime string - db.DB.QueryRowContext(ctx, `SELECT COALESCE(runtime, 'langgraph') FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsRuntime) + db.DB.QueryRowContext(ctx, `SELECT COALESCE(runtime, 'claude-code') FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsRuntime) if isExternalLikeRuntime(wsRuntime) { return false } diff --git a/workspace-server/internal/handlers/a2a_proxy_test.go b/workspace-server/internal/handlers/a2a_proxy_test.go index 44dcd122..2b0d45e5 100644 --- a/workspace-server/internal/handlers/a2a_proxy_test.go +++ b/workspace-server/internal/handlers/a2a_proxy_test.go @@ -280,7 +280,7 @@ func TestProxyA2A_Upstream502_TriggersContainerDeadCheck(t *testing.T) { mock.ExpectExec("INSERT INTO activity_logs"). WillReturnResult(sqlmock.NewResult(0, 1)) // maybeMarkContainerDead's runtime lookup, then the offline-flip UPDATE. - mock.ExpectQuery(`SELECT COALESCE\(runtime, 'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(runtime, 'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-tunnel-dead"). WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("hermes")) mock.ExpectExec(`UPDATE workspaces SET status =`). @@ -340,7 +340,7 @@ func TestProxyA2A_Upstream502_AliveAgent_PropagatesAsIs(t *testing.T) { expectBudgetCheck(mock, "ws-alive-502") mock.ExpectExec("INSERT INTO activity_logs").WillReturnResult(sqlmock.NewResult(0, 1)) // IsRunning runtime lookup runs but no UPDATE follows (running=true). - mock.ExpectQuery(`SELECT COALESCE\(runtime, 'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(runtime, 'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-alive-502"). 
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("hermes")) @@ -1954,9 +1954,9 @@ func TestMaybeMarkContainerDead_NilProvisioner(t *testing.T) { setupTestRedis(t) handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir()) - mock.ExpectQuery(`SELECT COALESCE\(runtime, 'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(runtime, 'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-nilprov"). - WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("langgraph")) + WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code")) if got := handler.maybeMarkContainerDead(context.Background(), "ws-nilprov"); got { t.Error("expected false when provisioner is nil") @@ -1977,7 +1977,7 @@ func TestMaybeMarkContainerDead_CPOnly_NotRunning(t *testing.T) { cp := &fakeCPProv{running: false} handler.SetCPProvisioner(cp) - mock.ExpectQuery(`SELECT COALESCE\(runtime, 'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(runtime, 'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-saas-dead"). WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("hermes")) mock.ExpectExec(`UPDATE workspaces SET status =`). @@ -2006,7 +2006,7 @@ func TestMaybeMarkContainerDead_CPOnly_Running(t *testing.T) { cp := &fakeCPProv{running: true} handler.SetCPProvisioner(cp) - mock.ExpectQuery(`SELECT COALESCE\(runtime, 'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(runtime, 'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-saas-alive"). WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("hermes")) @@ -2107,7 +2107,7 @@ func TestMaybeMarkContainerDead_ExternalRuntime(t *testing.T) { setupTestRedis(t) handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir()) - mock.ExpectQuery(`SELECT COALESCE\(runtime, 'langgraph'\) FROM workspaces WHERE id =`). 
+ mock.ExpectQuery(`SELECT COALESCE\(runtime, 'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-ext"). WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("external")) diff --git a/workspace-server/internal/handlers/discovery.go b/workspace-server/internal/handlers/discovery.go index 9269290e..c34752c1 100644 --- a/workspace-server/internal/handlers/discovery.go +++ b/workspace-server/internal/handlers/discovery.go @@ -128,7 +128,7 @@ func discoverHostPeer(ctx context.Context, c *gin.Context, targetID string) { // of `callerID` and writes the JSON response (or an appropriate 404/503 error). func discoverWorkspacePeer(ctx context.Context, c *gin.Context, callerID, targetID string) { var wsName, wsRuntime string - db.DB.QueryRowContext(ctx, `SELECT COALESCE(name,''), COALESCE(runtime,'langgraph') FROM workspaces WHERE id = $1`, targetID).Scan(&wsName, &wsRuntime) + db.DB.QueryRowContext(ctx, `SELECT COALESCE(name,''), COALESCE(runtime,'claude-code') FROM workspaces WHERE id = $1`, targetID).Scan(&wsName, &wsRuntime) // External workspaces: return their registered URL. 
// Rewrite 127.0.0.1/localhost → host.docker.internal ONLY when the @@ -180,7 +180,7 @@ func writeExternalWorkspaceURL(ctx context.Context, c *gin.Context, callerID, ta } outURL := wsURL var callerRuntime string - db.DB.QueryRowContext(ctx, `SELECT COALESCE(runtime,'langgraph') FROM workspaces WHERE id = $1`, callerID).Scan(&callerRuntime) + db.DB.QueryRowContext(ctx, `SELECT COALESCE(runtime,'claude-code') FROM workspaces WHERE id = $1`, callerID).Scan(&callerRuntime) if !isExternalLikeRuntime(callerRuntime) { outURL = strings.Replace(outURL, "127.0.0.1", "host.docker.internal", 1) outURL = strings.Replace(outURL, "localhost", "host.docker.internal", 1) @@ -203,8 +203,8 @@ func writeExternalWorkspaceURL(ctx context.Context, c *gin.Context, callerID, ta // Peers handles GET /registry/:id/peers // -// Optional ``?q=`` filters the result by case-insensitive -// substring match against ``name`` or ``role`` (#1038). Filtering is done +// Optional `?q=` filters the result by case-insensitive +// substring match against `name` or `role` (#1038). Filtering is done // in Go after the DB read — keeps the SQL identical to the no-filter path // (no injection risk, no DB-driver collation surprises) at the cost of // loading the unfiltered set first. Acceptable because the peer set is @@ -301,8 +301,8 @@ func (h *DiscoveryHandler) Peers(c *gin.Context) { c.JSON(http.StatusOK, peers) } -// excludeSelfFromPeers strips any peer entry whose ``id`` equals -// ``workspaceID`` (the caller's own row). Final-line defense for #383 +// excludeSelfFromPeers strips any peer entry whose `id` equals +// `workspaceID` (the caller's own row). Final-line defense for #383 // (self-delegation 400-loop on external workspaces): a peer-list that // includes the requester's own row is the root mechanism by which an // agent ends up delegating to itself.
The pre-DB filters in Peers @@ -337,8 +337,8 @@ func filterPeersByQuery(peers []map[string]interface{}, q string) []map[string]i needle := strings.ToLower(q) out := make([]map[string]interface{}, 0, len(peers)) for _, p := range peers { - name, _ := p["name"].(string) // nil → "" — safe on empty-role rows - role, _ := p["role"].(string) // nil → "" — queryPeerMaps sets nil when DB role is empty + name, _ := p["name"].(string) // nil → "" — safe on empty-role rows + role, _ := p["role"].(string) // nil → "" — queryPeerMaps sets nil when DB role is empty if strings.Contains(strings.ToLower(name), needle) || strings.Contains(strings.ToLower(role), needle) { out = append(out, p) diff --git a/workspace-server/internal/handlers/discovery_test.go b/workspace-server/internal/handlers/discovery_test.go index 62f45a8b..1b779306 100644 --- a/workspace-server/internal/handlers/discovery_test.go +++ b/workspace-server/internal/handlers/discovery_test.go @@ -404,11 +404,11 @@ func TestPeers_Q_NoMatches_RawBodyIsArrayNotNull(t *testing.T) { // role and asserts no panic + correct filter behaviour. func TestFilterPeersByQuery_NilRoleRegression(t *testing.T) { cases := []struct { - name string - peers []map[string]interface{} - q string - wantLen int - wantIDs []string + name string + peers []map[string]interface{} + q string + wantLen int + wantIDs []string }{ { name: "nil role matches on name", @@ -555,7 +555,7 @@ func TestDiscoverWorkspacePeer_Online(t *testing.T) { setupTestRedis(t) // name/runtime lookup → non-external - mock.ExpectQuery(`SELECT COALESCE\(name,''\), COALESCE\(runtime,'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(name,''\), COALESCE\(runtime,'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-online"). 
WillReturnRows(sqlmock.NewRows([]string{"name", "runtime"}).AddRow("Target", "langgraph")) // No cached internal URL → DB status lookup → online @@ -583,7 +583,7 @@ func TestDiscoverWorkspacePeer_NotFound(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - mock.ExpectQuery(`SELECT COALESCE\(name,''\), COALESCE\(runtime,'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(name,''\), COALESCE\(runtime,'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-missing"). WillReturnRows(sqlmock.NewRows([]string{"name", "runtime"}).AddRow("", "langgraph")) mock.ExpectQuery(`SELECT status FROM workspaces WHERE id =`). @@ -605,14 +605,14 @@ func TestDiscoverWorkspacePeer_ExternalRuntime_HandledByExternalURL(t *testing.T mock := setupTestDB(t) setupTestRedis(t) - mock.ExpectQuery(`SELECT COALESCE\(name,''\), COALESCE\(runtime,'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(name,''\), COALESCE\(runtime,'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-ext"). WillReturnRows(sqlmock.NewRows([]string{"name", "runtime"}).AddRow("Ext", "external")) // writeExternalWorkspaceURL's two queries mock.ExpectQuery(`SELECT COALESCE\(url,''\) FROM workspaces WHERE id =`). WithArgs("ws-ext"). WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow("http://external.example")) - mock.ExpectQuery(`SELECT COALESCE\(runtime,'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(runtime,'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-caller"). WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("external")) @@ -630,7 +630,7 @@ func TestDiscoverWorkspacePeer_CachedInternalURLHit(t *testing.T) { mock := setupTestDB(t) mr := setupTestRedis(t) - mock.ExpectQuery(`SELECT COALESCE\(name,''\), COALESCE\(runtime,'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(name,''\), COALESCE\(runtime,'claude-code'\) FROM workspaces WHERE id =`). 
WithArgs("ws-cached"). WillReturnRows(sqlmock.NewRows([]string{"name", "runtime"}).AddRow("Cached", "langgraph")) mr.Set("ws:ws-cached:internal_url", "http://ws-cached:8000") @@ -654,7 +654,7 @@ func TestDiscoverWorkspacePeer_NotReachable(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - mock.ExpectQuery(`SELECT COALESCE\(name,''\), COALESCE\(runtime,'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(name,''\), COALESCE\(runtime,'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-paused"). WillReturnRows(sqlmock.NewRows([]string{"name", "runtime"}).AddRow("Paused", "langgraph")) mock.ExpectQuery(`SELECT status FROM workspaces WHERE id =`). @@ -681,9 +681,9 @@ func TestWriteExternalWorkspaceURL_Success(t *testing.T) { mock.ExpectQuery(`SELECT COALESCE\(url,''\) FROM workspaces WHERE id =`). WithArgs("ws-ext"). WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow("http://external.example/a2a")) - mock.ExpectQuery(`SELECT COALESCE\(runtime,'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(runtime,'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-caller"). - WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("langgraph")) + WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code")) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -731,9 +731,9 @@ func TestWriteExternalWorkspaceURL_RewritesLocalhostForDockerCaller(t *testing.T WithArgs("ws-ext"). WillReturnRows(sqlmock.NewRows([]string{"url"}).AddRow("http://127.0.0.1:8000/a2a")) // non-external caller runtime → rewrite enabled - mock.ExpectQuery(`SELECT COALESCE\(runtime,'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(runtime,'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-caller"). 
- WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("langgraph")) + WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code")) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) @@ -818,9 +818,9 @@ func TestWriteExternalWorkspaceURL_RejectsMetadataIPURL(t *testing.T) { WillReturnRows(sqlmock.NewRows([]string{"url"}). AddRow("http://169.254.169.254/computeMetadata/v1/")) // callerRuntime lookup happens before isSafeURL — must mock it. - mock.ExpectQuery(`SELECT COALESCE\(runtime,'langgraph'\) FROM workspaces WHERE id =`). + mock.ExpectQuery(`SELECT COALESCE\(runtime,'claude-code'\) FROM workspaces WHERE id =`). WithArgs("ws-caller"). - WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("langgraph")) + WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code")) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) diff --git a/workspace-server/internal/handlers/handlers_additional_test.go b/workspace-server/internal/handlers/handlers_additional_test.go index 9f8c939f..dceab6c0 100644 --- a/workspace-server/internal/handlers/handlers_additional_test.go +++ b/workspace-server/internal/handlers/handlers_additional_test.go @@ -33,9 +33,11 @@ func TestWorkspaceCreate_WithParentID(t *testing.T) { // Default tier is 3 (Privileged) — see workspace.go create-handler comment. // delivery_mode defaults to "push" when payload omits it (#2339). mock.ExpectExec("INSERT INTO workspaces"). - WithArgs(sqlmock.AnyArg(), "Child Agent", nil, 3, "langgraph", &parentID, nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push"). + WithArgs(sqlmock.AnyArg(), "Child Agent", nil, 3, "claude-code", &parentID, nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push"). WillReturnResult(sqlmock.NewResult(0, 1)) mock.ExpectCommit() + mock.ExpectExec("INSERT INTO workspace_secrets"). + WillReturnResult(sqlmock.NewResult(0, 1)) mock.ExpectExec("INSERT INTO canvas_layouts"). 
WithArgs(sqlmock.AnyArg(), float64(0), float64(0)). WillReturnResult(sqlmock.NewResult(0, 1)) @@ -104,7 +106,7 @@ func TestWorkspaceCreate_MissingName(t *testing.T) { w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) - body := `{"tier":1,"runtime":"langgraph"}` + body := `{"tier":1,"runtime":"claude-code"}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) c.Request.Header.Set("Content-Type", "application/json") @@ -242,7 +244,7 @@ func TestWorkspaceList_WithData(t *testing.T) { } rows := sqlmock.NewRows(columns). AddRow("ws-1", "Agent One", "worker", 1, "online", []byte(`{"name":"agent1"}`), "http://localhost:8001", - nil, 3, 1, 0.02, "", 7200, "processing", "langgraph", "", 10.0, 20.0, false, nil, int64(0), false, true, []byte(`{}`)). + nil, 3, 1, 0.02, "", 7200, "processing", "claude-code", "", 10.0, 20.0, false, nil, int64(0), false, true, []byte(`{}`)). AddRow("ws-2", "Agent Two", "", 2, "degraded", []byte("null"), "", nil, 0, 1, 0.6, "timeout", 100, "", "claude-code", "", 50.0, 60.0, true, nil, int64(0), false, true, []byte(`{}`)) @@ -585,7 +587,7 @@ func TestDiscover_TargetOffline(t *testing.T) { // Name + runtime lookup (discovery now queries both) mock.ExpectQuery("SELECT COALESCE"). WithArgs("ws-off"). - WillReturnRows(sqlmock.NewRows([]string{"name", "runtime"}).AddRow("Offline Agent", "langgraph")) + WillReturnRows(sqlmock.NewRows([]string{"name", "runtime"}).AddRow("Offline Agent", "claude-code")) // No cached internal URL → falls to DB status check → offline mock.ExpectQuery("SELECT status FROM workspaces WHERE id ="). @@ -917,7 +919,7 @@ func TestRestart_ParentPaused(t *testing.T) { mock.ExpectQuery("SELECT status, name, tier"). WithArgs("dddddddd-0001-0000-0000-000000000000"). WillReturnRows(sqlmock.NewRows([]string{"status", "name", "tier", "runtime"}). 
- AddRow("offline", "Child Agent", 1, "langgraph")) + AddRow("offline", "Child Agent", 1, "claude-code")) // isParentPaused: get parent_id mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id"). @@ -960,7 +962,7 @@ func TestRestart_ProvisionerNil(t *testing.T) { mock.ExpectQuery("SELECT status, name, tier"). WithArgs("ws-noprov"). WillReturnRows(sqlmock.NewRows([]string{"status", "name", "tier", "runtime"}). - AddRow("offline", "Agent", 1, "langgraph")) + AddRow("offline", "Agent", 1, "claude-code")) // isParentPaused: no parent mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id"). @@ -1195,7 +1197,7 @@ func TestResume_ProvisionerNil(t *testing.T) { mock.ExpectQuery("SELECT name, tier"). WithArgs("ws-resume-noprov"). WillReturnRows(sqlmock.NewRows([]string{"name", "tier", "runtime"}). - AddRow("Paused Agent", 1, "langgraph")) + AddRow("Paused Agent", 1, "claude-code")) w := httptest.NewRecorder() c, _ := gin.CreateTestContext(w) diff --git a/workspace-server/internal/handlers/handlers_test.go b/workspace-server/internal/handlers/handlers_test.go index 04d59693..3c82c5f7 100644 --- a/workspace-server/internal/handlers/handlers_test.go +++ b/workspace-server/internal/handlers/handlers_test.go @@ -368,11 +368,13 @@ func TestWorkspaceCreate(t *testing.T) { // Default tier is 3 (Privileged) — see workspace.go create-handler comment. // delivery_mode defaults to "push" when payload omits it (#2339). mock.ExpectExec("INSERT INTO workspaces"). - WithArgs(sqlmock.AnyArg(), "Test Agent", nil, 3, "langgraph", (*string)(nil), nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push"). + WithArgs(sqlmock.AnyArg(), "Test Agent", nil, 3, "claude-code", (*string)(nil), nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push"). WillReturnResult(sqlmock.NewResult(0, 1)) // Expect transaction commit (no secrets in this payload) mock.ExpectCommit() + mock.ExpectExec("INSERT INTO workspace_secrets"). 
+ WillReturnResult(sqlmock.NewResult(0, 1)) // Expect canvas_layouts INSERT mock.ExpectExec("INSERT INTO canvas_layouts"). @@ -389,8 +391,8 @@ func TestWorkspaceCreate(t *testing.T) { // Note: model is now required at the Create boundary (CTO 2026-05-22 // SSOT directive — see feedback_workspace_model_required_no_platform_default_dynamic_credential_intake // and TestCreate_ModelRequired_Returns422). This test happens to take - // the bare-defaults path (no template, no runtime → langgraph), so - // the body must declare an explicit model. Using a langgraph-compatible + // the bare-defaults path (no template, no runtime → claude-code), so + // the body must declare an explicit model. Using a claude-code-compatible // id; the test doesn't exercise model semantics beyond presence. body := `{"name":"Test Agent","model":"anthropic:claude-opus-4-7","canvas":{"x":100,"y":200}}` c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body)) @@ -462,7 +464,7 @@ func TestWorkspaceList(t *testing.T) { AddRow("ws-1", "Agent One", "worker", 1, "online", []byte("null"), "http://localhost:8001", nil, 0, 1, 0.0, "", 100, "", "claude-code", "", 10.0, 20.0, false, nil, int64(0), false, true, []byte(`{}`)). AddRow("ws-2", "Agent Two", "manager", 2, "provisioning", []byte("null"), "", - nil, 0, 1, 0.0, "", 0, "", "langgraph", "", 50.0, 60.0, false, nil, int64(0), false, true, []byte(`{}`)) + nil, 0, 1, 0.0, "", 0, "", "claude-code", "", 50.0, 60.0, false, nil, int64(0), false, true, []byte(`{}`)) mock.ExpectQuery("SELECT w.id, w.name"). WillReturnRows(rows) @@ -1182,7 +1184,7 @@ func TestWorkspaceGet_CurrentTask(t *testing.T) { WithArgs("dddddddd-0004-0000-0000-000000000000"). 
WillReturnRows(sqlmock.NewRows(columns).AddRow( "dddddddd-0004-0000-0000-000000000000", "Task Worker", "worker", 1, "online", []byte("null"), "http://localhost:9000", - nil, 2, 1, 0.0, "", 300, "Analyzing document", "langgraph", "", 10.0, 20.0, false, + nil, 2, 1, 0.0, "", 300, "Analyzing document", "claude-code", "", 10.0, 20.0, false, nil, int64(0), false, true, []byte(`{}`), )) diff --git a/workspace-server/internal/handlers/org_import.go b/workspace-server/internal/handlers/org_import.go index 14522c17..9735cca4 100644 --- a/workspace-server/internal/handlers/org_import.go +++ b/workspace-server/internal/handlers/org_import.go @@ -62,7 +62,7 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX runtime = defaults.Runtime } if runtime == "" { - runtime = "langgraph" + runtime = "claude-code" } model := ws.Model if model == "" { diff --git a/workspace-server/internal/handlers/plugins_install_eic_test.go b/workspace-server/internal/handlers/plugins_install_eic_test.go index 17ec1651..b3da22d5 100644 --- a/workspace-server/internal/handlers/plugins_install_eic_test.go +++ b/workspace-server/internal/handlers/plugins_install_eic_test.go @@ -119,7 +119,6 @@ func TestHostPluginPath_PerRuntime(t *testing.T) { }{ {"claude-code", "browser-automation", "/configs/plugins/browser-automation"}, {"hermes", "browser-automation", "/home/ubuntu/.hermes/plugins/browser-automation"}, - {"langgraph", "browser-automation", "/opt/configs/plugins/browser-automation"}, // Unknown / empty runtime falls back to /configs (containerized // user-data layout) so a future runtime added to workspaces table // without a workspaceFilePathPrefix entry doesn't blow up the diff --git a/workspace-server/internal/handlers/restart_template.go b/workspace-server/internal/handlers/restart_template.go index c60d0a5f..08e9c570 100644 --- a/workspace-server/internal/handlers/restart_template.go +++ b/workspace-server/internal/handlers/restart_template.go @@ -89,9 +89,9 @@ func 
resolveRestartTemplate(configsDir, wsName, dbRuntime string, body restartTe // host, load an arbitrary host directory as the workspace template. // // sanitizeRuntime applies an allowlist of known runtimes; any unknown - // value (including traversal strings) is remapped to "langgraph". The + // value (including traversal strings) is remapped to "claude-code". The // attacker cannot choose an arbitrary host path — they can at most - // trigger application of the langgraph-default template. + // trigger application of the claude-code-default template. if body.ApplyTemplate && dbRuntime != "" { safeRuntime := sanitizeRuntime(dbRuntime) runtimeTemplate := filepath.Join(configsDir, safeRuntime+"-default") diff --git a/workspace-server/internal/handlers/template_files_eic.go b/workspace-server/internal/handlers/template_files_eic.go index 8ef8028d..cf261141 100644 --- a/workspace-server/internal/handlers/template_files_eic.go +++ b/workspace-server/internal/handlers/template_files_eic.go @@ -48,13 +48,10 @@ import ( var workspaceFilePathPrefix = map[string]string{ "hermes": "/home/ubuntu/.hermes", "claude-code": "/configs", - "langgraph": "/opt/configs", "external": "/opt/configs", // Default for unknown / future runtimes is /configs — matches the - // containerized user-data layout. The `langgraph` / `external` - // entries pre-date the unified user-data path and are retained - // until a migration audit confirms what the running tenants of - // those runtimes actually have on disk. + // containerized user-data layout. The `external` entry pre-dates the + // unified user-data path and does not map to a spawned runtime. 
} // resolveWorkspaceFilePath translates (runtime, root, relPath) into an diff --git a/workspace-server/internal/handlers/template_files_eic_test.go b/workspace-server/internal/handlers/template_files_eic_test.go index 2d30422c..a671ce59 100644 --- a/workspace-server/internal/handlers/template_files_eic_test.go +++ b/workspace-server/internal/handlers/template_files_eic_test.go @@ -32,7 +32,6 @@ func TestResolveWorkspaceFilePath_RuntimeIndirection(t *testing.T) { // and would 500 with EACCES on save (the bug that motivated this gate). {"claude-code", "/configs", "config.yaml", "/configs/config.yaml"}, {"CLAUDE-CODE", "/configs", "config.yaml", "/configs/config.yaml"}, // case-insensitive - {"langgraph", "/configs", "config.yaml", "/opt/configs/config.yaml"}, {"external", "/configs", "skills.json", "/opt/configs/skills.json"}, {"", "/configs", "config.yaml", "/configs/config.yaml"}, // empty runtime → default {"unknown", "/configs", "config.yaml", "/configs/config.yaml"}, // unknown → default @@ -70,7 +69,7 @@ func TestResolveWorkspaceFilePath_LiteralRoots(t *testing.T) { // universal Linux path, not a managed-config indirection. {"hermes", "/home", "ubuntu/.bashrc", "/home/ubuntu/.bashrc"}, {"claude-code", "/home", "ubuntu/notes.md", "/home/ubuntu/notes.md"}, - {"langgraph", "/home", "ubuntu/x", "/home/ubuntu/x"}, + {"codex", "/home", "ubuntu/x", "/home/ubuntu/x"}, // /workspace and /plugins are also literal — runtime is ignored. 
{"hermes", "/workspace", "src/main.go", "/workspace/src/main.go"}, {"claude-code", "/plugins", "p/manifest.yaml", "/plugins/p/manifest.yaml"}, diff --git a/workspace-server/internal/handlers/workspace.go b/workspace-server/internal/handlers/workspace.go index 6b76a15f..7780b15d 100644 --- a/workspace-server/internal/handlers/workspace.go +++ b/workspace-server/internal/handlers/workspace.go @@ -273,12 +273,12 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { // runtimeExplicitlyRequested is true when the caller expressed intent for // a SPECIFIC runtime — either by passing `runtime` directly, or by naming // a `template` (a template encodes a runtime). When true, we must NOT - // silently fall back to langgraph if that intent can't be honored: that + // silently fall back to the default runtime if that intent can't be honored: that // is the molecule-controlplane#188 / #184 contract violation (caller asks - // for codex/claude-code, gets a langgraph workspace, 201, no error — a + // for codex/hermes/openclaw, gets a default-runtime workspace, 201, no error — a // false success). #188 mandates fail-closed (error+notify) on mismatch, // not an advisory degrade. The legitimate "no template, no runtime → - // langgraph default" path (bare {"name":...}) is unaffected. + // default runtime" path (bare {"name":...}) is unaffected.
runtimeExplicitlyRequested := payload.Runtime != "" || payload.Template != "" templateRuntimeResolved := payload.Runtime != "" if payload.Template != "" && (payload.Runtime == "" || payload.Model == "") { @@ -332,15 +332,15 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { // intent for a specific runtime (passed `runtime`, or named a `template`) // but we could NOT resolve a concrete runtime from it (template's // config.yaml unreadable, or it has no `runtime:` key), DO NOT silently - // substitute langgraph and return 201 — that is the silent contract + // substitute the default runtime and return 201 — that is the silent contract // violation that produced 5/5 wrong workspaces and a false codex E2E pass. // Return 422 so the caller learns the requested runtime was not honored. // The platform-side CP fix (controlplane#188) is the sibling gate; this // closes the ws-server `Create` boundary the product UI actually hits. if payload.Runtime == "" && runtimeExplicitlyRequested && !templateRuntimeResolved { - log.Printf("Create: FAIL-CLOSED (controlplane#188) — template=%q requested but runtime could not be resolved; refusing silent langgraph fallback", payload.Template) + log.Printf("Create: FAIL-CLOSED (controlplane#188) — template=%q requested but runtime could not be resolved; refusing silent default-runtime fallback", payload.Template) c.JSON(http.StatusUnprocessableEntity, gin.H{ - "error": "runtime could not be resolved from the requested template; refusing to silently provision langgraph (controlplane#188). Pass an explicit \"runtime\", or use a template whose config.yaml declares one.", + "error": "runtime could not be resolved from the requested template; refusing to silently provision the default runtime (controlplane#188). 
Pass an explicit \"runtime\", or use a template whose config.yaml declares one.", "template": payload.Template, "code": "RUNTIME_UNRESOLVED", }) @@ -348,8 +348,8 @@ func (h *WorkspaceHandler) Create(c *gin.Context) { } if payload.Runtime == "" { // Legitimate default path: no template AND no runtime requested - // (bare {"name":...}) — langgraph is the intended default here. - payload.Runtime = "langgraph" + // (bare {"name":...}) — claude-code is the intended default here. + payload.Runtime = "claude-code" } // SSOT (CTO 2026-05-22, feedback_workspace_model_required_no_platform_default_dynamic_credential_intake): @@ -851,7 +851,7 @@ const workspaceListQuery = ` w.parent_id, w.active_tasks, COALESCE(w.max_concurrent_tasks, 1), w.last_error_rate, COALESCE(w.last_sample_error, ''), w.uptime_seconds, - COALESCE(w.current_task, ''), COALESCE(w.runtime, 'langgraph'), + COALESCE(w.current_task, ''), COALESCE(w.runtime, 'claude-code'), COALESCE(w.workspace_dir, ''), COALESCE(cl.x, 0), COALESCE(cl.y, 0), COALESCE(cl.collapsed, false), w.budget_limit, COALESCE(w.monthly_spend, 0), @@ -913,7 +913,7 @@ func (h *WorkspaceHandler) Get(c *gin.Context) { w.parent_id, w.active_tasks, COALESCE(w.max_concurrent_tasks, 1), w.last_error_rate, COALESCE(w.last_sample_error, ''), w.uptime_seconds, - COALESCE(w.current_task, ''), COALESCE(w.runtime, 'langgraph'), + COALESCE(w.current_task, ''), COALESCE(w.runtime, 'claude-code'), COALESCE(w.workspace_dir, ''), COALESCE(cl.x, 0), COALESCE(cl.y, 0), COALESCE(cl.collapsed, false), w.budget_limit, COALESCE(w.monthly_spend, 0), diff --git a/workspace-server/internal/handlers/workspace_budget_test.go b/workspace-server/internal/handlers/workspace_budget_test.go index 3ffb2560..1ff8cb87 100644 --- a/workspace-server/internal/handlers/workspace_budget_test.go +++ b/workspace-server/internal/handlers/workspace_budget_test.go @@ -51,7 +51,7 @@ func TestWorkspaceBudget_Get_NilLimit(t *testing.T) { WillReturnRows(sqlmock.NewRows(wsColumns). 
AddRow("dddddddd-0005-0000-0000-000000000000", "Free Agent", "worker", 1, "online", []byte(`{}`), "http://localhost:9001", - nil, 0, 1, 0.0, "", 0, "", "langgraph", "", + nil, 0, 1, 0.0, "", 0, "", "claude-code", "", 0.0, 0.0, false, nil, // budget_limit NULL 0, // monthly_spend 0 @@ -97,7 +97,7 @@ func TestWorkspaceBudget_Get_WithLimit(t *testing.T) { WillReturnRows(sqlmock.NewRows(wsColumns). AddRow("dddddddd-0006-0000-0000-000000000000", "Capped Agent", "worker", 1, "online", []byte(`{}`), "http://localhost:9002", - nil, 0, 1, 0.0, "", 0, "", "langgraph", "", + nil, 0, 1, 0.0, "", 0, "", "claude-code", "", 0.0, 0.0, false, int64(500), // budget_limit = $5.00 in DB int64(123), // monthly_spend = $1.23 in DB @@ -151,7 +151,7 @@ func TestWorkspaceBudget_Create_WithLimit(t *testing.T) { "Budgeted Agent", // name nil, // role 3, // tier (default, workspace.go create-handler) - "langgraph", // runtime + "claude-code", // runtime (*string)(nil), // parent_id nil, // workspace_dir "none", // workspace_access @@ -161,6 +161,8 @@ func TestWorkspaceBudget_Create_WithLimit(t *testing.T) { ). WillReturnResult(sqlmock.NewResult(0, 1)) mock.ExpectCommit() + mock.ExpectExec("INSERT INTO workspace_secrets"). + WillReturnResult(sqlmock.NewResult(0, 1)) mock.ExpectExec("INSERT INTO canvas_layouts"). WithArgs(sqlmock.AnyArg(), float64(0), float64(0)). WillReturnResult(sqlmock.NewResult(0, 1)) diff --git a/workspace-server/internal/handlers/workspace_provision.go b/workspace-server/internal/handlers/workspace_provision.go index 2c6f4ff7..85af9914 100644 --- a/workspace-server/internal/handlers/workspace_provision.go +++ b/workspace-server/internal/handlers/workspace_provision.go @@ -515,7 +515,7 @@ func configDirName(workspaceID string) string { } // knownRuntimes is the allowlist of runtime strings the provisioner will -// accept. Unknown values are coerced to the default ("langgraph") instead +// accept. 
Unknown values are coerced to the default ("claude-code") instead // of being splatted into filepath.Join + config.yaml templating, which // closes both the YAML-injection vector (#241) where an attacker could // smuggle `initial_prompt: run id && curl …` through a crafted runtime diff --git a/workspace-server/internal/handlers/workspace_restart.go b/workspace-server/internal/handlers/workspace_restart.go index 8de60838..c2935707 100644 --- a/workspace-server/internal/handlers/workspace_restart.go +++ b/workspace-server/internal/handlers/workspace_restart.go @@ -212,7 +212,7 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) { var status, wsName, dbRuntime string var tier int err := db.DB.QueryRowContext(ctx, - `SELECT status, name, tier, COALESCE(runtime, 'langgraph') FROM workspaces WHERE id = $1`, id, + `SELECT status, name, tier, COALESCE(runtime, 'claude-code') FROM workspaces WHERE id = $1`, id, ).Scan(&status, &wsName, &tier, &dbRuntime) if err == sql.ErrNoRows { c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"}) @@ -744,7 +744,7 @@ func (h *WorkspaceHandler) runRestartCycle(workspaceID string) { var wsName, status, dbRuntime string var tier int err := db.DB.QueryRowContext(ctx, - `SELECT name, status, tier, COALESCE(runtime, 'langgraph') FROM workspaces WHERE id = $1 AND status NOT IN ('removed', 'paused', 'hibernated')`, workspaceID, + `SELECT name, status, tier, COALESCE(runtime, 'claude-code') FROM workspaces WHERE id = $1 AND status NOT IN ('removed', 'paused', 'hibernated')`, workspaceID, ).Scan(&wsName, &status, &tier, &dbRuntime) if err != nil { return // includes paused/hibernated — don't auto-restart those @@ -897,7 +897,7 @@ func (h *WorkspaceHandler) Resume(c *gin.Context) { var wsName, dbRuntime string var tier int err := db.DB.QueryRowContext(ctx, - `SELECT name, tier, COALESCE(runtime, 'langgraph') FROM workspaces WHERE id = $1 AND status = 'paused'`, id, + `SELECT name, tier, COALESCE(runtime, 'claude-code') FROM workspaces 
WHERE id = $1 AND status = 'paused'`, id, ).Scan(&wsName, &tier, &dbRuntime) if err == sql.ErrNoRows { c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found or not paused"}) @@ -930,9 +930,9 @@ func (h *WorkspaceHandler) Resume(c *gin.Context) { toResume := []wsInfo{{id, wsName, dbRuntime, tier}} rows, _ := db.DB.QueryContext(ctx, `WITH RECURSIVE descendants AS ( - SELECT id, name, tier, COALESCE(runtime, 'langgraph') AS runtime FROM workspaces WHERE parent_id = $1 AND status = 'paused' + SELECT id, name, tier, COALESCE(runtime, 'claude-code') AS runtime FROM workspaces WHERE parent_id = $1 AND status = 'paused' UNION ALL - SELECT w.id, w.name, w.tier, COALESCE(w.runtime, 'langgraph') FROM workspaces w JOIN descendants d ON w.parent_id = d.id WHERE w.status = 'paused' + SELECT w.id, w.name, w.tier, COALESCE(w.runtime, 'claude-code') FROM workspaces w JOIN descendants d ON w.parent_id = d.id WHERE w.status = 'paused' ) SELECT id, name, tier, runtime FROM descendants`, id) if rows != nil { defer rows.Close() diff --git a/workspace-server/internal/handlers/workspace_test.go b/workspace-server/internal/handlers/workspace_test.go index 2c8cf577..f0a10be4 100644 --- a/workspace-server/internal/handlers/workspace_test.go +++ b/workspace-server/internal/handlers/workspace_test.go @@ -35,7 +35,7 @@ func TestWorkspaceGet_Success(t *testing.T) { WithArgs("cccccccc-0001-0000-0000-000000000000"). WillReturnRows(sqlmock.NewRows(columns). 
AddRow("cccccccc-0001-0000-0000-000000000000", "My Agent", "worker", 1, "online", []byte(`{"name":"test"}`), - "http://localhost:8001", nil, 2, 1, 0.05, "", 3600, "working", "langgraph", + "http://localhost:8001", nil, 2, 1, 0.05, "", 3600, "working", "claude-code", "", 10.0, 20.0, false, nil, 0, false, true, []byte(`{}`))) @@ -60,7 +60,7 @@ func TestWorkspaceGet_Success(t *testing.T) { if resp["status"] != "online" { t.Errorf("expected status 'online', got %v", resp["status"]) } - if resp["runtime"] != "langgraph" { + if resp["runtime"] != "claude-code" { t.Errorf("expected runtime 'langgraph', got %v", resp["runtime"]) } // current_task is stripped from public GET response (#955) @@ -125,7 +125,7 @@ func TestWorkspaceGet_RemovedReturns410(t *testing.T) { WithArgs(id). WillReturnRows(sqlmock.NewRows(columns). AddRow(id, "Old Agent", "worker", 1, string(models.StatusRemoved), []byte(`null`), - "", nil, 0, 1, 0.0, "", 0, "", "langgraph", + "", nil, 0, 1, 0.0, "", 0, "", "claude-code", "", 0.0, 0.0, false, nil, 0, false, true, []byte(`{}`))) mock.ExpectQuery(`SELECT updated_at FROM workspaces`). @@ -189,7 +189,7 @@ func TestWorkspaceGet_RemovedReturns410WithNullRemovedAtOnTimestampFetchFailure( WithArgs(id). WillReturnRows(sqlmock.NewRows(columns). AddRow(id, "Vanished", "worker", 1, string(models.StatusRemoved), []byte(`null`), - "", nil, 0, 1, 0.0, "", 0, "", "langgraph", + "", nil, 0, 1, 0.0, "", 0, "", "claude-code", "", 0.0, 0.0, false, nil, 0, false, true, []byte(`{}`))) // Simulate the row vanishing between the two queries. @@ -252,7 +252,7 @@ func TestWorkspaceGet_RemovedWithIncludeQueryReturns200(t *testing.T) { WithArgs(id). WillReturnRows(sqlmock.NewRows(columns). 
AddRow(id, "Audit Agent", "worker", 1, string(models.StatusRemoved), []byte(`null`), - "", nil, 0, 1, 0.0, "", 0, "", "langgraph", + "", nil, 0, 1, 0.0, "", 0, "", "claude-code", "", 0.0, 0.0, false, nil, 0, false, true, []byte(`{}`))) // last_outbound_at follow-up query (existing path) @@ -342,7 +342,7 @@ func TestWorkspaceCreate_DBInsertError(t *testing.T) { // Transaction begins, workspace INSERT fails, transaction is rolled back. mock.ExpectBegin() mock.ExpectExec("INSERT INTO workspaces"). - WithArgs(sqlmock.AnyArg(), "Failing Agent", nil, 3, "langgraph", (*string)(nil), nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push"). + WithArgs(sqlmock.AnyArg(), "Failing Agent", nil, 3, "claude-code", (*string)(nil), nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push"). WillReturnError(sql.ErrConnDone) mock.ExpectRollback() @@ -373,11 +373,13 @@ func TestWorkspaceCreate_DefaultsApplied(t *testing.T) { // Transaction wraps the workspace INSERT (no secrets in this request). mock.ExpectBegin() // Expect workspace INSERT with defaulted tier=3 (Privileged — the - // handler default in workspace.go), runtime="langgraph" + // handler default in workspace.go), runtime="claude-code" mock.ExpectExec("INSERT INTO workspaces"). - WithArgs(sqlmock.AnyArg(), "Default Agent", nil, 3, "langgraph", (*string)(nil), nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push"). + WithArgs(sqlmock.AnyArg(), "Default Agent", nil, 3, "claude-code", (*string)(nil), nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push"). WillReturnResult(sqlmock.NewResult(0, 1)) mock.ExpectCommit() + mock.ExpectExec("INSERT INTO workspace_secrets"). + WillReturnResult(sqlmock.NewResult(0, 1)) // Expect canvas_layouts INSERT (x=0, y=0 — defaults) mock.ExpectExec("INSERT INTO canvas_layouts"). @@ -1429,7 +1431,7 @@ func TestWorkspaceGet_FinancialFieldsStripped(t *testing.T) { WithArgs("cccccccc-0010-0000-0000-000000000000"). 
WillReturnRows(sqlmock.NewRows(columns). AddRow("cccccccc-0010-0000-0000-000000000000", "Finance Test", "worker", 1, "online", []byte(`{}`), - "http://localhost:9001", nil, 0, 1, 0.0, "", 0, "", "langgraph", + "http://localhost:9001", nil, 0, 1, 0.0, "", 0, "", "claude-code", "", 0.0, 0.0, false, int64(50000), int64(12500), false, true, []byte(`{}`))) // budget_limit=500 USD, spend=125 USD @@ -1489,7 +1491,7 @@ func TestWorkspaceGet_SensitiveFieldsStripped(t *testing.T) { "panic: internal error at /secret/path.go:42", 100, "Analyzing customer PII for the Q4 report", - "langgraph", + "claude-code", "/home/user/secret-projects/client-work", 0.0, 0.0, false, nil, 0, false, true, []byte(`{}`))) @@ -1679,7 +1681,7 @@ func TestWorkspaceCreate_TemplateDefaultsLegacyTopLevelModel(t *testing.T) { } cfg := []byte(`name: Legacy Agent tier: 1 -runtime: langgraph +runtime: hermes model: anthropic:claude-sonnet-4-5 `) if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil { @@ -1695,10 +1697,12 @@ model: anthropic:claude-sonnet-4-5 // this assertion should flip back to 1. mock.ExpectExec("INSERT INTO workspaces"). WithArgs( - sqlmock.AnyArg(), "Legacy Agent", nil, 3, "langgraph", + sqlmock.AnyArg(), "Legacy Agent", nil, 3, "hermes", (*string)(nil), nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push"). WillReturnResult(sqlmock.NewResult(0, 1)) mock.ExpectCommit() + mock.ExpectExec("INSERT INTO workspace_secrets"). + WillReturnResult(sqlmock.NewResult(0, 1)) mock.ExpectExec("INSERT INTO canvas_layouts"). WithArgs(sqlmock.AnyArg(), float64(0), float64(0)). 
WillReturnResult(sqlmock.NewResult(0, 1)) @@ -1879,8 +1883,8 @@ func TestWorkspaceCreate_188_NoTemplateNoRuntime_NowMODEL_REQUIRED(t *testing.T) if !bytes.Contains(w.Body.Bytes(), []byte(`"code":"MODEL_REQUIRED"`)) { t.Errorf("bare-body create: expected code=MODEL_REQUIRED in body, got %s", w.Body.String()) } - if !bytes.Contains(w.Body.Bytes(), []byte(`"runtime":"langgraph"`)) { - t.Errorf("bare-body create: expected runtime=\"langgraph\" in 422 body (the gate runs AFTER the langgraph-default assignment so the diagnostic surfaces what runtime WOULD have been used), got %s", w.Body.String()) + if !bytes.Contains(w.Body.Bytes(), []byte(`"runtime":"claude-code"`)) { + t.Errorf("bare-body create: expected runtime=\"claude-code\" in 422 body (the gate runs AFTER the default assignment so the diagnostic surfaces what runtime WOULD have been used), got %s", w.Body.String()) } } diff --git a/workspace-server/internal/provisioner/localbuild.go b/workspace-server/internal/provisioner/localbuild.go index 2eb23ce4..fcda13d5 100644 --- a/workspace-server/internal/provisioner/localbuild.go +++ b/workspace-server/internal/provisioner/localbuild.go @@ -392,7 +392,7 @@ func remoteHeadShaProd(ctx context.Context, opts *LocalBuildOptions, runtime str } defer func() { _ = resp.Body.Close() }() if resp.StatusCode == http.StatusNotFound { - return "", fmt.Errorf("repo not found at %s — runtime %q may not be mirrored to Gitea (only claude-code/hermes/langgraph/autogen today)", apiURL, runtime) + return "", fmt.Errorf("repo not found at %s — runtime %q may not be mirrored to Gitea (expected one of claude-code, codex, hermes, openclaw)", apiURL, runtime) } if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden { return "", fmt.Errorf("auth failure (%d) at %s — verify MOLECULE_GITEA_TOKEN if private repo", resp.StatusCode, apiURL) diff --git a/workspace-server/internal/provisioner/localbuild_test.go 
b/workspace-server/internal/provisioner/localbuild_test.go index 293b9c1c..d3aa291c 100644 --- a/workspace-server/internal/provisioner/localbuild_test.go +++ b/workspace-server/internal/provisioner/localbuild_test.go @@ -161,7 +161,7 @@ func TestEnsureLocalImage_GiteaUnreachable(t *testing.T) { opts.remoteHeadSha = func(ctx context.Context, opts *LocalBuildOptions, runtime string) (string, error) { return "", errors.New("dial tcp: no such host") } - _, err := ensureLocalImageWithOpts(context.Background(), "langgraph", opts) + _, err := ensureLocalImageWithOpts(context.Background(), "hermes", opts) if err == nil { t.Fatalf("expected error, got nil") } @@ -259,7 +259,7 @@ func TestEnsureLocalImage_BuildFailure(t *testing.T) { opts.dockerBuild = func(ctx context.Context, opts *LocalBuildOptions, contextDir, tag string) error { return errors.New("Dockerfile syntax error") } - _, err := ensureLocalImageWithOpts(context.Background(), "autogen", opts) + _, err := ensureLocalImageWithOpts(context.Background(), "codex", opts) if err == nil { t.Fatalf("expected error, got nil") } @@ -313,7 +313,7 @@ func TestEnsureLocalImage_ConcurrentSameRuntime(t *testing.T) { for i := 0; i < N; i++ { go func() { defer wg.Done() - _, _ = ensureLocalImageWithOpts(context.Background(), "langgraph", opts) + _, _ = ensureLocalImageWithOpts(context.Background(), "hermes", opts) }() } wg.Wait() diff --git a/workspace-server/internal/provisioner/provisioner.go b/workspace-server/internal/provisioner/provisioner.go index c4318906..49864344 100644 --- a/workspace-server/internal/provisioner/provisioner.go +++ b/workspace-server/internal/provisioner/provisioner.go @@ -38,8 +38,8 @@ var ErrNoBackend = errors.New("provisioner: no backend configured (zero-valued r // ErrUnresolvableRuntime is returned by selectImage when a workspace // names a runtime that has no resolvable image (not in RuntimeImages and // no operator-pinned cfg.Image). 
RFC internal#483 + security review 4269: -// previously such a request silently fell through to DefaultImage -// (langgraph) — a user asking for crewai would get a langgraph container +// previously such a request silently fell through to DefaultImage — a user +// asking for a removed runtime would get a different container // with no signal. The CTO standing directive // (feedback_platform_must_hardgate_base_contract) is fail-closed: a // named-but-unresolvable runtime must reject with a structured, @@ -68,8 +68,7 @@ var ErrUnresolvableRuntime = errors.New("provisioner: requested runtime has no r // short-circuit pulls entirely if needed. var RuntimeImages = computeRuntimeImages() -// DefaultImage is the fallback workspace Docker image (langgraph is the -// most common runtime). Computed via RegistryPrefix() so the prefix +// DefaultImage is the fallback workspace Docker image. Computed via RegistryPrefix() so the prefix // override applies to the fallback path too. // // NOTE: Every runtime MUST have an entry in knownRuntimes (registry.go). @@ -97,7 +96,7 @@ type WorkspaceConfig struct { PluginsPath string // Host path to plugins directory (mounted at /plugins) WorkspacePath string // Host path to bind-mount as /workspace (if empty, uses Docker named volume) Tier int - Runtime string // "langgraph" (default) or "claude-code", "codex", "ollama", "custom" + Runtime string // "claude-code" (default), "codex", "hermes", "openclaw", etc. InstanceType string // Optional CP EC2 instance type override (SaaS only) DiskGB int32 // Optional CP root volume size override in GiB (SaaS only) Display WorkspaceDisplayConfig @@ -139,9 +138,8 @@ type WorkspaceDisplayConfig struct { // feedback_platform_must_hardgate_base_contract): if the workspace NAMES a // runtime that resolves to no image (not in RuntimeImages, no pinned // cfg.Image), reject with ErrUnresolvableRuntime instead of silently -// substituting DefaultImage. 
Pre-fix, removing crewai/deepagents/gemini-cli -// from the catalog left those create requests silently provisioning a -// langgraph container — the user asked for crewai and got langgraph with no +// substituting DefaultImage. Pre-fix, removing a runtime from the catalog left +// those create requests silently provisioning a fallback container with no // signal. The error propagates through Start → markProvisionFailed, which // already broadcasts WorkspaceProvisionFailed and records the message. // @@ -707,7 +705,7 @@ func buildContainerEnv(cfg WorkspaceConfig) []string { // /app and set ENV ADAPTER_MODULE=adapter, but molecule-runtime is a // pip console_script entry point so cwd isn't on sys.path automatically. // Setting PYTHONPATH from the provisioner fixes every adapter image - // (claude-code, hermes, langgraph, …) without needing to PR each + // (claude-code, codex, hermes, openclaw, …) without needing to PR each // standalone template repo. Per-template ENV in the Dockerfile can // still override (Dockerfile ENV is overridden by docker -e at runtime). "PYTHONPATH=/app", diff --git a/workspace-server/internal/provisioner/registry.go b/workspace-server/internal/provisioner/registry.go index a6fa5d77..4f1eee25 100644 --- a/workspace-server/internal/provisioner/registry.go +++ b/workspace-server/internal/provisioner/registry.go @@ -18,18 +18,15 @@ const defaultRegistryPrefix = "ghcr.io/molecule-ai" // // Order matters for deterministic test snapshots; keep alphabetical. var knownRuntimes = []string{ - "autogen", "claude-code", "codex", "hermes", - "langgraph", "openclaw", } // defaultRuntime is the fallback when a workspace's config doesn't specify a -// runtime. Picked because LangGraph is the most common in our org templates -// and has the smallest "first impression" cold-start surface. -const defaultRuntime = "langgraph" +// runtime. 
+const defaultRuntime = "claude-code" // RegistryPrefix returns the registry prefix all workspace-template image // references should use. Defaults to ghcr.io/molecule-ai (the upstream OSS @@ -119,4 +116,3 @@ func computeRuntimeImages() map[string]string { } return out } - diff --git a/workspace-server/internal/provisioner/registry_test.go b/workspace-server/internal/provisioner/registry_test.go index 50802976..39be4c45 100644 --- a/workspace-server/internal/provisioner/registry_test.go +++ b/workspace-server/internal/provisioner/registry_test.go @@ -53,8 +53,8 @@ func TestRuntimeImage_AllKnownRuntimes(t *testing.T) { } } // Pin the count so adding a runtime requires explicit test acknowledgement. - if len(knownRuntimes) != 6 { - t.Errorf("knownRuntimes length = %d, want 6 (autogen, claude-code, codex, hermes, langgraph, openclaw)", len(knownRuntimes)) + if len(knownRuntimes) != 4 { + t.Errorf("knownRuntimes length = %d, want 4 (claude-code, codex, hermes, openclaw)", len(knownRuntimes)) } } diff --git a/workspace-server/internal/registry/healthsweep.go b/workspace-server/internal/registry/healthsweep.go index e290d823..40977d57 100644 --- a/workspace-server/internal/registry/healthsweep.go +++ b/workspace-server/internal/registry/healthsweep.go @@ -37,7 +37,7 @@ func remoteStaleAfter() time.Duration { } // StartHealthSweep periodically checks all "online" workspaces. For -// container-backed runtimes (langgraph, claude-code, …) it calls the +// container-backed runtimes (claude-code, codex, hermes, openclaw) it calls the // Docker API via `checker.IsRunning`. For `runtime='external'` (remote // agents per Phase 30) it checks heartbeat freshness: a heartbeat older // than `REMOTE_LIVENESS_STALE_AFTER` (default 90s) marks the workspace @@ -79,7 +79,7 @@ func sweepOnlineWorkspaces(ctx context.Context, checker ContainerChecker, onOffl // auto-restart would loop forever (provisioner has no template // for either runtime). 
rows, err := db.DB.QueryContext(ctx, - `SELECT id FROM workspaces WHERE status IN ('online', 'degraded') AND COALESCE(runtime, 'langgraph') NOT IN ('external', 'mock')`) + `SELECT id FROM workspaces WHERE status IN ('online', 'degraded') AND COALESCE(runtime, 'claude-code') NOT IN ('external', 'mock')`) if err != nil { log.Printf("Health sweep: query error: %v", err) return @@ -146,7 +146,7 @@ func sweepStaleRemoteWorkspaces(ctx context.Context, onOffline OfflineHandler) { rows, err := db.DB.QueryContext(ctx, ` SELECT id FROM workspaces WHERE status IN ('online', 'degraded') - AND COALESCE(runtime, 'langgraph') = 'external' + AND COALESCE(runtime, 'claude-code') = 'external' AND COALESCE(last_heartbeat_at, updated_at) < now() - ($1 || ' seconds')::interval `, staleAfterSec) if err != nil { diff --git a/workspace-server/internal/registry/healthsweep_test.go b/workspace-server/internal/registry/healthsweep_test.go index 45718cb9..635cee8e 100644 --- a/workspace-server/internal/registry/healthsweep_test.go +++ b/workspace-server/internal/registry/healthsweep_test.go @@ -163,7 +163,7 @@ func TestSweepStaleRemoteWorkspaces_MarksStaleAwaitingAgent(t *testing.T) { setupTestRedis(t) // Two stale remote workspaces returned by the query - mock.ExpectQuery(`FROM workspaces\s+WHERE status IN \('online', 'degraded'\)\s+AND COALESCE\(runtime, 'langgraph'\) = 'external'\s+AND COALESCE\(last_heartbeat_at, updated_at\) < now\(\) - `). + mock.ExpectQuery(`FROM workspaces\s+WHERE status IN \('online', 'degraded'\)\s+AND COALESCE\(runtime, 'claude-code'\) = 'external'\s+AND COALESCE\(last_heartbeat_at, updated_at\) < now\(\) - `). WillReturnRows(sqlmock.NewRows([]string{"id"}). AddRow("ws-stale-1"). AddRow("ws-stale-2")) @@ -193,7 +193,7 @@ func TestSweepStaleRemoteWorkspaces_NoStaleWorkspaces(t *testing.T) { mock := setupTestDB(t) setupTestRedis(t) - mock.ExpectQuery(`FROM workspaces\s+WHERE status IN \('online', 'degraded'\)\s+AND COALESCE\(runtime, 'langgraph'\) = 'external'`). 
+ mock.ExpectQuery(`FROM workspaces\s+WHERE status IN \('online', 'degraded'\)\s+AND COALESCE\(runtime, 'claude-code'\) = 'external'`). WillReturnRows(sqlmock.NewRows([]string{"id"})) called := 0 @@ -277,7 +277,7 @@ func TestStartHealthSweep_NilCheckerRunsRemoteSweep(t *testing.T) { // The goroutine will tick once every 50ms; we give it 200ms then // cancel. sqlmock will satisfy any number of calls. - mock.ExpectQuery(`FROM workspaces\s+WHERE status IN \('online', 'degraded'\)\s+AND COALESCE\(runtime, 'langgraph'\) = 'external'`). + mock.ExpectQuery(`FROM workspaces\s+WHERE status IN \('online', 'degraded'\)\s+AND COALESCE\(runtime, 'claude-code'\) = 'external'`). WillReturnRows(sqlmock.NewRows([]string{"id"})) ctx, cancel := context.WithCancel(context.Background()) diff --git a/workspace-server/internal/registry/hibernation.go b/workspace-server/internal/registry/hibernation.go index 8d3884da..58e9abc7 100644 --- a/workspace-server/internal/registry/hibernation.go +++ b/workspace-server/internal/registry/hibernation.go @@ -71,7 +71,7 @@ func hibernateIdleWorkspaces(ctx context.Context, onHibernate HibernateHandler) AND hibernation_idle_minutes > 0 AND status IN ('online', 'degraded') AND active_tasks = 0 - AND COALESCE(runtime, 'langgraph') != 'external' + AND COALESCE(runtime, 'claude-code') != 'external' AND last_heartbeat_at IS NOT NULL AND last_heartbeat_at < now() - (hibernation_idle_minutes * INTERVAL '1 minute') `) diff --git a/workspace-server/internal/registry/provisiontimeout.go b/workspace-server/internal/registry/provisiontimeout.go index f5c2638b..684da758 100644 --- a/workspace-server/internal/registry/provisiontimeout.go +++ b/workspace-server/internal/registry/provisiontimeout.go @@ -21,8 +21,8 @@ type ProvisionTimeoutEmitter interface { // DefaultProvisioningTimeout is how long a workspace may sit in // status='provisioning' before the sweeper flips it to 'failed'. 
-// Default for non-hermes runtimes (claude-code, langgraph, crewai, -// autogen, etc.) which cold-boot in <5 min. The container-launch path +// Default for non-hermes runtimes (claude-code, codex, openclaw, etc.) +// which cold-boot in <5 min. The container-launch path // has its own 3-minute context timeout (provisioner.ProvisionTimeout) // but that only bounds the docker API call — a container that started // but crashes before /registry/register never triggers that path and diff --git a/workspace-server/internal/registry/provisiontimeout_test.go b/workspace-server/internal/registry/provisiontimeout_test.go index 29cc904e..90652eb0 100644 --- a/workspace-server/internal/registry/provisiontimeout_test.go +++ b/workspace-server/internal/registry/provisiontimeout_test.go @@ -336,10 +336,9 @@ func TestProvisioningTimeout_RuntimeAware(t *testing.T) { want time.Duration }{ {"hermes", HermesProvisioningTimeout}, - {"langgraph", DefaultProvisioningTimeout}, {"claude-code", DefaultProvisioningTimeout}, - {"crewai", DefaultProvisioningTimeout}, - {"autogen", DefaultProvisioningTimeout}, + {"codex", DefaultProvisioningTimeout}, + {"openclaw", DefaultProvisioningTimeout}, {"", DefaultProvisioningTimeout}, {"unknown-runtime", DefaultProvisioningTimeout}, } @@ -359,17 +358,17 @@ func TestProvisioningTimeout_RuntimeAware(t *testing.T) { // // Order pinned: // -// 1. PROVISION_TIMEOUT_SECONDS env beats everything (ops debug). -// 2. Manifest lookup beats hermes special-case + default. -// 3. Hermes default applies when lookup returns 0 for hermes. -// 4. DefaultProvisioningTimeout applies when lookup returns 0 for -// anything else. -// 5. Lookup returning 0 for ANY runtime is "no override" — never -// a 0-second timeout (which would kill every workspace instantly). +// 1. PROVISION_TIMEOUT_SECONDS env beats everything (ops debug). +// 2. Manifest lookup beats hermes special-case + default. +// 3. Hermes default applies when lookup returns 0 for hermes. +// 4. 
DefaultProvisioningTimeout applies when lookup returns 0 for +// anything else. +// 5. Lookup returning 0 for ANY runtime is "no override" — never +// a 0-second timeout (which would kill every workspace instantly). func TestProvisioningTimeout_ManifestOverride(t *testing.T) { manifest := map[string]int{ "claude-code": 900, // 15 min — what an ops manifest bump would set - "langgraph": 1200, + "codex": 1200, "hermes": 2400, // 40 min — manifest can override hermes default too } lookup := func(runtime string) int { return manifest[runtime] } @@ -380,7 +379,7 @@ func TestProvisioningTimeout_ManifestOverride(t *testing.T) { want time.Duration }{ {"manifest override beats default for claude-code", "claude-code", 900 * time.Second}, - {"manifest override applied for langgraph", "langgraph", 1200 * time.Second}, + {"manifest override applied for codex", "codex", 1200 * time.Second}, {"manifest override beats hermes default", "hermes", 2400 * time.Second}, {"unknown runtime + no manifest entry → default", "unknown-runtime", DefaultProvisioningTimeout}, {"empty runtime + no manifest entry → default", "", DefaultProvisioningTimeout}, diff --git a/workspace-server/internal/router/router.go b/workspace-server/internal/router/router.go index c8f5c9b3..ec3754d9 100644 --- a/workspace-server/internal/router/router.go +++ b/workspace-server/internal/router/router.go @@ -568,7 +568,7 @@ func Setup(hub *ws.Hub, broadcaster *events.Broadcaster, prov *provisioner.Provi var runtime string err := db.DB.QueryRowContext( context.Background(), - `SELECT COALESCE(runtime, 'langgraph') FROM workspaces WHERE id = $1`, + `SELECT COALESCE(runtime, 'claude-code') FROM workspaces WHERE id = $1`, workspaceID, ).Scan(&runtime) return runtime, err diff --git a/workspace-server/migrations/011_workspace_runtime.sql b/workspace-server/migrations/011_workspace_runtime.sql index ad88a5c2..7ed440cc 100644 --- a/workspace-server/migrations/011_workspace_runtime.sql +++ 
b/workspace-server/migrations/011_workspace_runtime.sql @@ -1,2 +1,2 @@ ALTER TABLE workspaces -ADD COLUMN IF NOT EXISTS runtime TEXT DEFAULT 'langgraph'; +ADD COLUMN IF NOT EXISTS runtime TEXT DEFAULT 'claude-code';