Compare commits

..

1 Commits

Author SHA1 Message Date
infra-sre 2bb461a56e fix(mcp): scrub err.Error() from JSON-RPC error messages (OFFSEC-001)
Replace all three err.Error() leaks in mcp.go with constant strings,
consistent with the same fix applied to 22 other files in PRs #1193/1206/1219/#168.

- Call handler (line ~329): "parse error: " + err.Error() → "parse error"
- dispatchRPC params unmarshal (line ~417): "invalid params: " + err.Error()
  → "invalid parameters"
- dispatchRPC tool call (line ~422): err.Error() → "tool call failed"
  + log.Printf server-side for forensics

Routes protected by WorkspaceAuth (C1) and MCPRateLimiter (C2) — this is
defence-in-depth per OFFSEC-001 / #259.

Tests added:
- TestMCPHandler_Call_MalformedJSON_ReturnsConstantParseError
- TestMCPHandler_dispatchRPC_InvalidParams_ReturnsConstantMessage
- TestMCPHandler_dispatchRPC_UnknownTool_ReturnsConstantMessage
- TestMCPHandler_dispatchRPC_InvalidParams_ArrayInsteadOfObject

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-10 10:00:38 +00:00
19 changed files with 80 additions and 515 deletions
@@ -23,7 +23,7 @@ name: publish-workspace-server-image
on:
push:
branches: [main]
branches: [staging, main]
paths:
- 'workspace-server/**'
- 'canvas/**'
@@ -32,9 +32,11 @@ on:
- '.gitea/workflows/publish-workspace-server-image.yml'
workflow_dispatch:
# Serialize per-branch so two rapid main pushes don't race the same
# :staging-latest tag retag. Allow parallel runs as they produce
# different :staging-<sha> tags and last-write-wins on :staging-latest.
# Serialize per-branch so two rapid staging pushes don't race the same
# :staging-latest tag retag. Allow staging and main to run in parallel
# (different GITHUB_REF → different concurrency group) since they
# produce different :staging-<sha> tags and last-write-wins on
# :staging-latest is acceptable across branches.
#
# cancel-in-progress: false → in-flight builds finish; the next push's
# build queues. This avoids a partially-pushed image.
@@ -32,7 +32,7 @@ name: publish-workspace-server-image
on:
push:
branches: [main]
branches: [staging, main]
paths:
- 'workspace-server/**'
- 'canvas/**'
-1
View File
@@ -1 +0,0 @@
staging trigger
@@ -31,25 +31,17 @@ export function extractMessageText(body: Record<string, unknown> | null): string
if (text) return text;
// Response: result.parts[].text or result.parts[].root.text
// Use the first part that has a direct text field; within that part,
// prefer direct text over root.text. Subsequent parts' root.text fields
// are ignored when a direct text exists in an earlier part.
const result = body.result as Record<string, unknown> | undefined;
const rParts = (result?.parts || []) as Array<Record<string, unknown>>;
const firstPartWithText = rParts.find(
(p) => typeof p.text === "string" && (p.text as string) !== ""
);
if (firstPartWithText) {
return firstPartWithText.text as string;
}
// No direct text found; use root.text from the first part (if present).
const firstPart = rParts[0];
if (firstPart) {
const root = firstPart.root as Record<string, unknown> | undefined;
if (typeof root?.text === "string" && root.text !== "") {
return root.text as string;
}
}
const rText = rParts
.map((p) => {
if (p.text) return p.text as string;
const root = p.root as Record<string, unknown> | undefined;
return (root?.text as string) || "";
})
.filter(Boolean)
.join("\n");
if (rText) return rText;
if (typeof body.result === "string") return body.result;
} catch { /* ignore */ }
@@ -3,56 +3,52 @@
* Tests for Spinner component.
*
* Covers: sm/md/lg size classes, aria-hidden, motion-safe animate-spin class.
*
* NOTE: SVG elements use SVGAnimatedString for className (not a plain string),
* so we use getAttribute("class") instead of className for assertions.
*/
import React from "react";
import { render, cleanup } from "@testing-library/react";
import { afterEach, describe, expect, it } from "vitest";
import { render, screen } from "@testing-library/react";
import { describe, expect, it } from "vitest";
import { Spinner } from "../Spinner";
afterEach(cleanup);
function getSvgClass(r: ReturnType<typeof render>): string {
const svg = r.container.querySelector("svg");
if (!svg) throw new Error("No SVG found");
return svg.getAttribute("class") ?? "";
}
describe("Spinner — size variants", () => {
it("renders with sm size class", () => {
const r = render(<Spinner size="sm" />);
expect(getSvgClass(r)).toContain("w-3");
expect(getSvgClass(r)).toContain("h-3");
const { container } = render(<Spinner size="sm" />);
const svg = container.querySelector("svg");
expect(svg).toBeTruthy();
expect(svg?.className).toContain("w-3");
expect(svg?.className).toContain("h-3");
});
it("renders with md size class (default)", () => {
const r = render(<Spinner size="md" />);
expect(getSvgClass(r)).toContain("w-4");
expect(getSvgClass(r)).toContain("h-4");
const { container } = render(<Spinner size="md" />);
const svg = container.querySelector("svg");
expect(svg?.className).toContain("w-4");
expect(svg?.className).toContain("h-4");
});
it("renders with lg size class", () => {
const r = render(<Spinner size="lg" />);
expect(getSvgClass(r)).toContain("w-5");
expect(getSvgClass(r)).toContain("h-5");
const { container } = render(<Spinner size="lg" />);
const svg = container.querySelector("svg");
expect(svg?.className).toContain("w-5");
expect(svg?.className).toContain("h-5");
});
it("defaults to md size when no size prop given", () => {
const r = render(<Spinner />);
expect(getSvgClass(r)).toContain("w-4");
expect(getSvgClass(r)).toContain("h-4");
const { container } = render(<Spinner />);
const svg = container.querySelector("svg");
expect(svg?.className).toContain("w-4");
expect(svg?.className).toContain("h-4");
});
it("has aria-hidden=true so screen readers skip it", () => {
const r = render(<Spinner />);
const svg = r.container.querySelector("svg");
const { container } = render(<Spinner />);
const svg = container.querySelector("svg");
expect(svg?.getAttribute("aria-hidden")).toBe("true");
});
it("includes the motion-safe:animate-spin class for CSS animation", () => {
expect(getSvgClass(render(<Spinner />))).toContain("motion-safe:animate-spin");
const { container } = render(<Spinner />);
const svg = container.querySelector("svg");
expect(svg?.className).toContain("motion-safe:animate-spin");
});
it("renders exactly one SVG element", () => {
@@ -100,14 +100,7 @@ export function toYaml(config: ConfigData): string {
if (!o) return;
lines.push(`${k}:`);
Object.entries(o).forEach(([sk, sv]) => {
if (sv === undefined || sv === null || sv === "") return;
if (Array.isArray(sv)) {
// Nested list block: e.g. required_env: [KEY, SECRET]
lines.push(` ${sk}:`);
sv.forEach((v) => lines.push(` - ${v}`));
} else {
lines.push(` ${sk}: ${sv}`);
}
if (sv !== undefined && sv !== null && sv !== "") lines.push(` ${sk}: ${sv}`);
});
};
@@ -128,7 +121,7 @@ export function toYaml(config: ConfigData): string {
if (config.task_budget && config.task_budget > 0) { simple("task_budget", config.task_budget); }
if (config.prompt_files?.length) { lines.push(""); list("prompt_files", config.prompt_files); }
lines.push(""); list("skills", config.skills);
lines.push(""); list("tools", config.tools);
if (config.tools?.length) { list("tools", config.tools); }
lines.push(""); obj("a2a", config.a2a as unknown as Record<string, unknown>);
lines.push(""); obj("delegation", config.delegation as unknown as Record<string, unknown>);
if (config.sandbox?.backend) { lines.push(""); obj("sandbox", config.sandbox as unknown as Record<string, unknown>); }
-22
View File
@@ -269,28 +269,6 @@ Each workspace exposes an A2A server, builds an Agent Card, and registers with t
But the long-term collaboration model remains direct workspace-to-workspace communication via A2A.
## Known Limitations
### Playwright / browser system libs are not installed
The base `molecule-ai-workspace-runtime` image (`workspace/Dockerfile`) is built on `python:3.11-slim` with Node.js 22, git, and `gh` — about 500 MB. It deliberately **does not** include the system libraries Chromium needs (`libnss3`, `libatk-bridge2.0-0`, `libxkbcommon0`, `libcups2`, `libdrm2`, `libxcomposite1`, `libxdamage1`, `libxrandr2`, `libgbm1`, `libpango-1.0-0`, `libasound2`, etc.). Adding them would inflate the image by ~200250 MB (~40%) for every workspace, even though only frontend / QA workspaces ever launch a browser.
Practical consequences:
- `npx playwright test` (and any other Chromium-driven E2E tooling) **will fail at browser launch** when run from inside an in-container workspace agent.
- The error surface is missing-shared-object messages such as `error while loading shared libraries: libnss3.so` or `Host system is missing dependencies to run browsers`.
- Unit and integration tests (Vitest, Jest, etc.) that don't spawn a real browser are unaffected.
Recommended workflow:
1. **Run E2E in CI**, not in-container. The Gitea Actions self-hosted runner (and the GitHub Actions runner used by mirror repos) has the full Playwright dep set installed and is the supported surface for E2E. Push a branch, let CI run the suite.
2. **Local debugging** of a single failing spec is best done on a developer laptop with `npx playwright install-deps` run once.
3. **In-container iteration** on test logic itself is fine — write specs, lint them, type-check them — just don't expect `playwright test` to actually launch a browser.
If a particular workspace role genuinely needs in-container E2E (a dedicated QA template, for instance), the right place to layer Playwright deps is in a **role-specific adapter template image** that does `FROM molecule-ai-workspace-runtime:<tag>` and adds `RUN npx playwright install-deps`. Open a request against `molecule-ai-workspace-runtime` if you need this template stamped.
Tracking issue: [molecule-ai/molecule-app#7](https://git.moleculesai.app/molecule-ai/molecule-app/issues/7).
## Related Docs
- [Agent Runtime Adapters](./cli-runtime.md)
-1
View File
@@ -44,4 +44,3 @@
{"name": "mock-bigorg", "repo": "molecule-ai/molecule-ai-org-template-mock-bigorg", "ref": "main"}
]
}
// Triggered by Integration Tester at 2026-05-10T08:52Z
+5 -45
View File
@@ -37,50 +37,6 @@ PLUGINS_DIR="${4:?Missing plugins dir}"
EXPECTED=0
CLONED=0
# clone_one_with_retry — clone a single repo, retrying on transient failure.
#
# Why: the publish-workspace-server-image (and harness-replays) CI jobs
# clone the full manifest (~36 repos) serially on a memory-constrained
# Gitea Actions runner. Under host memory pressure the OOM killer
# occasionally SIGKILLs git-remote-https mid-clone:
#
# error: git-remote-https died of signal 9
# fatal: the remote end hung up unexpectedly
#
# (observed in publish-workspace-server-image run 4622 on 2026-05-10 — the
# job died on the 14th of 36 clones, which wedged staging→main). One
# transient SIGKILL / network blip would otherwise fail the whole tenant
# image rebuild. Retrying after a short backoff lets the pressure subside.
# The durable fix is more runner RAM/swap (tracked with Infra-SRE); this
# just stops a single flake from being release-blocking.
#
# Args: <target_dir> <name> <clone_url> <display_url> <ref>
clone_one_with_retry() {
local tdir="$1" name="$2" url="$3" display="$4" ref="$5"
local attempt=1 max_attempts=3 backoff
while : ; do
# A killed attempt can leave a partial directory behind; git clone
# refuses a non-empty target, so wipe it before each try.
rm -rf "$tdir/$name"
if [ "$ref" = "main" ]; then
if git clone --depth=1 -q "$url" "$tdir/$name"; then return 0; fi
else
if git clone --depth=1 -q --branch "$ref" "$url" "$tdir/$name"; then return 0; fi
fi
if [ "$attempt" -ge "$max_attempts" ]; then
echo "::error::clone failed after ${max_attempts} attempts: ${display}" >&2
return 1
fi
backoff=$((attempt * 3)) # 3s, then 6s
echo " ⚠ clone attempt ${attempt}/${max_attempts} failed for ${display} — retrying in ${backoff}s" >&2
sleep "$backoff"
attempt=$((attempt + 1))
done
}
clone_category() {
local category="$1"
local target_dir="$2"
@@ -126,7 +82,11 @@ clone_category() {
fi
echo " cloning $display_url -> $target_dir/$name (ref=$ref)"
clone_one_with_retry "$target_dir" "$name" "$clone_url" "$display_url" "$ref"
if [ "$ref" = "main" ]; then
git clone --depth=1 -q "$clone_url" "$target_dir/$name"
else
git clone --depth=1 -q --branch "$ref" "$clone_url" "$target_dir/$name"
fi
CLONED=$((CLONED + 1))
i=$((i + 1))
done
@@ -717,16 +717,13 @@ func deriveProviderFromModelSlug(model string) string {
func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
// Resolution order (priority high → low):
// 1. payload.Model (caller passed the canvas-picked model id verbatim)
// 2. envVars["MOLECULE_MODEL"] (the canonical, unambiguous name)
// 3. envVars["MODEL"] (workspace_secret persisted by /org/import via
// 2. envVars["MODEL"] (workspace_secret persisted by /org/import via
// the persona env file — MODEL=MiniMax-M2.7-highspeed etc.)
// 4. envVars["MODEL_PROVIDER"] (legacy + misleadingly named: it carries
// a *model id*, never the provider — that's LLM_PROVIDER. Historically
// set by canvas Save+Restart's PUT /model; the post-2026-05-08
// persona-env convention sometimes (mis)set it to a provider slug
// ("minimax") or a runtime name ("claude-code"), neither a valid
// model id — see internal#226. Only fires when the better-named
// vars are absent.)
// 3. envVars["MODEL_PROVIDER"] (legacy: this secret was historically a
// *model id* set by canvas Save+Restart's PUT /model; on the
// post-2026-05-08 persona-env convention it's a *provider slug*
// (e.g. "minimax") which is NOT a valid model id, so this fallback
// only fires when MODEL is absent.)
//
// Pre-fix bug: this function unconditionally OVERWROTE envVars["MODEL"]
// with the MODEL_PROVIDER slug (when payload.Model was empty), wiping
@@ -739,9 +736,6 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
// and the workspace template's adapter routed to providers[0]
// (anthropic-oauth) and wedged at SDK initialize. Caught 2026-05-08
// during Phase 4 verification of template-claude-code PR #9.
if model == "" {
model = envVars["MOLECULE_MODEL"]
}
if model == "" {
model = envVars["MODEL"]
}
@@ -752,18 +746,16 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
return
}
// Canonical model env varsmolecule-runtime's workspace/config.py
// resolves the picked model as MOLECULE_MODEL > MODEL > (legacy)
// MODEL_PROVIDER (#280). Export both new names so adapters can read
// either; MODEL stays for backwards compat with everything that
// already reads os.environ["MODEL"] (the claude-code adapter does,
// since #194). Without this, the user's canvas selection is silently
// dropped on every templated provision — confirmed via crash-loop
// diagnosis on 2026-05-02 where MiniMax picks booted with model=sonnet
// (template default) and demanded CLAUDE_CODE_OAUTH_TOKEN. Set these
// FIRST so the per-runtime branches below can layer on additional
// vendor-specific names without fighting over the canonical one.
envVars["MOLECULE_MODEL"] = model
// Universal MODEL env var — every adapter that wants to honour the
// canvas-picked model (instead of its template's default) reads this.
// molecule-runtime's workspace/config.py already falls back to MODEL
// for runtime_config.model (#194). Without this line, the user's
// canvas selection is silently dropped on every templated provision —
// confirmed via crash-loop diagnosis on 2026-05-02 where MiniMax
// picks booted with model=sonnet (template default) and demanded
// CLAUDE_CODE_OAUTH_TOKEN. Set it FIRST so the per-runtime branches
// below can still layer on additional vendor-specific names without
// fighting over the canonical one.
envVars["MODEL"] = model
switch runtime {
@@ -665,62 +665,46 @@ func TestApplyRuntimeModelEnv_SetsUniversalMODELForAllRuntimes(t *testing.T) {
runtime string
model string
modelProviderEnv string
moleculeModelEnv string
wantMODEL string
wantHermesDefault string // empty string = must be unset
}{
{
name: "claude-code: picked model populates MODEL + MOLECULE_MODEL",
name: "claude-code: picked model populates MODEL",
runtime: "claude-code",
model: "MiniMax-M2",
wantMODEL: "MiniMax-M2",
},
{
name: "hermes: picked model populates MODEL, MOLECULE_MODEL, HERMES_DEFAULT_MODEL",
name: "hermes: picked model populates BOTH MODEL and HERMES_DEFAULT_MODEL",
runtime: "hermes",
model: "minimax/MiniMax-M2.7",
wantMODEL: "minimax/MiniMax-M2.7",
wantHermesDefault: "minimax/MiniMax-M2.7",
},
{
name: "langgraph: picked model populates MODEL + MOLECULE_MODEL (no vendor-specific name)",
name: "langgraph: picked model populates MODEL (no vendor-specific name)",
runtime: "langgraph",
model: "anthropic:claude-opus-4-7",
wantMODEL: "anthropic:claude-opus-4-7",
},
{
name: "crewai: picked model populates MODEL + MOLECULE_MODEL (no vendor-specific name)",
name: "crewai: picked model populates MODEL (no vendor-specific name)",
runtime: "crewai",
model: "openai:gpt-4o",
wantMODEL: "openai:gpt-4o",
},
{
name: "empty model + no env fallback: nothing set",
name: "empty model + empty MODEL_PROVIDER fallback: nothing set",
runtime: "claude-code",
model: "",
},
{
name: "empty model + MODEL_PROVIDER fallback hits: MODEL/MOLECULE_MODEL set from secret",
name: "empty model + MODEL_PROVIDER fallback hits: MODEL set from secret",
runtime: "claude-code",
model: "",
modelProviderEnv: "MiniMax-M2",
wantMODEL: "MiniMax-M2",
},
{
name: "empty model + MOLECULE_MODEL env fallback hits (canonical name)",
runtime: "claude-code",
model: "",
moleculeModelEnv: "opus",
wantMODEL: "opus",
},
{
name: "MOLECULE_MODEL beats MODEL_PROVIDER when both set (misnomer guard, internal#226)",
runtime: "claude-code",
model: "",
moleculeModelEnv: "opus",
modelProviderEnv: "claude-code",
wantMODEL: "opus",
},
}
for _, tc := range cases {
@@ -729,18 +713,11 @@ func TestApplyRuntimeModelEnv_SetsUniversalMODELForAllRuntimes(t *testing.T) {
if tc.modelProviderEnv != "" {
envVars["MODEL_PROVIDER"] = tc.modelProviderEnv
}
if tc.moleculeModelEnv != "" {
envVars["MOLECULE_MODEL"] = tc.moleculeModelEnv
}
applyRuntimeModelEnv(envVars, tc.runtime, tc.model)
if got := envVars["MODEL"]; got != tc.wantMODEL {
t.Errorf("MODEL = %q, want %q", got, tc.wantMODEL)
}
// MOLECULE_MODEL (the canonical name) must mirror MODEL exactly.
if got := envVars["MOLECULE_MODEL"]; got != tc.wantMODEL {
t.Errorf("MOLECULE_MODEL = %q, want %q", got, tc.wantMODEL)
}
if got := envVars["HERMES_DEFAULT_MODEL"]; got != tc.wantHermesDefault {
t.Errorf("HERMES_DEFAULT_MODEL = %q, want %q", got, tc.wantHermesDefault)
}
-17
View File
@@ -179,23 +179,6 @@ def parse(data: Any) -> Variant:
)
return Malformed(raw=data)
# Push-mode queue envelope — returned when a push-mode workspace
# (one with a public URL) is at capacity. The platform queues the
# request and returns {"queued": true, "message": "...", "queue_id": "..."}.
# Unlike the poll-mode envelope (status=queued + delivery_mode=poll),
# this shape has no delivery_mode key — it's distinguishable by
# data.get("queued") is True alone. Checked before poll-mode so the
# two cases are mutually exclusive even if a buggy server sends both.
if data.get("queued") is True:
method_raw = data.get(_KEY_METHOD)
method = str(method_raw) if method_raw is not None else "message/send"
logger.info(
"a2a_response.parse: queued for busy push-mode peer (method=%s, queue_id=%s)",
method,
data.get("queue_id", "?"),
)
return Queued(method=method)
# Poll-queued envelope. Both keys must be present — the workspace
# server sets them together; if only one is present the body is
# ambiguous and we route to Malformed for visibility.
-24
View File
@@ -204,20 +204,6 @@ async def tool_delegate_task(
if not workspace_id or not task:
return "Error: workspace_id and task are required"
# Self-delegation guard: delegating to your own workspace ID deadlocks —
# the sending turn holds _run_lock while the receive handler waits for the
# same lock, the request 30s-times-out, and the whole cycle is wasted.
# Reject immediately with an actionable message. (effective_src mirrors the
# `src or WORKSPACE_ID` resolution used below for routing.)
effective_src = source_workspace_id or _peer_to_source.get(workspace_id) or WORKSPACE_ID
if workspace_id and workspace_id == effective_src:
return (
"Error: cannot delegate_task to your own workspace — self-delegation "
"deadlocks _run_lock (your sending turn holds it, the receive handler "
"waits for it, the request times out). There is no peer who is also you: "
"just do the work yourself, or call commit_memory / send_message_to_user directly."
)
# Auto-route: if source not specified, look up which registered
# workspace last saw this peer (populated by tool_list_peers). Falls
# back to the legacy WORKSPACE_ID for single-workspace operators.
@@ -337,16 +323,6 @@ async def tool_delegate_task_async(
src = source_workspace_id or _peer_to_source.get(workspace_id) or WORKSPACE_ID
# Self-delegation guard: even on the async path, queuing a task to your own
# workspace just makes you re-process your own dispatch — never useful, and
# on the sync path it deadlocks (see tool_delegate_task). Reject early.
if workspace_id and workspace_id == src:
return (
"Error: cannot delegate_task_async to your own workspace — there is no "
"peer who is also you. Do the work yourself, or call commit_memory / "
"send_message_to_user directly."
)
# Idempotency key: SHA-256 of (source, target, task) so that a
# restarted agent firing the same delegation gets the same key and
# the platform returns the existing delegation_id instead of
+3 -28
View File
@@ -66,35 +66,10 @@ async def delegate_task(workspace_id: str, task: str) -> str:
)
data = a2a_resp.json()
if "result" in data:
result = data["result"]
parts = result.get("parts", []) if isinstance(result, dict) else []
if parts and isinstance(parts[0], dict):
return parts[0].get("text", "(no text)")
# Empty parts list (e.g. {"parts": []}) should return str(result),
# not "(no text)" — preserves pre-fix behavior (#279 regression fix).
if isinstance(result, dict) and result.get("parts") == []:
return str(result)
return str(result) if isinstance(result, str) else "(no text)"
parts = data["result"].get("parts", [])
return parts[0].get("text", "(no text)") if parts else str(data["result"])
elif "error" in data:
err = data["error"]
# Handle both string-form errors ("error": "some string")
# and object-form errors ("error": {"message": "...", "code": ...}).
msg = ""
if isinstance(err, dict):
msg = err.get("message", "")
elif isinstance(err, str):
msg = err
else:
msg = str(err)
return f"Error: {msg}"
msg = ""
if isinstance(err, dict):
msg = err.get("message", "")
elif isinstance(err, str):
msg = err
else:
msg = str(err)
return f"Error: {msg}"
return f"Error: {data['error'].get('message', str(data['error']))}"
return str(data)
except Exception as e:
return f"Error sending A2A message: {e}"
+8 -54
View File
@@ -1,6 +1,5 @@
"""Load workspace configuration from config.yaml."""
import logging
import os
from dataclasses import dataclass, field
from pathlib import Path
@@ -8,8 +7,6 @@ from typing import Optional
import yaml
logger = logging.getLogger(__name__)
@dataclass
class RBACConfig:
@@ -384,47 +381,6 @@ def _derive_provider_from_model(model: str) -> str:
return ""
_legacy_model_provider_warned = False
def _picked_model_from_env(default: str) -> str:
"""Resolve the operator-picked model id from env; newest name wins.
Precedence: ``MOLECULE_MODEL`` (canonical, unambiguous) → ``MODEL`` →
``MODEL_PROVIDER`` (legacy) → ``default`` (the YAML ``model:`` field).
``MODEL_PROVIDER`` is **misleadingly named**: it carries the picked
*model id*, never the LLM provider — the provider lives in
``LLM_PROVIDER`` / the YAML ``provider:`` field. The legacy path stays
so canvas Save+Restart, the workspace-server secret-mint path, and
persona env files that set it keep working, but if it's the *only* one
set we log a deprecation once — the misnomer keeps biting (e.g. setting
``MODEL_PROVIDER=claude-code`` expecting it to select the claude-code
*runtime* — it doesn't, ``runtime:`` does — after which the claude CLI
404s on ``--model claude-code``). Set ``MODEL``/``MOLECULE_MODEL`` to
an id from ``runtime_config.models[].id`` (e.g. ``opus``, ``sonnet``,
``claude-opus-4-7``, ``MiniMax-M2.7-highspeed``) instead.
"""
global _legacy_model_provider_warned
for name in ("MOLECULE_MODEL", "MODEL"):
v = (os.environ.get(name) or "").strip()
if v:
return v
legacy = (os.environ.get("MODEL_PROVIDER") or "").strip()
if legacy:
if not _legacy_model_provider_warned:
logger.warning(
"MODEL_PROVIDER=%r is deprecated and misleadingly named — it "
"sets the picked *model id*, not the LLM provider (that's "
"LLM_PROVIDER / the YAML `provider:` field). Set MODEL (or "
"MOLECULE_MODEL) to an id from runtime_config.models instead.",
legacy,
)
_legacy_model_provider_warned = True
return legacy
return default
_EVENT_LOG_VALID_BACKENDS = {"memory", "disabled"}
@@ -489,10 +445,8 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
with open(config_file) as f:
raw = yaml.safe_load(f) or {}
# Operator-picked model from env (canvas / secret-mint / persona env),
# falling back to the YAML `model:` field. See _picked_model_from_env for
# the precedence (MOLECULE_MODEL > MODEL > legacy MODEL_PROVIDER).
model = _picked_model_from_env(raw.get("model", "anthropic:claude-opus-4-7"))
# Override model from env if provided
model = os.environ.get("MODEL_PROVIDER", raw.get("model", "anthropic:claude-opus-4-7"))
# Resolve top-level provider with this priority chain:
# 1. ``LLM_PROVIDER`` env var (canvas Save+Restart sets this so the
@@ -563,9 +517,8 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
required_env=runtime_raw.get("required_env", []),
timeout=runtime_raw.get("timeout", 0),
# Picked-model precedence (priority order):
# 1. operator-picked model from env — MOLECULE_MODEL > MODEL >
# (legacy) MODEL_PROVIDER, plumbed via canvas Save+Restart,
# workspace-server's secret-mint path, or the universal
# 1. MODEL_PROVIDER env var — canvas-picked model, plumbed via
# workspace-server's secret-mint path or the universal
# MODEL/MODEL_PROVIDER env from applyRuntimeModelEnv. The
# operator's canvas selection MUST win over the template's
# baked-in default; previously the template's
@@ -574,12 +527,13 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
# surfaced 2026-05-02 during E2E).
# 2. runtime_raw.model — explicit YAML override in the
# template's runtime_config.
# 3. top-level `model` (already env-resolved above). This is
# the SaaS restart case (CP regenerates a minimal
# 3. top-level `model` already honors MODEL_PROVIDER (line
# 359) but only when YAML lacks a top-level `model:`. This
# is the SaaS restart case (CP regenerates a minimal
# config.yaml on every boot, dropping runtime_config.model).
# Centralising here means EVERY adapter gets the override for
# free — no per-adapter env-reading code required.
model=_picked_model_from_env(runtime_raw.get("model") or model),
model=os.environ.get("MODEL_PROVIDER") or runtime_raw.get("model") or model,
# Same fallback shape as ``model`` above: an explicit
# ``runtime_config.provider`` wins; otherwise inherit the
# top-level resolved provider so adapters see a single
-16
View File
@@ -51,22 +51,6 @@ class AdaptorSource:
def _load_module_from_path(module_name: str, path: Path):
"""Import a Python file by absolute path. Returns the module or None on failure."""
# Ensure the plugins_registry package and its submodules are importable in the
# fresh module namespace created by module_from_spec(). Plugin adapters
# (molecule-skill-*/adapters/*.py) use "from plugins_registry.builtins import ..."
# which requires plugins_registry and its submodules to already be in sys.modules.
# We import and register them before exec_module so the plugin's own
# from ... import statements resolve correctly.
import sys
import plugins_registry
sys.modules.setdefault("plugins_registry", plugins_registry)
for _sub in ("builtins", "protocol", "raw_drop"):
try:
sub = importlib.import_module(f"plugins_registry.{_sub}")
sys.modules.setdefault(f"plugins_registry.{_sub}", sub)
except Exception:
# Submodule may not exist in all versions; skip if absent.
pass
spec = importlib.util.spec_from_file_location(module_name, path)
if spec is None or spec.loader is None:
return None
@@ -1,60 +0,0 @@
"""Tests for _load_module_from_path sys.modules injection fix (issue #296).
Verifies that plugin adapters using "from plugins_registry.builtins import ..."
can be loaded via _load_module_from_path() without ModuleNotFoundError.
"""
import sys
import tempfile
import os
from pathlib import Path
# Ensure the plugins_registry package is importable
import plugins_registry
from plugins_registry import _load_module_from_path
def test_load_adapter_with_plugins_registry_import():
"""Plugin adapter using 'from plugins_registry.builtins import ...' loads cleanly."""
# Write a temp adapter file that does the exact import from the bug report.
with tempfile.NamedTemporaryFile(
mode="w", suffix=".py", delete=False, dir=tempfile.gettempdir()
) as f:
f.write("from plugins_registry.builtins import AgentskillsAdaptor as Adaptor\n")
f.write("assert Adaptor is not None\n")
adapter_path = Path(f.name)
try:
module = _load_module_from_path("test_adapter", adapter_path)
assert module is not None, "module should load without error"
assert hasattr(module, "Adaptor"), "module should expose Adaptor"
finally:
os.unlink(adapter_path)
def test_load_adapter_with_full_plugins_registry_import():
"""Plugin adapter using 'from plugins_registry import ...' loads cleanly."""
with tempfile.NamedTemporaryFile(
mode="w", suffix=".py", delete=False, dir=tempfile.gettempdir()
) as f:
f.write("from plugins_registry import InstallContext, resolve\n")
f.write("from plugins_registry.protocol import PluginAdaptor\n")
f.write("assert InstallContext is not None\n")
f.write("assert resolve is not None\n")
f.write("assert PluginAdaptor is not None\n")
adapter_path = Path(f.name)
try:
module = _load_module_from_path("test_adapter_full", adapter_path)
assert module is not None, "module should load without error"
assert hasattr(module, "InstallContext"), "module should expose InstallContext"
assert hasattr(module, "resolve"), "module should expose resolve"
assert hasattr(module, "PluginAdaptor"), "module should expose PluginAdaptor"
finally:
os.unlink(adapter_path)
if __name__ == "__main__":
test_load_adapter_with_plugins_registry_import()
test_load_adapter_with_full_plugins_registry_import()
print("ALL TESTS PASS")
@@ -127,51 +127,3 @@ class TestPollBudgetEnvOverride:
# numeric and >= the documented floor (180s healthsweep budget).
assert isinstance(a2a_tools_delegation._SYNC_POLL_BUDGET_S, float)
assert a2a_tools_delegation._SYNC_POLL_BUDGET_S >= 180.0
# ============== Self-delegation guard ==============
class TestSelfDelegationGuard:
"""delegate_task / delegate_task_async to your own workspace ID must be
rejected immediately (it deadlocks _run_lock on the sync path — the
sending turn holds the lock, the receive handler waits for it, the
request 30s-times-out). A genuinely different target must NOT be
short-circuited by the guard."""
def _fresh(self, monkeypatch, own_id):
import a2a_tools_delegation as d
monkeypatch.setattr(d, "WORKSPACE_ID", own_id)
monkeypatch.setattr(d, "_peer_to_source", {}, raising=False)
return d
def test_delegate_task_rejects_self(self, monkeypatch):
import asyncio
d = self._fresh(monkeypatch, "ws-self-abc")
out = asyncio.run(d.tool_delegate_task("ws-self-abc", "do a thing"))
assert "your own workspace" in out.lower()
def test_delegate_task_rejects_self_via_explicit_source(self, monkeypatch):
import asyncio
d = self._fresh(monkeypatch, "ws-other-default")
out = asyncio.run(
d.tool_delegate_task("ws-X", "do a thing", source_workspace_id="ws-X")
)
assert "your own workspace" in out.lower()
def test_delegate_task_async_rejects_self(self, monkeypatch):
import asyncio
d = self._fresh(monkeypatch, "ws-self-abc")
out = asyncio.run(d.tool_delegate_task_async("ws-self-abc", "do a thing"))
assert "your own workspace" in out.lower()
def test_delegate_task_allows_different_target(self, monkeypatch):
"""Guard passes through for a real peer — it reaches discover_peer
(stubbed to 'not found' here) rather than returning the self message."""
import asyncio
d = self._fresh(monkeypatch, "ws-self-abc")
async def _no_peer(*_a, **_kw):
return None
monkeypatch.setattr(d, "discover_peer", _no_peer)
out = asyncio.run(d.tool_delegate_task("ws-OTHER-xyz", "do a thing"))
assert "your own workspace" not in out.lower()
assert "not found" in out.lower()
-87
View File
@@ -1,12 +1,10 @@
"""Tests for config.py — workspace configuration loading."""
import logging
import os
import pytest
import yaml
import config
from config import (
A2AConfig,
ComplianceConfig,
@@ -19,17 +17,6 @@ from config import (
)
@pytest.fixture(autouse=True)
def _clean_model_env(monkeypatch):
"""Every test starts with no MODEL* env vars set and the legacy-name
deprecation latch reset, so picked-model resolution is deterministic
regardless of the CI shell environment or test ordering."""
for name in ("MOLECULE_MODEL", "MODEL", "MODEL_PROVIDER"):
monkeypatch.delenv(name, raising=False)
monkeypatch.setattr(config, "_legacy_model_provider_warned", False, raising=False)
yield
def test_load_config_basic(tmp_path):
"""load_config reads a YAML file and returns a WorkspaceConfig."""
config_yaml = tmp_path / "config.yaml"
@@ -177,80 +164,6 @@ def test_runtime_config_model_env_wins_over_explicit_yaml(tmp_path, monkeypatch)
assert cfg.runtime_config.model == "minimax/MiniMax-M2.7"
def test_picked_model_MODEL_env_wins_over_legacy_MODEL_PROVIDER(tmp_path, monkeypatch):
"""MODEL (the correctly-named env var) beats the legacy MODEL_PROVIDER.
Regression for the 2026-05-10 dev-team incident: lead persona env files
set MODEL=claude-opus-4-7 (the intended model) AND MODEL_PROVIDER=claude-code
(mistaking MODEL_PROVIDER for "the runtime"). The old code read
MODEL_PROVIDER → the claude CLI got `--model claude-code` → 404. MODEL must
win so the operator's intended value lands at both levels.
"""
monkeypatch.setenv("MODEL", "opus")
monkeypatch.setenv("MODEL_PROVIDER", "claude-code")
config_yaml = tmp_path / "config.yaml"
config_yaml.write_text(
yaml.dump({"model": "anthropic:claude-opus-4-7",
"runtime_config": {"model": "sonnet"}})
)
cfg = load_config(str(tmp_path))
assert cfg.model == "opus"
assert cfg.runtime_config.model == "opus"
def test_picked_model_MOLECULE_MODEL_wins_over_MODEL(tmp_path, monkeypatch):
"""MOLECULE_MODEL (the unambiguous canonical name) wins over MODEL, which
in turn wins over the legacy MODEL_PROVIDER."""
monkeypatch.setenv("MOLECULE_MODEL", "claude-opus-4-7")
monkeypatch.setenv("MODEL", "sonnet")
monkeypatch.setenv("MODEL_PROVIDER", "claude-code")
config_yaml = tmp_path / "config.yaml"
config_yaml.write_text(yaml.dump({"model": "openai:gpt-4o"}))
cfg = load_config(str(tmp_path))
assert cfg.model == "claude-opus-4-7"
assert cfg.runtime_config.model == "claude-opus-4-7"
def test_picked_model_MODEL_env_overrides_yaml(tmp_path, monkeypatch):
"""MODEL env overrides the YAML `model:` field — same role MODEL_PROVIDER
had, now under the correctly-named var."""
config_yaml = tmp_path / "config.yaml"
config_yaml.write_text(yaml.dump({"model": "openai:gpt-4o"}))
monkeypatch.setenv("MODEL", "google:gemini-2.0-flash")
cfg = load_config(str(tmp_path))
assert cfg.model == "google:gemini-2.0-flash"
def test_legacy_MODEL_PROVIDER_still_honored_but_warns(tmp_path, monkeypatch, caplog):
"""MODEL_PROVIDER alone still resolves the model (back-compat: canvas
Save+Restart, secret-mint, existing persona env files keep working) but
logs a one-time deprecation pointing at the misnomer."""
config_yaml = tmp_path / "config.yaml"
config_yaml.write_text(yaml.dump({"model": "openai:gpt-4o"}))
monkeypatch.setenv("MODEL_PROVIDER", "MiniMax-M2.7-highspeed")
with caplog.at_level(logging.WARNING):
cfg = load_config(str(tmp_path))
assert cfg.model == "MiniMax-M2.7-highspeed"
assert cfg.runtime_config.model == "MiniMax-M2.7-highspeed"
assert any(
"MODEL_PROVIDER" in r.getMessage() and "deprecated" in r.getMessage()
for r in caplog.records
)
def test_no_deprecation_when_MODEL_is_set(tmp_path, monkeypatch, caplog):
"""When MODEL is set, MODEL_PROVIDER is ignored entirely and NOT warned
about — a workspace that already does it right shouldn't get nagged."""
config_yaml = tmp_path / "config.yaml"
config_yaml.write_text(yaml.dump({"model": "openai:gpt-4o"}))
monkeypatch.setenv("MODEL", "opus")
monkeypatch.setenv("MODEL_PROVIDER", "claude-code")
with caplog.at_level(logging.WARNING):
cfg = load_config(str(tmp_path))
assert cfg.model == "opus"
assert not any("MODEL_PROVIDER" in r.getMessage() for r in caplog.records)
def test_runtime_config_model_picks_up_env_via_top_level(tmp_path, monkeypatch):
"""End-to-end path the canvas Save+Restart relies on: user picks
a model → workspace_secrets.MODEL_PROVIDER updated → CP user-data