diff --git a/.github/workflows/block-internal-paths.yml b/.github/workflows/block-internal-paths.yml index da4679b2..6cd35b0e 100644 --- a/.github/workflows/block-internal-paths.yml +++ b/.github/workflows/block-internal-paths.yml @@ -15,6 +15,11 @@ on: types: [opened, synchronize, reopened] push: branches: [main, staging] + # Required for GitHub merge queue: the queue's pre-merge CI run on + # `gh-readonly-queue/...` refs needs this check to fire so the queue + # gets a real result instead of stalling forever AWAITING_CHECKS. + merge_group: + types: [checks_requested] jobs: check: @@ -25,6 +30,13 @@ jobs: with: fetch-depth: 2 # need previous commit to diff against on push events + # For pull_request events the diff base is github.event.pull_request.base.sha, + # which may be many commits behind HEAD and therefore absent from the + # shallow clone above. Fetch it explicitly (depth=1 keeps it fast). + - name: Fetch PR base SHA (pull_request events only) + if: github.event_name == 'pull_request' + run: git fetch --depth=1 origin ${{ github.event.pull_request.base.sha }} + - name: Refuse if forbidden paths appear run: | # Paths that must NEVER live in the public monorepo. Add to this diff --git a/.github/workflows/check-merge-group-trigger.yml b/.github/workflows/check-merge-group-trigger.yml new file mode 100644 index 00000000..77f4c7b3 --- /dev/null +++ b/.github/workflows/check-merge-group-trigger.yml @@ -0,0 +1,123 @@ +name: Check merge_group trigger on required workflows + +# Pre-merge guard against the deadlock pattern where a workflow whose +# check is in `required_status_checks` lacks a `merge_group:` trigger. +# Without it, GitHub merge queue stalls forever in AWAITING_CHECKS +# because the required check can't fire on `gh-readonly-queue/...` refs. +# +# This workflow: +# 1. Lists required status checks on the branch protection rule for `staging` +# 2. For each required check, finds the workflow that produces it (by job +# name match) +# 3. 
Fails if any such workflow lacks `merge_group:` in its triggers +# +# Reasoning for staging-only: main has its own CI gating model (PR review), +# but staging is what the merge queue runs on, so it's the trigger that +# matters. + +on: + pull_request: + paths: + - '.github/workflows/**.yml' + - '.github/workflows/**.yaml' + push: + branches: [staging, main] + paths: + - '.github/workflows/**.yml' + - '.github/workflows/**.yaml' + # Self-listen on merge_group so the linter passes its own queue run. + merge_group: + types: [checks_requested] + +jobs: + check: + name: Required workflows have merge_group trigger + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - uses: actions/checkout@v4 + - name: Verify merge_group trigger on required-check workflows + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO: ${{ github.repository }} + shell: bash + run: | + set -euo pipefail + + # Branch we care about — the one merge queue runs on. + BRANCH=staging + + # Pull the list of required status check contexts. If the call + # fails, drop its output (gh can echo the error body on stdout) so + # it is never linted as a check name. NOTE(review): this endpoint + # may need admin read that GITHUB_TOKEN lacks — confirm. + REQUIRED=$(gh api "repos/${REPO}/branches/${BRANCH}/protection/required_status_checks" \ + --jq '.contexts[]' 2>/dev/null) || REQUIRED="" + if [ -z "$REQUIRED" ]; then + echo "::warning::Could not read any required status checks on ${BRANCH} (branch unprotected, no required checks, or token not allowed to read branch protection) — nothing verified." + exit 0 + fi + + echo "Required checks on ${BRANCH}:" + echo "${REQUIRED}" | sed 's/^/ - /' + echo + + # Build a map: workflow file -> newline-separated names it declares + # (workflow name plus one name per job). Parsed with awk instead of + # yq for portability — yq isn't in the default image as of 2026-04. + declare -A workflow_jobs + shopt -s nullglob + for wf in .github/workflows/*.yml .github/workflows/*.yaml; do + [ -f "$wf" ] || continue + # Extract the workflow name (the `name:` at file root). 
+ wf_name=$(awk '/^name:[[:space:]]/ {sub(/^name:[[:space:]]+/,""); gsub(/^"|"$/,""); print; exit}' "$wf") + # Extract one check name per job from the `jobs:` block, mirroring + # how GitHub names the check run: + # - the job's `name:` field (4-space indent) when it has one + # - otherwise the job id (key with 2-space indent under `jobs:`) + # required_status_checks contexts match whichever of these applies. + jobs_block=$(awk '/^jobs:/{flag=1; next} flag' "$wf") + job_names=$(echo "$jobs_block" | awk '/^[[:space:]]{2}[A-Za-z_][A-Za-z0-9_-]*:/ {if (id != "") print id; id=$1; sub(/:.*$/,"",id); next} /^[[:space:]]{4}name:[[:space:]]/ {sub(/^[[:space:]]+name:[[:space:]]+/,""); gsub(/^["'"'"']|["'"'"']$/,""); print; id=""} END {if (id != "") print id}') + workflow_jobs["$wf"]="${wf_name}"$'\n'"${job_names}" + done + + # For each required check, find the workflow that produces it. + # Then verify that workflow lists merge_group as a trigger. + FAILED=0 + while IFS= read -r check; do + [ -z "$check" ] && continue + owning_wf="" + for wf in "${!workflow_jobs[@]}"; do + if echo "${workflow_jobs[$wf]}" | grep -Fxq "$check"; then + owning_wf="$wf" + break + fi + done + + if [ -z "$owning_wf" ]; then + echo "::warning::Required check '${check}' has no matching workflow in this repo. Skipping (may be from an external app)." + continue + fi + + # Does the workflow's trigger list include merge_group? + # Match either bare `merge_group:` line or merge_group with + # subsequent indented config (types: [checks_requested]). + if grep -qE '^[[:space:]]*merge_group:' "$owning_wf"; then + echo "OK: '${check}' (in $owning_wf) — has merge_group trigger" + else + echo "::error file=${owning_wf}::Required check '${check}' is produced by ${owning_wf}, but the workflow does not declare a 'merge_group:' trigger. With merge queue enabled on ${BRANCH}, this will deadlock the queue (every PR sits AWAITING_CHECKS forever). 
Add this to the workflow's 'on:' block:" + echo "::error file=${owning_wf}:: merge_group:" + echo "::error file=${owning_wf}:: types: [checks_requested]" + FAILED=1 + fi + done <<< "$REQUIRED" + + if [ "$FAILED" -ne 0 ]; then + echo + echo "::error::Block. See errors above. Reference: $(grep -l 'reference_merge_queue' /dev/null 2>/dev/null || echo 'memory: reference_merge_queue_enablement.md')." + exit 1 + fi + + echo + echo "All required workflows on ${BRANCH} declare merge_group triggers." diff --git a/.github/workflows/e2e-staging-canvas.yml b/.github/workflows/e2e-staging-canvas.yml index c90794bd..dbdab154 100644 --- a/.github/workflows/e2e-staging-canvas.yml +++ b/.github/workflows/e2e-staging-canvas.yml @@ -5,18 +5,21 @@ name: E2E Staging Canvas (Playwright) # e2e-staging-saas.yml (which tests the API shape) by exercising the # actual browser + canvas bundle against live staging. # -# Triggers: push to main or PR touching canvas sources + this workflow, +# Triggers: push to main/staging or PR touching canvas sources + this workflow, # manual dispatch, and weekly cron to catch browser/runtime drift even # when canvas is quiet. +# Added staging to push/pull_request branches so the auto-promote gate +# check (--event push --branch staging) can see a completed run for this +# workflow — mirrors what PR #1891 does for e2e-api.yml. on: push: - branches: [main] + branches: [main, staging] paths: - 'canvas/**' - '.github/workflows/e2e-staging-canvas.yml' pull_request: - branches: [main] + branches: [main, staging] paths: - 'canvas/**' - '.github/workflows/e2e-staging-canvas.yml' diff --git a/.github/workflows/e2e-staging-saas.yml b/.github/workflows/e2e-staging-saas.yml index c1e2b878..8ef1c950 100644 --- a/.github/workflows/e2e-staging-saas.yml +++ b/.github/workflows/e2e-staging-saas.yml @@ -5,7 +5,7 @@ name: E2E Staging SaaS (full lifecycle) # HMA memory → activity → peers), then tears down and asserts leak-free. 
# # Why a separate workflow (not folded into ci.yml): -# - The run takes ~20 min (EC2 boot + cloudflared DNS + provision sweeps + +# - The run takes ~25-35 min (EC2 boot + cloudflared DNS + provision sweeps + # agent bootstrap), way too slow for every PR. # - Needs its own concurrency group so two pushes don't fight over the # same staging org slug prefix. @@ -68,7 +68,7 @@ jobs: e2e-staging-saas: name: E2E Staging SaaS runs-on: ubuntu-latest - timeout-minutes: 30 + timeout-minutes: 45 permissions: contents: read diff --git a/.github/workflows/publish-workspace-server-image.yml b/.github/workflows/publish-workspace-server-image.yml index df0c3098..c7f3127f 100644 --- a/.github/workflows/publish-workspace-server-image.yml +++ b/.github/workflows/publish-workspace-server-image.yml @@ -73,7 +73,20 @@ jobs: # - canary-verify.yml runs smoke tests against them # - On green → canary-verify retags :staging- → :latest # - On red → :latest stays on the prior good digest, prod is safe - - name: Build & push platform image to GHCR (staging- only) + # Every push of :staging- also retags the same digest as + # :staging-latest so staging CP (which pins TENANT_IMAGE at + # :staging-latest) picks up new builds automatically — no more manual + # Railway env-var edits. Prod's :latest retag still happens in + # canary-verify.yml after the canary fleet greenlights this digest; + # :staging-latest is strictly the "most recent main build," not a + # canary-verified promotion. + # + # Before this, TENANT_IMAGE on Railway staging was pinned to a static + # :staging- and drifted months behind (2026-04-24 incident: + # canary tenant ran :staging-a14cf86, 10 days stale, which lacked + # applyRuntimeModelEnv and caused every E2E to route hermes+openai + # through openrouter → 401). See issue filed with this PR. + - name: Build & push platform image to GHCR (staging- + staging-latest) uses: docker/build-push-action@v6 with: context: . 
@@ -82,6 +95,7 @@ jobs: push: true tags: | ${{ env.IMAGE_NAME }}:staging-${{ steps.tags.outputs.sha }} + ${{ env.IMAGE_NAME }}:staging-latest cache-from: type=gha cache-to: type=gha,mode=max labels: | @@ -89,7 +103,7 @@ jobs: org.opencontainers.image.revision=${{ github.sha }} org.opencontainers.image.description=Molecule AI platform (Go API server) — pending canary verify - - name: Build & push tenant image to GHCR (staging- only) + - name: Build & push tenant image to GHCR (staging- + staging-latest) uses: docker/build-push-action@v6 with: context: . @@ -98,6 +112,7 @@ jobs: push: true tags: | ${{ env.TENANT_IMAGE_NAME }}:staging-${{ steps.tags.outputs.sha }} + ${{ env.TENANT_IMAGE_NAME }}:staging-latest cache-from: type=gha cache-to: type=gha,mode=max # Canvas uses same-origin fetches. The tenant Go platform diff --git a/canvas/Dockerfile b/canvas/Dockerfile index 2fb7c92a..e834b7a5 100644 --- a/canvas/Dockerfile +++ b/canvas/Dockerfile @@ -1,4 +1,4 @@ -FROM node:20-alpine AS builder +FROM node:22-alpine AS builder WORKDIR /app COPY package.json package-lock.json* ./ RUN npm install @@ -11,7 +11,7 @@ ENV NEXT_PUBLIC_WS_URL=$NEXT_PUBLIC_WS_URL ENV NEXT_PUBLIC_ADMIN_TOKEN=$NEXT_PUBLIC_ADMIN_TOKEN RUN npm run build -FROM node:20-alpine +FROM node:22-alpine WORKDIR /app COPY --from=builder /app/.next/standalone ./ COPY --from=builder /app/.next/static ./.next/static diff --git a/canvas/e2e/staging-setup.ts b/canvas/e2e/staging-setup.ts index 598fb877..7147f4ea 100644 --- a/canvas/e2e/staging-setup.ts +++ b/canvas/e2e/staging-setup.ts @@ -26,8 +26,13 @@ const CP_URL = process.env.MOLECULE_CP_URL || "https://staging-api.moleculesai.a const ADMIN_TOKEN = process.env.MOLECULE_ADMIN_TOKEN; const STAGING = process.env.CANVAS_E2E_STAGING === "1"; -const PROVISION_TIMEOUT_MS = 15 * 60 * 1000; -const WORKSPACE_ONLINE_TIMEOUT_MS = 10 * 60 * 1000; +// Tenant cold boot on staging regularly takes 12-15 min when the +// workspace-server Docker image isn't already cached on the 
AMI. Raised +// to 20 min to match tests/e2e/test_staging_full_saas.sh (PR #1930) +// after repeated "tenant provision: timed out after 900s" flakes +// were blocking staging→main syncs on 2026-04-24. +const PROVISION_TIMEOUT_MS = 20 * 60 * 1000; +const WORKSPACE_ONLINE_TIMEOUT_MS = 20 * 60 * 1000; const TLS_TIMEOUT_MS = 3 * 60 * 1000; async function jsonFetch( diff --git a/canvas/src/app/blog/2026-04-20-chrome-devtools-mcp/page.mdx b/canvas/src/app/blog/2026-04-20-chrome-devtools-mcp/page.mdx new file mode 100644 index 00000000..f4ec240e --- /dev/null +++ b/canvas/src/app/blog/2026-04-20-chrome-devtools-mcp/page.mdx @@ -0,0 +1,240 @@ +--- +title: "Give Your AI Agent Browser Superpowers: Chrome DevTools MCP Integration" +date: "2026-04-20" +canonical: "https://docs.molecule.ai/blog/chrome-devtools-mcp" +og_title: "Give Your AI Agent Browser Superpowers with Chrome DevTools MCP" +og_description: "Chrome DevTools MCP brings AI agent browser control to Molecule AI. Every browser action is audit-attributed via org API keys. MCP browser automation with governance built in." +og_image: "/blog/chrome-devtools-mcp/chrome-devtools-mcp-social-card.png" +twitter_card: "summary_large_image" +author: "Molecule AI" +keywords: + - "AI agent browser control" + - "MCP browser automation" + - "browser automation AI agents" + - "browser automation governance" + - "Chrome DevTools MCP" + - "MCP governance layer" + - "AI agent web UI automation" +--- + +import { Callout } from '@/components/blog/Callout' +import { CodeBlock } from '@/components/blog/CodeBlock' + +# Give Your AI Agent Browser Superpowers: Chrome DevTools MCP Integration + +Every AI agent platform eventually gets asked the same question: "Can it interact with a web interface?" The answer is usually some variant of "sort of — give it your credentials and hope for the best." That's not a real answer. It's a trust fall. + +Chrome DevTools MCP changes this. 
It gives your AI agent a structured, governed interface to a real Chrome browser session — with full **MCP browser automation** capability and an audit trail that actually answers the question: "which agent touched what, and what did it do?" + +This post covers what Chrome DevTools MCP is, how Molecule AI's governance layer makes it enterprise-safe, and how to put it to work in your agent fleet. + +--- + +## What is Chrome DevTools MCP? + +Chrome DevTools MCP is an integration between the [MCP (Model Context Protocol)](https://modelcontextprotocol.io) and Google Chrome's DevTools Protocol. MCP is a standardized interface layer that lets AI agents connect to external tools with consistent tooling, authentication, and telemetry. The DevTools Protocol is Chrome's native debugging interface — the same interface your browser's developer tools use to inspect pages, capture network traffic, and control the browser. + +When you connect an AI agent to Chrome DevTools via MCP, you get: + +- **Full CDP access** — navigate, click, type, screenshot, evaluate JavaScript, read network logs, intercept requests, read cookies and local storage +- **MCP protocol layer** — structured JSON-RPC instead of raw CDP, consistent tool naming, type-safe parameters +- **Molecule AI governance layer** — org API key attribution, audit logging, session scoping, instant revocation + +The third item is what separates this from "use Puppeteer with an API key." It's the difference between browser automation AI agents and browser automation AI agents with a compliance story. + +--- + +## The Browser Problem: Trust Falls and Black Boxes + +When most teams give an AI agent browser access, the workflow looks like this: + +1. Agent receives a task ("find our competitors' pricing pages") +2. Agent uses browser credentials to log into Chrome +3. Agent navigates, reads, screenshots, and reports +4. 
Nobody knows exactly what the agent did, which session it used, or whether credentials were exposed + +This is a trust fall, not a governance model. The agent *can* do the task. But you have no audit trail if something goes wrong. No way to revoke access if the agent's behavior becomes unexpected. No attribution if you need to trace a call back to a specific integration. + +The **MCP governance layer** in Molecule AI addresses all three: + +- Every browser action is logged with the org API key prefix that initiated it +- Chrome sessions are token-scoped — Agent A's session is never Agent B's +- Revocation is one API call — the key stops working, the session closes, no redeploy required + +--- + +## How MCP Browser Automation Works in Molecule AI + +The integration uses Chrome's CDP over a WebSocket connection managed by the MCP server. Molecule AI's MCP server exposes a structured set of tools that map to CDP commands. Your agent calls these tools like any other MCP tool — the same interface whether you're automating Chrome, reading memory, or querying the platform API. + +Here's the sequence: + +1. **Workspace starts with a Chrome session attached** — the session is scoped to a specific Chrome profile or fresh browser context, isolated from other agents +2. **Agent calls MCP tools** — `cdp_navigate`, `cdp_click`, `cdp_evaluate`, `cdp_screenshot`, and others are available as structured tools with type-safe parameters +3. **Every call is audit-attributed** — the org API key prefix (e.g., `mole_a1b2`) is logged with the tool name, parameters, and result for every CDP call +4. 
**Session is revocable at any time** — revoke the org API key and the agent loses Chrome access immediately + +### AI Agent Browser Control: What You Can Do + +**Navigation and interaction:** +- `cdp_navigate` — navigate to any URL (supports `data:` and `about:` URLs via browser UI) +- `cdp_click` — click a DOM element by selector +- `cdp_type` — type text into a focused element +- `cdp_hover` — hover over a DOM element +- `cdp_scroll` — scroll an element or the page + +**Inspection and debugging:** +- `cdp_screenshot` — capture a full-page or viewport screenshot +- `cdp_evaluate` — execute JavaScript in the page context +- `cdp_get_cookies` / `cdp_set_cookies` — read and write cookies for authenticated sessions +- `cdp_get_local_storage` / `cdp_set_local_storage` — read and write localStorage + +**Network and performance:** +- `cdp_get_requests` — capture and filter network requests (XHR, fetch, WS) +- `cdp_block_urls` — block specific URL patterns to simulate adblocked environments +- `cdp_set_throttle` — throttle network conditions (3G, LTE, offline) + +--- + +## Browser Automation AI Agents: Use Cases That Actually Ship + +The Chrome DevTools MCP integration is most useful in workflows where browser state is the source of truth — and where audit attribution matters. + +### Automated Lighthouse audits on every PR + +A research agent runs a Lighthouse audit against every pull request in your repo. It navigates to the preview URL, captures the performance score, flags regressions below your threshold, and reports to the PM agent. Every audit run is logged with the org API key — your observability team can trace which agent ran which audit and when. 
+ +```bash +# Agent calls cdp_navigate to the PR preview URL +# Agent calls cdp_evaluate to run Lighthouse inline +# Agent calls cdp_screenshot to capture the score +# Agent delegates results to PM workspace +``` + +### Visual regression detection + +An agent maintains a baseline set of screenshots for your key user flows. On every code change, it navigates to each flow, captures screenshots, and diffs against the baseline. Drift beyond your threshold opens a ticket automatically. The governance layer means your QA team can review the full history of which screenshots were captured, when, and by which agent. + +### Auth scraping + +An agent reads authenticated browser state from an existing Chrome session — cookies, localStorage, session tokens — and uses that state to authenticate API calls that would otherwise require separate credential management. The session is scoped; the credentials never leave the browser context. + +--- + +## MCP Governance Layer: Why It Matters + +The MCP protocol gives you tool connectivity. The governance layer is what makes it enterprise-ready. + +### Per-action audit logging + +Every CDP call your agent makes generates an audit log entry. The log includes: + +- **Org API key prefix** — which integration made the call (e.g., `mole_a1b2`) +- **Tool name and parameters** — `cdp_navigate(url=https://...)` +- **Result or error** — success, timeout, or CDP error code +- **Timestamp and workspace ID** — for timeline reconstruction + +This is the audit trail your security team will ask for in the next compliance review. It exists because Molecule AI's MCP server generates it — not because you built a custom logging pipeline. + +### Token-scoped Chrome sessions + +Chrome sessions are isolated per org API key. When you create an org API key for a specific integration (`lighthouse-reporter`), that key's Chrome session is separate from every other key's session. 
No credential cross-contamination — Agent A cannot read Agent B's authenticated state because their sessions are isolated at the MCP tool layer. + +### Instant revocation without redeployment + +If you need to revoke access — the integration is compromised, the agent's behavior is unexpected, the contractor relationship ended — you revoke the org API key: + +```bash +curl -X DELETE https://platform.moleculesai.app/org/tokens/<token-id> \ + -H "Authorization: Bearer <admin-token>" +``` + +The key stops working immediately. The Chrome session is closed. The agent loses browser access before the next heartbeat. No redeploy, no container restart, no waiting for DNS cache expiration. + +--- + +## Setting Up Chrome DevTools MCP + +Chrome DevTools MCP requires a Chrome instance running with the remote debugging port enabled, and a `chromedp` or equivalent CDP client connected through Molecule AI's MCP server. + +### Step 1: Enable Chrome remote debugging + +Start Chrome with the `--remote-debugging-port=9222` flag: + +```bash +# macOS +/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \ + --remote-debugging-port=9222 \ + --user-data-dir=/tmp/chrome-debug + +# Linux +google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug +``` + +### Step 2: Configure Molecule AI + +In your workspace config, add the Chrome DevTools MCP server URL: + +```yaml +# config.yaml +mcpServers: + - name: chrome-devtools + url: "http://localhost:9222" # Chrome remote-debugging endpoint (port from Step 1) + transport: cdp +``` + +### Step 3: Verify the connection + +Your agent can now call CDP tools. Test with a simple navigation: + +``` +Agent: navigate to https://example.com and screenshot the page +``` + +The audit log should show `cdp_navigate` and `cdp_screenshot` entries attributed to the workspace's org API key prefix. 
+ +--- + +## What the Security Review Looks Like + +When your security team asks "what does this integration actually do?", here's the answer: + +**What it can do:** +- Navigate to any URL (with org API key attribution on every navigation) +- Read and write browser state (cookies, localStorage, session tokens) +- Screenshot pages and DOM elements +- Execute JavaScript in the page context + +**What it can't do (by default):** +- Access the host machine beyond the Chrome sandbox +- Read files outside the browser context +- Exfiltrate session tokens across session boundaries + +**What revocation looks like:** +- Revoke org API key → immediate session close +- No redeploy, no agent restart +- Audit trail shows every action taken before revocation + +--- + +## Browser Automation Governance: The Bigger Picture + +Chrome DevTools MCP is one piece of Molecule AI's broader MCP governance story. MCP is a general-purpose protocol — it connects agents to any tool that speaks CDP, stdio, or HTTP. The governance layer applies uniformly: every MCP call gets the same treatment — org API key attribution, audit logging, instant revocation. + +This means you can add new MCP integrations — databases, APIs, code execution environments — with the same governance posture. The MCP protocol is the connectivity layer. Molecule AI's MCP governance layer is the control plane. + +If you're evaluating AI agent platforms for browser automation governance, the question to ask is not "can it control a browser?" It's "can I audit every action, attribute every call, and revoke access in one step?" Chrome DevTools MCP with Molecule AI's MCP governance layer is the answer to that question. + +--- + +## Get Started + +Chrome DevTools MCP is available on all Molecule AI deployments running Phase 30 or later. 
+ +- [MCP Server Setup Guide](/docs/guides/mcp-server-setup) — configure MCP tools in your workspace +- [Org API Keys: Audit Attribution Setup](/blog/org-scoped-api-keys) — set up org API keys with attribution +- [A2A Protocol Reference](/docs/api-protocol/a2a-protocol) — how agents delegate browser tasks to each other + + +Chrome DevTools MCP requires Chrome running with the remote debugging port enabled. CDP access is scoped per org API key — multiple agents can share Chrome sessions only if intentionally scoped that way via key design. + \ No newline at end of file diff --git a/canvas/src/components/AuditTrailPanel.tsx b/canvas/src/components/AuditTrailPanel.tsx index f7056dbe..b38b8fad 100644 --- a/canvas/src/components/AuditTrailPanel.tsx +++ b/canvas/src/components/AuditTrailPanel.tsx @@ -138,6 +138,7 @@ export function AuditTrailPanel({ workspaceId }: Props) {
{FILTERS.map((f) => (
)} )} {step !== "done" && ( + + + + {/* Role */} + {data.role && ( +
{data.role}
+ )} + + {/* Skills */} + {skills.length > 0 && ( +
+ {skills.slice(0, 3).map((skill) => ( + + {skill} + + ))} + {skills.length > 3 && ( + +{skills.length - 3} + )} +
+ )} + + {/* Status + active tasks row */} +
+ {data.status !== "online" ? ( + + {statusCfg.label} + + ) :
} + {data.activeTasks > 0 && ( +
+
+ + {data.activeTasks} + +
+ )} +
+ + {/* Current task banner for sub-agents */} + {data.currentTask && ( + +
+
+ {data.currentTask} +
+ + )} + + {/* Recursive sub-children rendered inside this card */} + {hasSubChildren && depth < MAX_NESTING_DEPTH && ( +
+
Team
+
= 2 ? "grid grid-cols-2 gap-1" : "space-y-1"}> + {subChildren.map((sub) => ( + + ))} +
+
+ )} +
+
+ ); +} function getSkillNames(agentCard: Record | null): string[] { if (!agentCard) return []; diff --git a/canvas/src/components/__tests__/AuthGate.test.tsx b/canvas/src/components/__tests__/AuthGate.test.tsx index 656a7701..633edf83 100644 --- a/canvas/src/components/__tests__/AuthGate.test.tsx +++ b/canvas/src/components/__tests__/AuthGate.test.tsx @@ -105,10 +105,64 @@ describe("AuthGate — authenticated state", () => { }); }); +describe("AuthGate — /cp/auth/* skip guard (redirect loop regression)", () => { + it("renders children without calling fetchSession or redirect when pathname starts with /cp/auth/", async () => { + mockGetTenantSlug.mockReturnValue("acme"); + mockFetchSession.mockResolvedValue(null); + + // Simulate being on the login page + Object.defineProperty(window, "location", { + writable: true, + value: { ...window.location, pathname: "/cp/auth/login" }, + }); + + let result: ReturnType; + await act(async () => { + result = render( + +
Protected content
+
+ ); + }); + + // Children should render — AuthGate skips session fetch for auth paths + expect(result!.getByTestId("child")).toBeTruthy(); + expect(mockFetchSession).not.toHaveBeenCalled(); + expect(mockRedirectToLogin).not.toHaveBeenCalled(); + }); + + it("renders children without calling redirect for /cp/auth/signup path", async () => { + mockGetTenantSlug.mockReturnValue("acme"); + mockFetchSession.mockResolvedValue(null); + + Object.defineProperty(window, "location", { + writable: true, + value: { ...window.location, pathname: "/cp/auth/signup" }, + }); + + let result: ReturnType; + await act(async () => { + result = render( + +
Protected content
+
+ ); + }); + + expect(result!.getByTestId("child")).toBeTruthy(); + expect(mockRedirectToLogin).not.toHaveBeenCalled(); + }); +}); + describe("AuthGate — anonymous / redirect state", () => { it("calls redirectToLogin when session fetch returns null", async () => { mockGetTenantSlug.mockReturnValue("acme"); mockFetchSession.mockResolvedValue(null); + // Ensure pathname is NOT on /cp/auth/* so the redirect guard fires + Object.defineProperty(window, "location", { + writable: true, + value: { ...window.location, pathname: "/dashboard" }, + }); await act(async () => { render( diff --git a/canvas/src/components/__tests__/Canvas.a11y.test.tsx b/canvas/src/components/__tests__/Canvas.a11y.test.tsx index a8231eb3..341a2c7a 100644 --- a/canvas/src/components/__tests__/Canvas.a11y.test.tsx +++ b/canvas/src/components/__tests__/Canvas.a11y.test.tsx @@ -72,6 +72,7 @@ const mockStoreState = { selectedNodeIds: new Set(), clearSelection: vi.fn(), toggleNodeSelection: vi.fn(), + deletingIds: new Set(), }; vi.mock("@/store/canvas", () => ({ diff --git a/canvas/src/components/__tests__/Canvas.pan-to-node.test.tsx b/canvas/src/components/__tests__/Canvas.pan-to-node.test.tsx index 77ac6518..76d9be78 100644 --- a/canvas/src/components/__tests__/Canvas.pan-to-node.test.tsx +++ b/canvas/src/components/__tests__/Canvas.pan-to-node.test.tsx @@ -16,7 +16,9 @@ afterEach(() => { // ── Shared fitView spy — must be set up before vi.mock hoisting ────────────── const mockFitView = vi.fn(); const mockFitBounds = vi.fn(); -const mockGetIntersectingNodes = vi.fn(() => []); +const mockGetIntersectingNodes = vi.fn( + (): Array<{ id: string; position: { x: number; y: number } }> => [], +); vi.mock("@xyflow/react", () => { const ReactFlow = ({ @@ -83,6 +85,12 @@ const mockStoreState = { selectedNodeIds: new Set(), clearSelection: vi.fn(), toggleNodeSelection: vi.fn(), + // Cascade-delete / deploy animation state (added in the multilevel- + // layout-UX bundle). 
Canvas.tsx reads deletingIds.size to decide + // whether to apply the "locked during delete" class on each node; + // an empty Set mirrors the idle canvas and doesn't interact with + // any pan/fit behaviour under test here. + deletingIds: new Set(), }; vi.mock("@/store/canvas", () => ({ diff --git a/canvas/src/components/__tests__/ProvisioningTimeout.test.tsx b/canvas/src/components/__tests__/ProvisioningTimeout.test.tsx index f1c5b150..2424ea49 100644 --- a/canvas/src/components/__tests__/ProvisioningTimeout.test.tsx +++ b/canvas/src/components/__tests__/ProvisioningTimeout.test.tsx @@ -8,6 +8,12 @@ global.fetch = vi.fn(() => import { useCanvasStore } from "../../store/canvas"; import type { WorkspaceData } from "../../store/socket"; import { DEFAULT_PROVISION_TIMEOUT_MS } from "../ProvisioningTimeout"; +import { + DEFAULT_RUNTIME_PROFILE, + RUNTIME_PROFILES, + getRuntimeProfile, + provisionTimeoutForRuntime, +} from "@/lib/runtimeProfiles"; // Helper to build a WorkspaceData object function makeWS(overrides: Partial & { id: string }): WorkspaceData { @@ -184,4 +190,102 @@ describe("ProvisioningTimeout", () => { .nodes.filter((n) => n.data.status === "provisioning"); expect(stillProvisioning).toHaveLength(2); }); + + // ── Runtime-aware timeout regression tests (2026-04-24 outage) ──────────── + // Prior to this, a hermes workspace consistently false-alarmed at 2 min + // into its 8-13 min cold boot, pushing users to retry something that + // would have come online on its own. The runtime-aware override keeps + // the 2-min floor for fast docker runtimes while giving hermes its + // honest 12-min budget. 
+ + describe("runtime profile resolution (@/lib/runtimeProfiles)", () => { + describe("provisionTimeoutForRuntime", () => { + it("returns the default for unknown/missing runtimes", () => { + expect(provisionTimeoutForRuntime(undefined)).toBe( + DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs, + ); + expect(provisionTimeoutForRuntime("")).toBe( + DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs, + ); + expect(provisionTimeoutForRuntime("some-future-runtime")).toBe( + DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs, + ); + }); + + it("returns default for known-fast runtimes (not in profile map)", () => { + // If someone ever adds one of these to RUNTIME_PROFILES with a + // slower value, this test catches the unintended regression. + expect(provisionTimeoutForRuntime("claude-code")).toBe( + DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs, + ); + expect(provisionTimeoutForRuntime("langgraph")).toBe( + DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs, + ); + expect(provisionTimeoutForRuntime("crewai")).toBe( + DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs, + ); + }); + + it("returns hermes override when runtime = hermes", () => { + expect(provisionTimeoutForRuntime("hermes")).toBe( + RUNTIME_PROFILES.hermes?.provisionTimeoutMs, + ); + expect(provisionTimeoutForRuntime("hermes")).toBeGreaterThanOrEqual( + DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs * 5, + ); + }); + + it("server-side workspace override wins over runtime profile", () => { + // The resolution order is: overrides → profile → default. + // An operator-tunable per-workspace number on the backend + // (e.g. via a template manifest field) should beat the canvas + // runtime map. 
+ expect( + provisionTimeoutForRuntime("hermes", { + provisionTimeoutMs: 60_000, + }), + ).toBe(60_000); + expect( + provisionTimeoutForRuntime("some-unknown", { + provisionTimeoutMs: 300_000, + }), + ).toBe(300_000); + }); + }); + + describe("getRuntimeProfile", () => { + it("returns a structural profile with required fields", () => { + const profile = getRuntimeProfile("hermes"); + expect(profile.provisionTimeoutMs).toBeTypeOf("number"); + expect(profile.provisionTimeoutMs).toBeGreaterThan(0); + }); + + it("default profile is a valid superset of every override", () => { + // Every entry in RUNTIME_PROFILES must resolve to a profile that + // has every field the default does — otherwise consumers could get + // undefined where they expected a number. An entry may omit a + // field only if the resolved value is still a valid number. + for (const [runtime, profile] of Object.entries(RUNTIME_PROFILES)) { + const resolved = getRuntimeProfile(runtime); + expect( + resolved.provisionTimeoutMs, + `runtime=${runtime} must resolve to a number`, + ).toBeTypeOf("number"); + expect(resolved.provisionTimeoutMs).toBeGreaterThan(0); + // Profile's explicit value should be used iff present.
+ if (profile.provisionTimeoutMs !== undefined) { + expect(resolved.provisionTimeoutMs).toBe(profile.provisionTimeoutMs); + } + } + }); + }); + + describe("DEFAULT_PROVISION_TIMEOUT_MS backward-compat export", () => { + it("still exports the same default for legacy importers", () => { + expect(DEFAULT_PROVISION_TIMEOUT_MS).toBe( + DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs, + ); + }); + }); + }); }); diff --git a/canvas/src/components/__tests__/tabs.a11y.test.tsx b/canvas/src/components/__tests__/tabs.a11y.test.tsx index 91f2c370..a7000917 100644 --- a/canvas/src/components/__tests__/tabs.a11y.test.tsx +++ b/canvas/src/components/__tests__/tabs.a11y.test.tsx @@ -183,7 +183,31 @@ describe("ChannelsTab — htmlFor/id label associations (WCAG 1.3.1)", () => { beforeEach(() => { mockApiGet.mockImplementation((url: string) => { if (url.includes("/channels/adapters")) { - return Promise.resolve([{ type: "telegram", display_name: "Telegram" }]); + // Mirror the real GET /channels/adapters shape — schema-driven form + // relies on config_schema arriving from the adapter. A bare + // {type, display_name} mock renders an empty form and every + // getByLabelText below fails. + return Promise.resolve([ + { + type: "telegram", + display_name: "Telegram", + config_schema: [ + { + key: "bot_token", + label: "Bot Token", + type: "password", + required: true, + sensitive: true, + }, + { + key: "chat_id", + label: "Chat IDs", + type: "text", + required: true, + }, + ], + }, + ]); } return Promise.resolve([]); }); diff --git a/canvas/src/components/settings/OrgTokensTab.tsx b/canvas/src/components/settings/OrgTokensTab.tsx index ea270bac..bfce1576 100644 --- a/canvas/src/components/settings/OrgTokensTab.tsx +++ b/canvas/src/components/settings/OrgTokensTab.tsx @@ -125,6 +125,7 @@ export function OrgTokensTab() { onChange={(e) => setNameInput(e.target.value)} placeholder="Label (e.g. 
zapier, my-ci)" maxLength={100} + aria-label="Organization API key label" className="flex-1 text-[11px] bg-zinc-900/60 border border-zinc-700/50 rounded px-2 py-1.5 text-zinc-200 placeholder-zinc-600" /> - diff --git a/canvas/src/components/tabs/ActivityTab.tsx b/canvas/src/components/tabs/ActivityTab.tsx index 74f0d781..fc857842 100644 --- a/canvas/src/components/tabs/ActivityTab.tsx +++ b/canvas/src/components/tabs/ActivityTab.tsx @@ -186,7 +186,7 @@ function ActivityRow({ : "bg-zinc-800/60 border-zinc-700/40" }`} > - + + {/* Render one input per schema field. Empty-schema path: if the + backend didn't return a schema (older platform version) show + an upgrade notice instead of rendering any inputs. */} + {currentSchema.length === 0 ? ( +
+ Platform exposes no config schema — upgrade the platform to pick up first-class support.
- {discoveredChats.length > 0 && ( -
- {discoveredChats.map((chat) => ( - - ))} -
- )} - {(discoveredChats.length === 0 || showManualInput) && ( - setFormChatId(e.target.value)} - placeholder="-100123456789, -100987654321" - className="w-full text-xs bg-zinc-900 border border-zinc-700 rounded px-2 py-1.5 text-zinc-300 placeholder-zinc-600" + ) : ( + currentSchema.map((field) => ( + setFieldValue(field.key, v)} + // Detect Chats button lives next to the chat_id input on + // Telegram only (the only platform with getUpdates). + renderExtras={ + field.key === "chat_id" && SUPPORTS_DETECT_CHATS.has(formType) + ? () => ( + <> +
+ +
+ {discoveredChats.length > 0 && ( +
+ {discoveredChats.map((chat) => ( + + ))} + +
+ )} + + ) + : undefined + } /> - )} -

- {discoveredChats.length > 0 ? ( - <> - Chats: {formChatId || "(none selected)"} - {" · "} - - - ) : ( - "Click Detect Chats after adding the bot to groups or sending /start in DMs." - )} -

-
+ )) + )} +
{formError && ( @@ -343,7 +378,7 @@ export function ChannelsTab({ workspaceId }: Props) {

No channels connected

- Connect Telegram, Slack, or Discord to chat with this agent from social platforms. + Connect Telegram, Slack, Discord, or Lark / Feishu to chat with this agent from social platforms.

)} @@ -364,7 +399,7 @@ export function ChannelsTab({ workspaceId }: Props) { {ch.channel_type.charAt(0).toUpperCase() + ch.channel_type.slice(1)} - {ch.config.chat_id} + {ch.config.chat_id || ch.config.channel_id || ""}
@@ -415,3 +450,53 @@ export function ChannelsTab({ workspaceId }: Props) {
); } + +// SchemaField renders one ConfigField as a label + input. Kept inline in +// this file so the ChannelsTab stays self-contained; promote to its own +// module if another tab ever needs it. +function SchemaField({ + field, + value, + onChange, + renderExtras, +}: { + field: ConfigField; + value: string; + onChange: (v: string) => void; + renderExtras?: () => React.ReactNode; +}) { + const inputId = useId(); + const common = + "w-full text-xs bg-zinc-900 border border-zinc-700 rounded px-2 py-1.5 text-zinc-300 placeholder-zinc-600"; + return ( +
+ + {field.type === "textarea" ? ( +