Compare commits
66 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1eb1327ad5 | |||
| a407c8d079 | |||
| 53efcb5c46 | |||
| ea3bae5068 | |||
| 774a8c2a6a | |||
| cb660fc0b4 | |||
| 446b8c78fd | |||
| df972a85e2 | |||
| e45033e15c | |||
| 418db083ff | |||
| b611b1a9bf | |||
| 5fce77aac9 | |||
| 257a61672b | |||
| db6f5b2e93 | |||
| acecb16d22 | |||
| 82bc28a098 | |||
| 947cc730ba | |||
| 1ee864d523 | |||
| f84f9a5572 | |||
| f82a980a79 | |||
| 64b7ecfb70 | |||
| cf7b587f16 | |||
| ffbd1a7ff0 | |||
| feb2b8cfb8 | |||
| 4bee6cb4a7 | |||
| a44a110d60 | |||
| 02917decc6 | |||
| 1b543d8582 | |||
| 3f15c1448d | |||
| 0359912d06 | |||
| 2cf7d006a9 | |||
| c99b0e3601 | |||
| 4414c92a87 | |||
| efa60621f3 | |||
| 367bc1f7fc | |||
| c2c6501a67 | |||
| bbb445b956 | |||
| 3269e93216 | |||
| 442f79a987 | |||
| 03aa69f46f | |||
| 8546502ab8 | |||
| f5c2882acb | |||
| 3dd7108cb4 | |||
| add37f35b0 | |||
| 73871e7ade | |||
| 930f8753a9 | |||
| eacb8183c3 | |||
| 7bc52017ed | |||
| 753e0f569d | |||
| 2d0d070040 | |||
| 1e783ff6a2 | |||
| 924dfa5598 | |||
| 3ab690c273 | |||
| 866a71777f | |||
| 11b0646b37 | |||
| 3165b98cc8 | |||
| 71c68e44f2 | |||
| 7cfec2d61f | |||
| 585b3d6ed0 | |||
| 9deb8e9ea6 | |||
| 69391595f3 | |||
| 46606801c6 | |||
| cd671e1263 | |||
| 51f74e9d8a | |||
| 6211d27bc7 | |||
| 42b16b33fb |
@@ -208,6 +208,61 @@ def _raise_for_redeploy_result(status: int, body: dict, slugs: list[str]) -> Non
|
||||
)
|
||||
|
||||
|
||||
def rollout_stragglers(enumerated: list[str], results: list[dict]) -> list[str]:
    """Return every enumerated tenant NOT proven on the target build.

    A straggler is any tenant the rollout was supposed to cover that the
    CP could not verify is running the target image tag — whether it
    errored, was skipped, or SSM-succeeded onto the wrong image
    (internal#724). CP marks each per-tenant result row with
    ``verified_on_target`` (the REDEPLOY_RUNNING_IMAGE docker-inspect
    proof). A tenant enumerated for the rollout but absent from the
    result set (no batch ever ran it) is also a straggler — that is the
    exact agents-team silent-skip class.

    Backward-compat: an OLDER CP that doesn't emit ``verified_on_target``
    yet returns rows without the key. Treat a missing key as verified so
    this surfacing degrades to the previous (ok-based) behavior against an
    un-upgraded CP, rather than failing every deploy spuriously. Once the
    CP fix is deployed the key is always present and real stragglers are
    caught.

    Args:
        enumerated: Slugs the rollout was supposed to cover. Duplicates
            are collapsed before comparison.
        results: Per-tenant result rows. ``None`` is treated as "no rows".
            Entries that are not dicts are ignored: a malformed row proves
            nothing, so the tenant it might have described stays a
            straggler instead of crashing the gate with AttributeError.

    Returns:
        The sorted, de-duplicated list of enumerated slugs with no
        verifying row.
    """
    verified: set[str] = set()
    for row in results or ():
        # Fail closed on malformed rows: skip them so they can never count
        # as proof, and never abort the coverage check itself.
        if not isinstance(row, dict):
            continue
        # Dry-run rows describe a plan, not a landed deploy.
        if str(row.get("ssm_status") or "") == "DryRun":
            continue
        slug = str(row.get("slug") or "").strip()
        if not slug:
            continue
        # Missing key (old CP) => assume verified; present key is authoritative.
        if row.get("verified_on_target", True):
            verified.add(slug)
    # dict.fromkeys de-duplicates the enumeration before the sorted() pass.
    return sorted(s for s in dict.fromkeys(enumerated) if s not in verified)
|
||||
|
||||
|
||||
def assert_full_coverage(enumerated: list[str], aggregate: dict, dry_run: bool) -> None:
    """Raise RolloutFailed unless every enumerated tenant is verified on target.

    This is the no-silent-skip gate (internal#724). Dry runs are exempt:
    nothing landed, so there is no coverage to assert.

    On failure the ``aggregate`` dict is mutated in place — ``ok`` is set
    to False and ``error`` / ``stragglers`` are filled in — before it is
    attached to the raised RolloutFailed.
    """
    if dry_run:
        return

    missing = rollout_stragglers(enumerated, aggregate.get("results") or [])
    if not missing:
        return

    names = ", ".join(missing)
    unique_total = len(set(enumerated))
    msg = (
        f"incomplete rollout: {len(missing)} tenant(s) not verified on target "
        f"after redeploy-fleet: {names} "
        f"(enumerated {unique_total})"
    )
    aggregate.update(ok=False, error=msg, stragglers=missing)
    raise RolloutFailed(msg, aggregate)
|
||||
|
||||
|
||||
def execute_scoped_rollout(
|
||||
plan: dict,
|
||||
token: str,
|
||||
@@ -254,6 +309,14 @@ def execute_scoped_rollout(
|
||||
aggregate["error"] = str(exc)
|
||||
raise RolloutFailed(str(exc), aggregate) from exc
|
||||
|
||||
# No-silent-skip coverage gate (internal#724): every enumerated tenant
|
||||
# must be PROVEN on the target build. A per-tenant HTTP-200/ok response
|
||||
# is not proof — a tenant that SSM-succeeded but stayed on the old tag,
|
||||
# or one enumerated but never batched, is a straggler. Surfacing it as
|
||||
# a RolloutFailed makes the deploy step exit non-zero instead of
|
||||
# silently reporting success (the exact agents-team failure mode).
|
||||
assert_full_coverage(all_slugs, aggregate, dry_run)
|
||||
|
||||
return aggregate
|
||||
|
||||
|
||||
|
||||
@@ -355,3 +355,134 @@ def test_rollout_from_plan_file_writes_partial_response_on_failure(tmp_path):
|
||||
assert response_path.read_text(encoding="utf-8").strip()
|
||||
assert '"ok": false' in response_path.read_text(encoding="utf-8")
|
||||
assert '"slug": "hongming"' in response_path.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# No-silent-skip coverage gate (internal#724)
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_rollout_stragglers_flags_tenant_not_on_target():
    """A row with verified_on_target=False marks that tenant as a straggler."""
    # "b" SSM-succeeded but its container is still on the old tag.
    rows = [
        {"slug": "a", "verified_on_target": True},
        {"slug": "b", "verified_on_target": False, "running_image": "platform-tenant:staging-old"},
        {"slug": "c", "verified_on_target": True},
    ]
    assert prod.rollout_stragglers(["a", "b", "c"], rows) == ["b"]
|
||||
|
||||
|
||||
def test_rollout_stragglers_flags_enumerated_tenant_with_no_result():
    """An enumerated tenant with no result row at all is a straggler."""
    # agents-team class: enumerated, but no batch ever produced a row for it.
    only_row = {"slug": "a", "verified_on_target": True}
    found = prod.rollout_stragglers(["a", "agents-team"], [only_row])
    assert found == ["agents-team"]
|
||||
|
||||
|
||||
def test_rollout_stragglers_missing_key_is_backward_compatible():
    """Rows lacking verified_on_target (older CP) count as verified."""
    # No key at all must not produce a spurious failure.
    legacy_rows = [{"slug": name, "healthz_ok": True} for name in ("a", "b")]
    assert prod.rollout_stragglers(["a", "b"], legacy_rows) == []
|
||||
|
||||
|
||||
def test_rollout_stragglers_ignores_dry_run_rows():
    """A DryRun row is never treated as proof of deployment."""
    dry_rows = [{"slug": "a", "ssm_status": "DryRun"}]
    # The dry-run row is skipped, leaving "a" without a verifying row.
    assert prod.rollout_stragglers(["a"], dry_rows) == ["a"]
|
||||
|
||||
|
||||
def test_scoped_rollout_fails_when_a_tenant_stays_on_old_tag():
    """ok=True on every call is not enough when a tenant is not on target.

    agents-team comes back with verified_on_target=False, so the rollout
    must raise — the "reported success, one tenant silently skipped" bug.
    """

    def fake_redeploy(_cp_url, _token, body):
        rows = [
            {"slug": slug, "verified_on_target": slug != "agents-team"}
            for slug in body["only_slugs"]
        ]
        return 200, {"ok": True, "results": rows}

    plan = {
        "cp_url": "https://api.moleculesai.app",
        "body": {
            "target_tag": "staging-new",
            "batch_size": 5,
            "dry_run": False,
            "confirm": True,
        },
    }

    failure = None
    try:
        prod.execute_scoped_rollout(
            plan,
            token="secret",
            list_slugs=lambda _u, _t, _b: ["reno-stars", "agents-team", "hongming"],
            redeploy=fake_redeploy,
            sleep=lambda _s: None,
        )
    except prod.RolloutFailed as exc:
        failure = exc

    if failure is None:
        raise AssertionError("expected an incomplete rollout to fail loudly")
    assert "incomplete rollout" in str(failure)
    assert failure.response["stragglers"] == ["agents-team"]
    assert failure.response["ok"] is False
|
||||
|
||||
|
||||
def test_scoped_rollout_passes_when_all_tenants_verified_on_target():
    """With every tenant verified the rollout succeeds and lists no stragglers."""

    def fake_redeploy(_cp_url, _token, body):
        rows = [{"slug": s, "verified_on_target": True} for s in body["only_slugs"]]
        return 200, {"ok": True, "results": rows}

    plan = {
        "cp_url": "https://api.moleculesai.app",
        "body": {
            "target_tag": "staging-new",
            "batch_size": 5,
            "dry_run": False,
            "confirm": True,
        },
    }
    outcome = prod.execute_scoped_rollout(
        plan,
        token="secret",
        list_slugs=lambda _u, _t, _b: ["reno-stars", "agents-team", "hongming"],
        redeploy=fake_redeploy,
        sleep=lambda _s: None,
    )
    assert outcome["ok"] is True
    assert "stragglers" not in outcome
|
||||
|
||||
|
||||
def test_scoped_rollout_dry_run_does_not_assert_coverage():
    """Dry runs skip the coverage gate; otherwise every plan would fail."""

    def fake_redeploy(_cp_url, _token, body):
        # Every row is a DryRun row: nothing actually landed.
        rows = [{"slug": s, "ssm_status": "DryRun"} for s in body["only_slugs"]]
        return 200, {"ok": True, "results": rows}

    plan = {
        "cp_url": "https://api.moleculesai.app",
        "body": {
            "target_tag": "staging-new",
            "batch_size": 5,
            "dry_run": True,
            "confirm": True,
        },
    }
    outcome = prod.execute_scoped_rollout(
        plan,
        token="secret",
        list_slugs=lambda _u, _t, _b: ["a", "b"],
        redeploy=fake_redeploy,
        sleep=lambda _s: None,
    )
    assert outcome["ok"] is True
|
||||
|
||||
@@ -357,6 +357,14 @@ jobs:
|
||||
name: Run E2E bash unit tests (no live infra)
|
||||
run: |
|
||||
bash tests/e2e/test_model_slug.sh
|
||||
# molecule-core#1995 (#1994 follow-on): fail-direction proof for
|
||||
# the A2A real-completion + byok-routing assertion helpers
|
||||
# (lib/completion_assert.sh). Offline (no LLM, no network): it
|
||||
# asserts an error-as-text payload FAILS the real-completion gate
|
||||
# — the exact trap the historical shape-only `"kind":"text"`
|
||||
# check missed. If a refactor weakens the gate to a shape check,
|
||||
# this step goes red on every PR.
|
||||
bash tests/e2e/test_completion_assert_unit.sh
|
||||
|
||||
- if: ${{ needs.changes.outputs.scripts == 'true' }}
|
||||
name: Test ECR promote-tenant-image script (mock-driven, no live infra)
|
||||
|
||||
@@ -166,6 +166,10 @@ jobs:
|
||||
# canary path. The script picks the right blob shape based on
|
||||
# which key is non-empty.
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
|
||||
# google-adk canary path — AI-Studio key (config model
|
||||
# google_genai:gemini-2.5-pro). PROD disallows API keys (Vertex+ADC);
|
||||
# the keyed path is CI-only. Dispatch with E2E_RUNTIME=google-adk.
|
||||
E2E_GOOGLE_API_KEY: ${{ secrets.MOLECULE_STAGING_GOOGLE_API_KEY }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -217,6 +221,10 @@ jobs:
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
|
||||
required_secret_value="${E2E_OPENAI_API_KEY:-}"
|
||||
;;
|
||||
google-adk)
|
||||
required_secret_name="MOLECULE_STAGING_GOOGLE_API_KEY"
|
||||
required_secret_value="${E2E_GOOGLE_API_KEY:-}"
|
||||
;;
|
||||
*)
|
||||
echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
|
||||
required_secret_name=""
|
||||
|
||||
@@ -49,6 +49,7 @@ on:
|
||||
- 'workspace-server/internal/middleware/**'
|
||||
- 'workspace-server/internal/provisioner/**'
|
||||
- 'tests/e2e/test_staging_full_saas.sh'
|
||||
- 'tests/e2e/lib/completion_assert.sh'
|
||||
- 'tests/e2e/lib/aws_leak_check.sh'
|
||||
- 'tests/e2e/test_aws_leak_check.sh'
|
||||
- '.gitea/workflows/e2e-staging-saas.yml'
|
||||
@@ -61,6 +62,7 @@ on:
|
||||
- 'workspace-server/internal/middleware/**'
|
||||
- 'workspace-server/internal/provisioner/**'
|
||||
- 'tests/e2e/test_staging_full_saas.sh'
|
||||
- 'tests/e2e/lib/completion_assert.sh'
|
||||
- 'tests/e2e/lib/aws_leak_check.sh'
|
||||
- 'tests/e2e/test_aws_leak_check.sh'
|
||||
- '.gitea/workflows/e2e-staging-saas.yml'
|
||||
@@ -155,13 +157,18 @@ jobs:
|
||||
# E2E_RUNTIME=hermes or =codex via workflow_dispatch can still
|
||||
# exercise the OpenAI path.
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
|
||||
# google-adk (operator-dispatched only) auths Gemini with an
|
||||
# AI-Studio key. Org policy disallows API keys in PROD (Vertex+ADC
|
||||
# there); CI uses the keyed AI-Studio path with config model
|
||||
# google_genai:gemini-2.5-pro. Vertex remains the supported prod path.
|
||||
E2E_GOOGLE_API_KEY: ${{ secrets.MOLECULE_STAGING_GOOGLE_API_KEY }}
|
||||
E2E_RUNTIME: ${{ github.event.inputs.runtime || 'claude-code' }}
|
||||
# Pin the model when running on the default claude-code path —
|
||||
# the per-runtime default ("sonnet") routes to direct Anthropic
|
||||
# and defeats the cost saving. Operators can override via the
|
||||
# workflow_dispatch flow (no input wired here yet — runtime
|
||||
# override is enough for ad-hoc).
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || 'MiniMax-M2' }}
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2' }}
|
||||
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
|
||||
@@ -210,6 +217,10 @@ jobs:
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
|
||||
required_secret_value="${E2E_OPENAI_API_KEY:-}"
|
||||
;;
|
||||
google-adk)
|
||||
required_secret_name="MOLECULE_STAGING_GOOGLE_API_KEY"
|
||||
required_secret_value="${E2E_GOOGLE_API_KEY:-}"
|
||||
;;
|
||||
*)
|
||||
echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
|
||||
required_secret_name=""
|
||||
|
||||
@@ -327,13 +327,27 @@ jobs:
|
||||
echo ""
|
||||
echo "### Per-tenant result"
|
||||
echo ""
|
||||
echo "| Slug | Phase | SSM Status | Exit | Healthz | Error present |"
|
||||
echo "|------|-------|------------|------|---------|---------------|"
|
||||
jq -r '.results[]? | "| \(.slug) | \(.phase) | \(.ssm_status // "-") | \(.ssm_exit_code) | \(.healthz_ok) | \((.error // "") != "") |"' "$HTTP_RESPONSE" || true
|
||||
echo "| Slug | Phase | SSM Status | Exit | Healthz | On target | Error present |"
|
||||
echo "|------|-------|------------|------|---------|-----------|---------------|"
|
||||
jq -r '.results[]? | "| \(.slug) | \(.phase) | \(.ssm_status // "-") | \(.ssm_exit_code) | \(.healthz_ok) | \(.verified_on_target) | \((.error // "") != "") |"' "$HTTP_RESPONSE" || true
|
||||
# internal#724: stragglers are tenants enumerated but not proven
|
||||
# on the target build. Surface them loudly — a non-empty list
|
||||
# means the rollout did NOT fully land.
|
||||
STRAGGLERS="$(jq -r '(.stragglers // []) | join(", ")' "$HTTP_RESPONSE")"
|
||||
if [ -n "$STRAGGLERS" ]; then
|
||||
echo ""
|
||||
echo "### ⚠ Stragglers (NOT on target tag \`$TARGET_TAG\`)"
|
||||
echo ""
|
||||
echo "\`$STRAGGLERS\`"
|
||||
fi
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
OK="$(jq -r '.ok' "$HTTP_RESPONSE")"
|
||||
if [ "$OK" != "true" ]; then
|
||||
STRAGGLERS="$(jq -r '(.stragglers // []) | join(", ")' "$HTTP_RESPONSE")"
|
||||
if [ -n "$STRAGGLERS" ]; then
|
||||
echo "::error::incomplete rollout — tenants not on target tag $TARGET_TAG: $STRAGGLERS"
|
||||
fi
|
||||
echo "::error::redeploy-fleet reported ok=false; production rollout halted."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -0,0 +1,99 @@
|
||||
name: sync-providers-yaml
|
||||
|
||||
# Cross-repo canonical↔synced-copy drift gate (internal#718 P2-A, CTO
|
||||
# 2026-05-27 "Distribution = SDK via codegen + verify-CI", multi-repo branch:
|
||||
# "codegen-checked-into-each-repo + verify-CI").
|
||||
#
|
||||
# The canonical provider-registry SSOT is molecule-controlplane
|
||||
# internal/providers/providers.yaml. molecule-core has NO Go module dependency
|
||||
# on controlplane, so instead of importing it we carry a SYNCED COPY at
|
||||
# workspace-server/internal/providers/providers.yaml and gate it.
|
||||
#
|
||||
# This workflow fetches the canonical providers.yaml from controlplane (via the
|
||||
# Gitea raw endpoint, read-only) and byte-compares it against core's synced
|
||||
# copy. RED if they differ — meaning the canonical moved and core's copy must be
|
||||
# re-synced (copy verbatim + `go generate ./...` + bump
|
||||
# canonicalProvidersYAMLSHA256 in sync_canonical_test.go).
|
||||
#
|
||||
# Pairs with:
|
||||
# * sync_canonical_test.go — hermetic sha pin (catches a hand-edit of core's
|
||||
# copy even with no network); runs in the normal `go test ./...`.
|
||||
# * verify-providers-gen.yml — artifact ↔ synced-copy drift.
|
||||
#
|
||||
# ENFORCEMENT GATING: standalone workflow, NOT a job in ci.yml and NOT in
|
||||
# branch protection (same soak-then-promote posture as verify-providers-gen).
|
||||
# It is intentionally absent from ci.yml's job set so the ci-required-drift
|
||||
# sentinel does not fire on it.
|
||||
#
|
||||
# AUTH: uses AUTO_SYNC_TOKEN (the existing cross-repo read token used to sync
|
||||
# template/provider content from sibling repos). If the secret is absent the
|
||||
# job emits a clear ::warning:: and exits 0 — the hermetic sha pin in
|
||||
# sync_canonical_test.go is the always-on backstop, so a missing cross-repo
|
||||
# token degrades to "hand-edit still caught, live canonical drift not caught"
|
||||
# rather than a hard red that blocks unrelated PRs.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths:
|
||||
- 'workspace-server/internal/providers/providers.yaml'
|
||||
- '.gitea/workflows/sync-providers-yaml.yml'
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'workspace-server/internal/providers/providers.yaml'
|
||||
- '.gitea/workflows/sync-providers-yaml.yml'
|
||||
schedule:
|
||||
# Daily at 04:23 — catch a canonical change in controlplane that landed
|
||||
# without a paired core re-sync PR (off-zero to spread cron load).
|
||||
- cron: '23 4 * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: sync-providers-yaml-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
compare:
|
||||
name: Compare synced providers.yaml against controlplane canonical
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 6
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Fetch canonical providers.yaml from controlplane and byte-compare
|
||||
env:
|
||||
AUTO_SYNC_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
|
||||
API_ROOT: ${{ github.server_url }}/api/v1
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ -z "${AUTO_SYNC_TOKEN:-}" ]; then
|
||||
echo "::warning::AUTO_SYNC_TOKEN secret missing — skipping the live cross-repo compare."
|
||||
echo "The hermetic sha pin (sync_canonical_test.go) still gates hand-edits of core's copy."
|
||||
echo "Provision AUTO_SYNC_TOKEN (read scope on molecule-controlplane) to enable live canonical-drift detection."
|
||||
exit 0
|
||||
fi
|
||||
CANON_URL="${API_ROOT}/repos/molecule-ai/molecule-controlplane/raw/internal/providers/providers.yaml?ref=main"
|
||||
# Use the /raw endpoint: it returns the file bytes directly. (The
|
||||
# /contents endpoint ignores Accept: application/vnd.gitea.raw on
|
||||
# Gitea 1.22.6 and returns the JSON+base64 envelope, which made this
|
||||
# diff a permanent false RED.)
|
||||
curl -fsS \
|
||||
-H "Authorization: token ${AUTO_SYNC_TOKEN}" \
|
||||
"${CANON_URL}" -o /tmp/canonical-providers.yaml
|
||||
LOCAL=workspace-server/internal/providers/providers.yaml
|
||||
if diff -u /tmp/canonical-providers.yaml "$LOCAL"; then
|
||||
echo "OK — core's synced providers.yaml is byte-identical to the controlplane canonical."
|
||||
else
|
||||
echo "::error::core's synced providers.yaml DRIFTED from the controlplane canonical (SSOT)."
|
||||
echo "Re-sync: copy controlplane internal/providers/providers.yaml verbatim over"
|
||||
echo " $LOCAL, run 'go generate ./...' in workspace-server/, and bump"
|
||||
echo " canonicalProvidersYAMLSHA256 in internal/providers/sync_canonical_test.go."
|
||||
exit 1
|
||||
fi
|
||||
@@ -0,0 +1,89 @@
|
||||
name: verify-providers-gen
|
||||
|
||||
# Provider-registry SSOT enforcement gate — molecule-core side (internal#718
|
||||
# P2-A, CTO 2026-05-27 "Distribution = SDK via codegen + verify-CI").
|
||||
#
|
||||
# The canonical schema SSOT is molecule-controlplane
|
||||
# internal/providers/providers.yaml. molecule-core carries a SYNCED COPY at
|
||||
# workspace-server/internal/providers/providers.yaml (kept in sync by the
|
||||
# companion sync-providers-yaml.yml gate), and cmd/gen-providers emits the
|
||||
# checked-in Go projection workspace-server/internal/providers/gen/registry_gen.go.
|
||||
#
|
||||
# This workflow regenerates the artifact into the working tree and fails RED if
|
||||
# it differs from what is committed — catching BOTH:
|
||||
# * a providers.yaml (synced-copy) change that wasn't followed by `go generate ./...`, and
|
||||
# * a hand-edit of the generated artifact (it carries a DO NOT EDIT header).
|
||||
#
|
||||
# It is the molecule-core mirror of molecule-controlplane's verify-providers-gen
|
||||
# workflow. Together with sync-providers-yaml (canonical↔synced-copy drift) it
|
||||
# closes the codegen-checked-into-each-repo + verify-CI loop the RFC mandates.
|
||||
#
|
||||
# ENFORCEMENT GATING (deliberate, per dev-SOP "implementation gating"):
|
||||
# this is a STANDALONE workflow, NOT a job inside ci.yml, and is NOT yet in any
|
||||
# branch-protection status_check_contexts. Rationale (identical to the CP P0
|
||||
# rollout):
|
||||
# * It runs + reports RED on every PR/push immediately (visible signal).
|
||||
# * It is intentionally absent from ci.yml's job set so the ci-required-drift
|
||||
# sentinel (jobs ↔ branch-protection ↔ audit-env) does NOT fire on it, and
|
||||
# from branch protection (turning it into a hard merge gate has blast radius
|
||||
# — operator GO required, same pattern as sop-tier-check / verify-providers-gen
|
||||
# on controlplane). Promote it into branch protection in a follow-up once
|
||||
# P2 has soaked.
|
||||
# Until then it behaves like secret-scan / block-internal-paths: a standalone
|
||||
# advisory-to-hard gate the author is expected to keep green.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
push:
|
||||
branches: [main, staging]
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: verify-providers-gen-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
verify:
|
||||
name: Regenerate providers artifact and fail on drift
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 8
|
||||
defaults:
|
||||
run:
|
||||
working-directory: workspace-server
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
|
||||
with:
|
||||
go-version: 'stable'
|
||||
cache: true
|
||||
cache-dependency-path: workspace-server/go.sum
|
||||
|
||||
- name: Verify generated artifact is in sync with providers.yaml
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# -check regenerates in memory and byte-compares against the
|
||||
# checked-in artifact; exit 1 (RED) on any drift. This is the
|
||||
# single source of the gate's verdict — the same code path
|
||||
# `go test ./cmd/gen-providers` exercises.
|
||||
go run ./cmd/gen-providers -check
|
||||
|
||||
- name: Belt-and-braces — regenerate in place and assert clean tree
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Independent confirmation that does not trust the -check path:
|
||||
# actually write the artifact and assert git sees no change. If
|
||||
# this and the step above ever disagree, the gate is suspect.
|
||||
go generate ./...
|
||||
if ! git diff --quiet -- internal/providers/gen/registry_gen.go; then
|
||||
echo "::error::workspace-server/internal/providers/gen/registry_gen.go drifted from providers.yaml."
|
||||
echo "Run 'go generate ./...' (or 'go run ./cmd/gen-providers') in workspace-server/ and commit the result."
|
||||
git --no-pager diff -- internal/providers/gen/registry_gen.go | head -80
|
||||
exit 1
|
||||
fi
|
||||
echo "OK — generated providers artifact is in sync with the schema SSOT."
|
||||
@@ -38,10 +38,11 @@ const DEFAULT_RUNTIME = "claude-code";
|
||||
const RUNTIME_OPTIONS = [
|
||||
{ value: "claude-code", label: "Claude Code" },
|
||||
{ value: "codex", label: "OpenAI Codex CLI" },
|
||||
{ value: "google-adk", label: "Google ADK" },
|
||||
{ value: "hermes", label: "Hermes" },
|
||||
{ value: "openclaw", label: "OpenClaw" },
|
||||
];
|
||||
const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "hermes", "openclaw"]);
|
||||
const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "google-adk", "hermes", "openclaw"]);
|
||||
const DEFAULT_HEADLESS_INSTANCE_TYPE = "t3.medium";
|
||||
const DEFAULT_HEADLESS_ROOT_GB = 30;
|
||||
const DEFAULT_DISPLAY_INSTANCE_TYPE = "t3.xlarge";
|
||||
|
||||
@@ -49,6 +49,33 @@ export interface ProviderEntry {
|
||||
wildcard: boolean;
|
||||
/** Optional tooltip text (rendered as native title=). */
|
||||
tooltip?: string;
|
||||
/** Billing mode the DERIVED provider implies, when this entry came from the
|
||||
* registry-backed payload (internal#718 P3): "platform_managed" | "byok".
|
||||
* Undefined for entries built by the legacy inferVendor heuristic. */
|
||||
billingMode?: "platform_managed" | "byok";
|
||||
}
|
||||
|
||||
/**
 * One entry of the GET /templates `registry_providers` payload
 * (workspace-server registryProviderView): the registry's native provider for
 * a runtime. Dropdown labels come from this SSOT — the canvas drops
 * VENDOR_LABELS for registry-backed runtimes (internal#718 P3, retire-list #4).
 */
export interface RegistryProvider {
  /** Registry provider name; models refer to it through their `provider`. */
  name: string;
  /** Display label for the provider. */
  display_name?: string;
  /** Auth environment variable NAMES (names only). */
  auth_env?: Array<string>;
  /** Billing mode of this provider. */
  billing_mode?: "platform_managed" | "byok";
  /** NOTE(review): presumably marks a provider being retired — confirm with the registry schema. */
  deprecated?: boolean;
}
|
||||
|
||||
/**
 * One entry of the GET /templates `registry_models` payload: a native model
 * id annotated with its DERIVED provider and the billing mode that provider
 * implies.
 */
export interface RegistryModel {
  /** Native model id. */
  id: string;
  /** Optional model name. */
  name?: string;
  /** Derived provider, given as a registry provider name. */
  provider?: string;
  /** Billing mode implied by the derived provider. */
  billing_mode?: "platform_managed" | "byok";
}
|
||||
|
||||
export interface SelectorValue {
|
||||
@@ -68,6 +95,13 @@ interface Props {
|
||||
models: SelectorModel[];
|
||||
value: SelectorValue;
|
||||
onChange: (next: SelectorValue) => void;
|
||||
/** Optional pre-built provider catalog. When provided, the selector uses it
|
||||
* verbatim instead of re-inferring one from `models` via
|
||||
* buildProviderCatalog — the registry-backed path (internal#718 P3), where
|
||||
* the parent builds the catalog from the registry-served providers/models
|
||||
* so dropdown labels + billing come from the provider-registry SSOT rather
|
||||
* than the inferVendor heuristic. Omitted = legacy heuristic over `models`. */
|
||||
catalog?: ProviderEntry[];
|
||||
/** Display variant. "grid" = label+control side-by-side (used in ConfigTab
|
||||
* Runtime section). "stack" = vertical (used in MissingKeysModal). */
|
||||
variant?: "grid" | "stack";
|
||||
@@ -251,6 +285,66 @@ export function buildProviderCatalog(models: SelectorModel[]): ProviderEntry[] {
|
||||
return Array.from(buckets.values());
|
||||
}
|
||||
|
||||
/**
 * Build the provider catalog from a REGISTRY-BACKED GET /templates payload
 * (registry_providers + registry_models) — internal#718 P3, retire-list #4.
 *
 * buildProviderCatalog re-infers the vendor from model-id prefixes and env
 * (inferVendor / VENDOR_LABELS / BARE_VENDOR_PATTERNS). This function trusts
 * the registry instead: every model carries its DERIVED `provider` (a registry
 * provider name), and the label, billing mode and auth env of a bucket are
 * read from the matching `registry_providers` entry. The canvas therefore
 * renders nothing the registry did not serve ("only registered selectable"),
 * and the billing mode reflects the derived provider, not a hardcoded rule.
 *
 * Bucketing rules:
 *  - a provider that has no served model gets no bucket (no empty buckets);
 *  - a model with an empty/blank `provider` is left out of the cascade — it
 *    has no derived provider to bucket under (selectable elsewhere via
 *    free-text);
 *  - a model whose `provider` has no registry_providers entry still gets a
 *    bucket keyed by the raw provider name (defensive — should not happen for
 *    a well-formed payload), so an annotated model is never silently dropped.
 */
export function buildProviderCatalogFromRegistry(
  registryProviders: RegistryProvider[],
  registryModels: RegistryModel[],
): ProviderEntry[] {
  // Index provider metadata by name; a later duplicate name overrides an
  // earlier one.
  const providerMeta = new Map<string, RegistryProvider>(
    registryProviders.map((p): [string, RegistryProvider] => [p.name, p]),
  );

  // Buckets appear in the order their first model is encountered.
  const catalog = new Map<string, ProviderEntry>();
  registryModels.forEach((model) => {
    const providerName = (model.provider ?? "").trim();
    if (providerName === "") {
      return;
    }

    let bucket = catalog.get(providerName);
    if (bucket === undefined) {
      const meta = providerMeta.get(providerName);
      bucket = {
        id: `registry|${providerName}`,
        vendor: providerName,
        label: meta?.display_name || providerName,
        envVars: meta?.auth_env ?? [],
        models: [],
        wildcard: false,
        billingMode: meta?.billing_mode ?? model.billing_mode,
        tooltip: VENDOR_TOOLTIPS[providerName],
      };
      catalog.set(providerName, bucket);
    }

    bucket.models.push({ id: model.id, name: model.name, provider: providerName });
    // A single wildcard model id flags the whole bucket as wildcard.
    if (model.id.includes("*")) {
      bucket.wildcard = true;
    }
  });

  // Append the model count to the label when ≥2 concrete models share a
  // bucket, matching buildProviderCatalog's UX.
  catalog.forEach((bucket) => {
    const concreteCount = bucket.models.length;
    if (concreteCount > 1 && !bucket.wildcard) {
      bucket.label = `${bucket.label} (${concreteCount} models)`;
    }
  });

  return Array.from(catalog.values());
}
|
||||
|
||||
/** Find the provider entry that contains a given model id. Used by
|
||||
* callers to back-derive the provider when only the model is known
|
||||
* (e.g. ConfigTab loading from saved state). */
|
||||
@@ -283,6 +377,7 @@ export function ProviderModelSelector({
|
||||
models,
|
||||
value,
|
||||
onChange,
|
||||
catalog: catalogProp,
|
||||
variant = "stack",
|
||||
allowCustomModelEscape = false,
|
||||
disabled = false,
|
||||
@@ -293,7 +388,12 @@ export function ProviderModelSelector({
|
||||
const providerSelectId = `${baseId}-provider`;
|
||||
const modelSelectId = `${baseId}-model`;
|
||||
|
||||
const catalog = useMemo(() => buildProviderCatalog(models), [models]);
|
||||
// Registry-backed path (internal#718 P3): use the parent-supplied catalog
|
||||
// verbatim; otherwise re-infer one from `models` via the legacy heuristic.
|
||||
const catalog = useMemo(
|
||||
() => catalogProp ?? buildProviderCatalog(models),
|
||||
[catalogProp, models],
|
||||
);
|
||||
const selected = useMemo(
|
||||
() => catalog.find((p) => p.id === value.providerId) ?? null,
|
||||
[catalog, value.providerId],
|
||||
|
||||
@@ -1,411 +1,82 @@
|
||||
// @vitest-environment jsdom
|
||||
/**
|
||||
* Tests for BudgetSection (issue #541).
|
||||
* Focused tests for BudgetSection's PER-PERIOD progress-bar math + aria (#49).
|
||||
*
|
||||
* Covers:
|
||||
* - Loading state
|
||||
* - Stats row: used / limit, "Unlimited" when null
|
||||
* - Progress bar: correct percentage, capped at 100%, absent when no limit
|
||||
* - Budget remaining text
|
||||
* - Input pre-fill (existing limit / blank when null)
|
||||
* - Save: PATCH with number, PATCH with null (blank input)
|
||||
* - 402 on GET → exceeded banner, no fetch-error text
|
||||
* - 402 on PATCH → exceeded banner
|
||||
* - Non-402 fetch error → error text
|
||||
* - Non-402 save error → save error alert
|
||||
* - Section header and subheading
|
||||
* - Fetch error does not show stats
|
||||
* Behavioral coverage (loading, save, 402 banners, USD formatting, legacy
|
||||
* back-compat) lives in tabs/__tests__/BudgetSection.test.tsx — this file
|
||||
* deliberately covers only the per-period progress percentage + aria-valuenow
|
||||
* + the over-budget colouring, which that suite doesn't assert in detail. Kept
|
||||
* separate to avoid duplicating the behavioral suite (one component, no
|
||||
* parallel/identical suites).
|
||||
*/
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import {
|
||||
render,
|
||||
screen,
|
||||
fireEvent,
|
||||
waitFor,
|
||||
cleanup,
|
||||
act,
|
||||
} from "@testing-library/react";
|
||||
|
||||
// ── Mock api ──────────────────────────────────────────────────────────────────
|
||||
import { render, screen, waitFor, cleanup } from "@testing-library/react";
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: vi.fn(),
|
||||
patch: vi.fn(),
|
||||
},
|
||||
api: { get: vi.fn(), patch: vi.fn() },
|
||||
}));
|
||||
|
||||
import { api } from "@/lib/api";
|
||||
import { BudgetSection } from "../tabs/BudgetSection";
|
||||
|
||||
const mockGet = vi.mocked(api.get);
|
||||
const mockPatch = vi.mocked(api.patch);
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
type P = { limit: number | null; spend: number; remaining: number | null };
|
||||
|
||||
function budgetResponse(overrides: Partial<{
|
||||
budget_limit: number | null;
|
||||
budget_used: number;
|
||||
budget_remaining: number | null;
|
||||
}> = {}) {
|
||||
// Build a periods response where the named period has the given limit/spend.
|
||||
function withMonthly(limit: number | null, spend: number) {
|
||||
const blank: P = { limit: null, spend: 0, remaining: null };
|
||||
const monthly: P = { limit, spend, remaining: limit == null ? null : limit - spend };
|
||||
return {
|
||||
budget_limit: 1000,
|
||||
budget_used: 250,
|
||||
budget_remaining: 750,
|
||||
...overrides,
|
||||
periods: { hourly: blank, daily: blank, weekly: blank, monthly },
|
||||
budget_limit: limit,
|
||||
monthly_spend: spend,
|
||||
budget_remaining: monthly.remaining,
|
||||
};
|
||||
}
|
||||
|
||||
function make402Error(): Error {
|
||||
return new Error("API GET /workspaces/ws-1/budget: 402 Payment Required");
|
||||
}
|
||||
beforeEach(() => vi.clearAllMocks());
|
||||
afterEach(() => cleanup());
|
||||
|
||||
function make402PatchError(): Error {
|
||||
return new Error("API PATCH /workspaces/ws-1/budget: 402 Payment Required");
|
||||
}
|
||||
|
||||
function makeGenericError(msg = "network timeout"): Error {
|
||||
return new Error(`API GET /workspaces/ws-1/budget: 500 ${msg}`);
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
cleanup();
|
||||
});
|
||||
|
||||
// ── Rendering helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
async function renderLoaded(budgetData = budgetResponse()) {
|
||||
async function renderLoaded(data: unknown) {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValueOnce(budgetData as any);
|
||||
mockGet.mockResolvedValueOnce(data as any);
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
// Wait for loading to finish
|
||||
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
|
||||
}
|
||||
|
||||
// ── Loading state ─────────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — loading state", () => {
|
||||
it("shows loading indicator while fetch is in flight", () => {
|
||||
// Never resolve
|
||||
mockGet.mockReturnValue(new Promise(() => {}));
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
expect(screen.getByTestId("budget-loading")).toBeTruthy();
|
||||
expect(screen.getByText("Loading…")).toBeTruthy();
|
||||
describe("BudgetSection — per-period progress bar", () => {
|
||||
it("renders the bar for a limited period and omits it for an unlimited one", async () => {
|
||||
await renderLoaded(withMonthly(1000, 250));
|
||||
expect(screen.getByTestId("budget-monthly-fill")).toBeTruthy();
|
||||
expect(screen.queryByTestId("budget-hourly-fill")).toBeNull(); // hourly unlimited
|
||||
});
|
||||
|
||||
it("hides loading indicator after fetch resolves", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValueOnce(budgetResponse() as any);
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
|
||||
});
|
||||
});
|
||||
|
||||
// ── Section header ────────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — header and subheading", () => {
|
||||
it("renders 'Budget' as the section heading", async () => {
|
||||
await renderLoaded();
|
||||
expect(screen.getByText("Budget")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("renders the subheading 'Limit total message credits for this workspace'", async () => {
|
||||
await renderLoaded();
|
||||
expect(
|
||||
screen.getByText("Limit total message credits for this workspace")
|
||||
).toBeTruthy();
|
||||
});
|
||||
|
||||
it("renders 'Budget limit (credits)' label for the input", async () => {
|
||||
await renderLoaded();
|
||||
expect(screen.getByText("Budget limit (credits)")).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Stats row ─────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — stats row", () => {
|
||||
it("shows budget_used in the stats row", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 350, budget_limit: 1000 }));
|
||||
expect(screen.getByTestId("budget-used-value").textContent).toBe("350");
|
||||
});
|
||||
|
||||
it("shows budget_limit in the stats row", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 100, budget_limit: 500 }));
|
||||
expect(screen.getByTestId("budget-limit-value").textContent).toBe("500");
|
||||
});
|
||||
|
||||
it("shows 'Unlimited' when budget_limit is null", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
|
||||
expect(screen.getByTestId("budget-limit-value").textContent).toBe("Unlimited");
|
||||
});
|
||||
|
||||
it("shows budget_remaining when present", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_remaining: 750 }));
|
||||
expect(screen.getByTestId("budget-remaining").textContent).toContain("750");
|
||||
expect(screen.getByTestId("budget-remaining").textContent).toContain("credits remaining");
|
||||
});
|
||||
|
||||
it("hides budget_remaining row when null", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_remaining: null }));
|
||||
expect(screen.queryByTestId("budget-remaining")).toBeNull();
|
||||
});
|
||||
|
||||
it("does not crash when budget_used is missing from the response", async () => {
|
||||
// Backend for a provisioning-stuck workspace may return a partial
|
||||
// shape. Regression: previously this threw
|
||||
// "Cannot read properties of undefined (reading 'toLocaleString')"
|
||||
// and crashed the whole Details tab.
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
await renderLoaded({ budget_limit: 1000, budget_remaining: null } as any);
|
||||
expect(screen.getByTestId("budget-used-value").textContent).toBe("0");
|
||||
});
|
||||
});
|
||||
|
||||
// ── Progress bar ──────────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — progress bar", () => {
|
||||
it("renders the progress bar when budget_limit is set", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
|
||||
expect(screen.getByRole("progressbar")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("does NOT render progress bar when budget_limit is null", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
|
||||
expect(screen.queryByRole("progressbar")).toBeNull();
|
||||
});
|
||||
|
||||
it("fills to the correct percentage (25%)", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
|
||||
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
|
||||
expect(fill.style.width).toBe("25%");
|
||||
});
|
||||
|
||||
it("fills to the correct percentage (50%)", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 500, budget_limit: 1000 }));
|
||||
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
|
||||
expect(fill.style.width).toBe("50%");
|
||||
});
|
||||
|
||||
it("caps fill at 100% when budget_used exceeds budget_limit", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 1500, budget_limit: 1000 }));
|
||||
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
|
||||
expect(fill.style.width).toBe("100%");
|
||||
});
|
||||
|
||||
it("progress bar has aria-valuenow equal to the calculated percentage", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 300, budget_limit: 1000 }));
|
||||
const bar = screen.getByRole("progressbar");
|
||||
expect(bar.getAttribute("aria-valuenow")).toBe("30");
|
||||
});
|
||||
|
||||
it("shows 0% progress bar when budget_used is absent from the response", async () => {
|
||||
// Regression: budget_used is optional (provisioning-stuck workspaces return
|
||||
// partial shapes). Without the `?? 0` guard the progressPct calculation
|
||||
// throws a TypeScript strict-null error and the build fails.
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
await renderLoaded({ budget_limit: 1000, budget_remaining: null } as any);
|
||||
const bar = screen.getByRole("progressbar");
|
||||
expect(bar.getAttribute("aria-valuenow")).toBe("0");
|
||||
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
|
||||
expect(fill.style.width).toBe("0%");
|
||||
});
|
||||
});
|
||||
|
||||
// ── Input pre-fill ────────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — input pre-fill", () => {
|
||||
it("pre-fills input with existing budget_limit", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_limit: 500 }));
|
||||
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
|
||||
expect(input.value).toBe("500");
|
||||
});
|
||||
|
||||
it("leaves input empty when budget_limit is null", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
|
||||
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
|
||||
expect(input.value).toBe("");
|
||||
});
|
||||
});
|
||||
|
||||
// ── Save — PATCH calls ────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — save", () => {
|
||||
it("calls PATCH /workspaces/:id/budget with budget_limit as integer", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 800 }) as any);
|
||||
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
|
||||
|
||||
fireEvent.change(screen.getByTestId("budget-limit-input"), {
|
||||
target: { value: "800" },
|
||||
});
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
|
||||
expect(mockPatch.mock.calls[0][0]).toBe("/workspaces/ws-1/budget");
|
||||
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.budget_limit).toBe(800);
|
||||
});
|
||||
|
||||
it("sends budget_limit: 0 (not null) when input is '0' — zero-credit budget", async () => {
|
||||
// Regression for QA bug report: `parseInt("0") || null` would yield null.
|
||||
// The correct form `raw !== "" ? parseInt(raw, 10) : null` must return 0.
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 0, budget_used: 0, budget_remaining: 0 }) as any);
|
||||
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
|
||||
|
||||
fireEvent.change(screen.getByTestId("budget-limit-input"), {
|
||||
target: { value: "0" },
|
||||
});
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
|
||||
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.budget_limit).toBe(0);
|
||||
expect(body.budget_limit).not.toBeNull();
|
||||
});
|
||||
|
||||
it("sends budget_limit: null when input is blank", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: null, budget_remaining: null }) as any);
|
||||
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
|
||||
|
||||
fireEvent.change(screen.getByTestId("budget-limit-input"), {
|
||||
target: { value: "" },
|
||||
});
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
|
||||
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.budget_limit).toBeNull();
|
||||
});
|
||||
|
||||
it("updates displayed stats after successful save", async () => {
|
||||
const updated = budgetResponse({ budget_limit: 2000, budget_used: 500, budget_remaining: 1500 });
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPatch.mockResolvedValueOnce(updated as any);
|
||||
await renderLoaded(budgetResponse({ budget_limit: 1000, budget_used: 250 }));
|
||||
|
||||
fireEvent.change(screen.getByTestId("budget-limit-input"), {
|
||||
target: { value: "2000" },
|
||||
});
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.getByTestId("budget-limit-value").textContent).toBe("2,000")
|
||||
);
|
||||
});
|
||||
|
||||
it("shows save error message on non-402 PATCH failure", async () => {
|
||||
mockPatch.mockRejectedValueOnce(
|
||||
new Error("API PATCH /workspaces/ws-1/budget: 500 server error")
|
||||
);
|
||||
await renderLoaded();
|
||||
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.getByTestId("budget-save-error")).toBeTruthy()
|
||||
);
|
||||
expect(screen.getByTestId("budget-save-error").textContent).toContain("500");
|
||||
});
|
||||
});
|
||||
|
||||
// ── 402 handling ──────────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — 402 handling", () => {
|
||||
it("shows exceeded banner when GET returns 402", async () => {
|
||||
mockGet.mockRejectedValueOnce(make402Error());
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
|
||||
);
|
||||
expect(screen.getByText("Budget exceeded — messages blocked")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("does NOT show fetch error text when GET returns 402 (only banner)", async () => {
|
||||
mockGet.mockRejectedValueOnce(make402Error());
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.queryByTestId("budget-loading")).toBeNull()
|
||||
);
|
||||
expect(screen.queryByTestId("budget-fetch-error")).toBeNull();
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("shows exceeded banner when PATCH returns 402", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValueOnce(budgetResponse() as any);
|
||||
mockPatch.mockRejectedValueOnce(make402PatchError());
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
|
||||
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
|
||||
);
|
||||
// Should NOT also show the save-error alert
|
||||
expect(screen.queryByTestId("budget-save-error")).toBeNull();
|
||||
});
|
||||
|
||||
it("clears exceeded banner after a successful save", async () => {
|
||||
mockGet.mockRejectedValueOnce(make402Error());
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
await waitFor(() =>
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
|
||||
);
|
||||
|
||||
// Now a successful PATCH (limit was raised)
|
||||
const updated = budgetResponse({ budget_limit: 5000, budget_used: 250, budget_remaining: 4750 });
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPatch.mockResolvedValueOnce(updated as any);
|
||||
|
||||
await act(async () => {
|
||||
fireEvent.change(screen.getByTestId("budget-limit-input"), {
|
||||
target: { value: "5000" },
|
||||
});
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
});
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull()
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// ── Non-402 fetch error ───────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — non-402 fetch errors", () => {
|
||||
it("shows fetch error text on non-402 GET failure", async () => {
|
||||
mockGet.mockRejectedValueOnce(makeGenericError("internal server error"));
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.getByTestId("budget-fetch-error")).toBeTruthy()
|
||||
);
|
||||
expect(screen.getByTestId("budget-fetch-error").textContent).toContain("500");
|
||||
});
|
||||
|
||||
it("does NOT show stats row on fetch error", async () => {
|
||||
mockGet.mockRejectedValueOnce(makeGenericError());
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
|
||||
expect(screen.queryByTestId("budget-stats-row")).toBeNull();
|
||||
});
|
||||
|
||||
it("does NOT show exceeded banner on non-402 fetch error", async () => {
|
||||
mockGet.mockRejectedValueOnce(makeGenericError());
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
|
||||
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
|
||||
it("fills to 25%", async () => {
|
||||
await renderLoaded(withMonthly(1000, 250));
|
||||
expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("25%");
|
||||
});
|
||||
|
||||
it("fills to 50%", async () => {
|
||||
await renderLoaded(withMonthly(1000, 500));
|
||||
expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("50%");
|
||||
});
|
||||
|
||||
it("caps fill at 100% when spend exceeds limit", async () => {
|
||||
await renderLoaded(withMonthly(1000, 4000));
|
||||
expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("100%");
|
||||
});
|
||||
|
||||
it("sets aria-valuenow to the computed percentage on the progressbar", async () => {
|
||||
await renderLoaded(withMonthly(1000, 250));
|
||||
const bars = screen.getAllByRole("progressbar");
|
||||
// the monthly bar is the only one rendered (others unlimited)
|
||||
expect(bars).toHaveLength(1);
|
||||
expect(bars[0].getAttribute("aria-valuenow")).toBe("25");
|
||||
});
|
||||
|
||||
it("shows a 0% bar when spend is 0 against a set limit", async () => {
|
||||
await renderLoaded(withMonthly(1000, 0));
|
||||
expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("0%");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -213,6 +213,7 @@ describe("CreateWorkspaceDialog", () => {
|
||||
expect(runtimeTexts).toEqual([
|
||||
"Claude Code",
|
||||
"OpenAI Codex CLI",
|
||||
"Google ADK",
|
||||
"Hermes",
|
||||
"OpenClaw",
|
||||
]);
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// internal#718 P3 (retire-list #4) — when GET /templates serves a
|
||||
// registry-backed selectable list (registry_providers + registry_models with
|
||||
// display_name / billing_mode / derived provider), the canvas builds the
|
||||
// provider catalog FROM that registry data instead of re-inferring vendor
|
||||
// from model-id prefixes (VENDOR_LABELS / BARE_VENDOR_PATTERNS / inferVendor).
|
||||
// The heuristic path stays only as the fallback for non-registry runtimes /
|
||||
// older backends.
|
||||
|
||||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
buildProviderCatalogFromRegistry,
|
||||
type RegistryProvider,
|
||||
type RegistryModel,
|
||||
} from "../ProviderModelSelector";
|
||||
|
||||
// Mirrors the registry-served claude-code payload from GET /templates
|
||||
// (registry_providers / registry_models). display_name + billing_mode come
|
||||
// from the registry, NOT from the canvas VENDOR_LABELS map.
|
||||
const CLAUDE_CODE_REGISTRY_PROVIDERS: RegistryProvider[] = [
|
||||
{
|
||||
name: "anthropic-oauth",
|
||||
display_name: "Claude Code subscription",
|
||||
auth_env: ["CLAUDE_CODE_OAUTH_TOKEN"],
|
||||
billing_mode: "byok",
|
||||
},
|
||||
{
|
||||
name: "anthropic-api",
|
||||
display_name: "Anthropic API",
|
||||
auth_env: ["ANTHROPIC_API_KEY"],
|
||||
billing_mode: "byok",
|
||||
},
|
||||
{
|
||||
name: "platform",
|
||||
display_name: "Platform",
|
||||
auth_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
|
||||
billing_mode: "platform_managed",
|
||||
},
|
||||
];
|
||||
|
||||
const CLAUDE_CODE_REGISTRY_MODELS: RegistryModel[] = [
|
||||
{ id: "sonnet", provider: "anthropic-oauth", billing_mode: "byok" },
|
||||
{ id: "opus", provider: "anthropic-oauth", billing_mode: "byok" },
|
||||
{ id: "claude-opus-4-7", provider: "anthropic-api", billing_mode: "byok" },
|
||||
{ id: "anthropic/claude-opus-4-7", provider: "platform", billing_mode: "platform_managed" },
|
||||
];
|
||||
|
||||
describe("buildProviderCatalogFromRegistry", () => {
  type Catalog = ReturnType<typeof buildProviderCatalogFromRegistry>;

  // Catalog built from the complete claude-code registry fixture. Called
  // inside each test so every case works on a freshly built catalog.
  const fullCatalog = (): Catalog =>
    buildProviderCatalogFromRegistry(
      CLAUDE_CODE_REGISTRY_PROVIDERS,
      CLAUDE_CODE_REGISTRY_MODELS,
    );

  // First catalog entry whose vendor matches, or undefined when absent.
  const entryFor = (catalog: Catalog, vendor: string) =>
    catalog.find((entry) => entry.vendor === vendor);

  it("buckets models by their DERIVED registry provider, not by inferred vendor", () => {
    const entriesByVendor = new Map(
      fullCatalog().map((entry) => [entry.vendor, entry]),
    );

    // The two OAuth-derived models land in the anthropic-oauth bucket.
    const oauth = entriesByVendor.get("anthropic-oauth");
    expect(oauth).toBeDefined();
    const oauthIds = oauth!.models.map((model) => model.id);
    oauthIds.sort();
    expect(oauthIds).toEqual(["opus", "sonnet"]);

    // The platform-namespaced model lands in the platform bucket.
    const platform = entriesByVendor.get("platform");
    expect(platform).toBeDefined();
    const platformIds = platform!.models.map((model) => model.id);
    expect(platformIds).toEqual(["anthropic/claude-opus-4-7"]);
  });

  it("labels providers from the registry display_name, not VENDOR_LABELS", () => {
    const oauth = entryFor(fullCatalog(), "anthropic-oauth");
    // The catalog builder may decorate the label with a model count, so only
    // assert that it carries the registry display_name ("Claude Code
    // subscription") rather than an inferred vendor label.
    expect(oauth!.label).toContain("Claude Code subscription");
  });

  it("carries the registry billing_mode per provider", () => {
    const catalog = fullCatalog();
    const oauth = entryFor(catalog, "anthropic-oauth");
    expect(oauth!.billingMode).toBe("byok");
    const platform = entryFor(catalog, "platform");
    expect(platform!.billingMode).toBe("platform_managed");
  });

  it("surfaces the registry auth_env on the provider entry", () => {
    const oauth = entryFor(fullCatalog(), "anthropic-oauth");
    expect(oauth!.envVars).toEqual(["CLAUDE_CODE_OAUTH_TOKEN"]);
  });

  it("only includes providers that actually have at least one served model", () => {
    // Only anthropic-oauth has a model in this slice; the other registry
    // providers (anthropic-api, platform) must not show up as empty buckets.
    const servedModels: RegistryModel[] = [
      { id: "sonnet", provider: "anthropic-oauth", billing_mode: "byok" },
    ];
    const catalog = buildProviderCatalogFromRegistry(
      CLAUDE_CODE_REGISTRY_PROVIDERS,
      servedModels,
    );
    const vendors = catalog.map((entry) => entry.vendor);
    expect(vendors).toEqual(["anthropic-oauth"]);
  });
});
|
||||
@@ -7,10 +7,28 @@ import { api } from "@/lib/api";
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Period keys MUST match the server SSOT (workspace-server budget_periods.go).
|
||||
type BudgetPeriod = "hourly" | "daily" | "weekly" | "monthly";
|
||||
|
||||
const PERIODS: { key: BudgetPeriod; label: string }[] = [
|
||||
{ key: "hourly", label: "Hourly" },
|
||||
{ key: "daily", label: "Daily" },
|
||||
{ key: "weekly", label: "Weekly" },
|
||||
{ key: "monthly", label: "Monthly" },
|
||||
];
|
||||
|
||||
interface PeriodBudget {
|
||||
limit: number | null; // USD cents; null = no limit
|
||||
spend: number; // rolling-window spend, USD cents
|
||||
remaining: number | null; // null when no limit
|
||||
}
|
||||
|
||||
interface BudgetData {
|
||||
budget_limit: number | null;
|
||||
budget_used?: number; // optional — provisioning-stuck workspaces return partial shapes
|
||||
budget_remaining: number | null;
|
||||
periods?: Partial<Record<BudgetPeriod, PeriodBudget>>;
|
||||
// legacy fields (pre-multi-period server) — tolerated for back-compat
|
||||
budget_limit?: number | null;
|
||||
monthly_spend?: number;
|
||||
budget_remaining?: number | null;
|
||||
}
|
||||
|
||||
interface Props {
|
||||
@@ -26,31 +44,71 @@ function isApiError402(e: unknown): boolean {
|
||||
return e instanceof Error && /: 402( |$)/.test(e.message);
|
||||
}
|
||||
|
||||
/**
 * Format an amount given in USD cents as a dollar string ("$X.XX").
 *
 * Always renders exactly two fraction digits; digit grouping follows the
 * runtime's default locale. For negative amounts the sign is placed before
 * the currency symbol ("-$1.50"), never between the symbol and the digits
 * ("$-1.50").
 *
 * @param cents Amount in USD cents.
 * @returns The formatted dollar amount.
 */
function fmtUSD(cents: number): string {
  // Format the magnitude only, then prepend the sign ourselves so it cannot
  // end up after the "$".
  const magnitude = (Math.abs(cents) / 100).toLocaleString(undefined, {
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
  });
  const sign = cents < 0 ? "-" : "";
  return `${sign}$${magnitude}`;
}
|
||||
|
||||
/**
 * Build the complete four-period budget map from a server payload.
 *
 * - No payload: every period is unlimited (`limit: null`) with zero spend.
 * - Payload with `periods`: each served period is copied over, with missing
 *   fields defaulting to `null` (limit / remaining) or `0` (spend); periods
 *   the payload omits stay unlimited.
 * - Payload without `periods` (legacy shape): the top-level `budget_limit` /
 *   `monthly_spend` / `budget_remaining` fields populate the monthly period
 *   only.
 */
function periodsFrom(data: BudgetData | null): Record<BudgetPeriod, PeriodBudget> {
  const unlimited = (): PeriodBudget => ({ limit: null, spend: 0, remaining: null });
  const result: Record<BudgetPeriod, PeriodBudget> = {
    hourly: unlimited(),
    daily: unlimited(),
    weekly: unlimited(),
    monthly: unlimited(),
  };
  if (!data) {
    return result;
  }

  const served = data.periods;
  if (!served) {
    // Legacy payload: a single monthly limit/spend at the top level.
    result.monthly = {
      limit: data.budget_limit ?? null,
      spend: data.monthly_spend ?? 0,
      remaining: data.budget_remaining ?? null,
    };
    return result;
  }

  PERIODS.forEach(({ key }) => {
    const entry = served[key];
    if (!entry) {
      return; // period not served — keep the unlimited default
    }
    result[key] = {
      limit: entry.limit ?? null,
      spend: entry.spend ?? 0,
      remaining: entry.remaining ?? null,
    };
  });
  return result;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Component
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* BudgetSection — dedicated "Budget" section in the workspace details panel.
|
||||
*
|
||||
* - Fetches GET /workspaces/:id/budget on mount for live usage stats
|
||||
* - Shows a progress bar (budget_used / budget_limit, blue-500, capped 100%)
|
||||
* - Allows updating budget_limit via PATCH /workspaces/:id/budget
|
||||
* - Shows a 402-specific "Budget exceeded" amber banner for any blocked state
|
||||
* BudgetSection — per-workspace LLM budget, four independent rolling windows
|
||||
* (hourly / daily / weekly / monthly). Each period has its own ceiling (USD);
|
||||
* spend is the rolling-window LLM cost. Crossing ANY period blocks new work
|
||||
* (server returns 402). Sends PATCH {budget_limits:{period:cents|null}}.
|
||||
*/
|
||||
export function BudgetSection({ workspaceId }: Props) {
|
||||
const [budget, setBudget] = useState<BudgetData | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [fetchError, setFetchError] = useState<string | null>(null);
|
||||
|
||||
const [limitInput, setLimitInput] = useState("");
|
||||
// One input per period, in USD cents (string for controlled inputs).
|
||||
const [limitInputs, setLimitInputs] = useState<Record<BudgetPeriod, string>>({
|
||||
hourly: "",
|
||||
daily: "",
|
||||
weekly: "",
|
||||
monthly: "",
|
||||
});
|
||||
const [saving, setSaving] = useState(false);
|
||||
const [saveError, setSaveError] = useState<string | null>(null);
|
||||
|
||||
/** True when a 402 has been seen from any API call in this section. */
|
||||
const [budgetExceeded, setBudgetExceeded] = useState(false);
|
||||
|
||||
// ── Fetch current budget data ─────────────────────────────────────────────
|
||||
// Mirror the normalized per-period limits into the controlled input strings:
// a set limit becomes its decimal string, no limit becomes a blank input.
const syncInputs = useCallback((data: BudgetData | null) => {
  const normalized = periodsFrom(data);
  const asInputValue = (limit: number | null): string =>
    limit != null ? String(limit) : "";
  setLimitInputs({
    hourly: asInputValue(normalized.hourly.limit),
    daily: asInputValue(normalized.daily.limit),
    weekly: asInputValue(normalized.weekly.limit),
    monthly: asInputValue(normalized.monthly.limit),
  });
}, []);
|
||||
|
||||
const loadBudget = useCallback(async () => {
|
||||
setLoading(true);
|
||||
@@ -58,7 +116,7 @@ export function BudgetSection({ workspaceId }: Props) {
|
||||
try {
|
||||
const data = await api.get<BudgetData>(`/workspaces/${workspaceId}/budget`);
|
||||
setBudget(data);
|
||||
setLimitInput(data.budget_limit != null ? String(data.budget_limit) : "");
|
||||
syncInputs(data);
|
||||
} catch (e) {
|
||||
if (isApiError402(e)) {
|
||||
setBudgetExceeded(true);
|
||||
@@ -68,29 +126,30 @@ export function BudgetSection({ workspaceId }: Props) {
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}, [workspaceId]);
|
||||
}, [workspaceId, syncInputs]);
|
||||
|
||||
useEffect(() => {
|
||||
loadBudget();
|
||||
}, [loadBudget]);
|
||||
|
||||
// ── Save handler ──────────────────────────────────────────────────────────
|
||||
|
||||
const handleSave = async () => {
|
||||
setSaving(true);
|
||||
setSaveError(null);
|
||||
const raw = limitInput.trim();
|
||||
// Use explicit empty-string check (not falsy check) so that a
|
||||
// user-entered "0" is sent as budget_limit: 0, not null (unlimited).
|
||||
const parsedLimit = raw !== "" ? parseInt(raw, 10) : null;
|
||||
|
||||
// Build the per-period map: blank → null (clear); a number → that ceiling.
|
||||
const budget_limits: Record<BudgetPeriod, number | null> = {
|
||||
hourly: null,
|
||||
daily: null,
|
||||
weekly: null,
|
||||
monthly: null,
|
||||
};
|
||||
for (const { key } of PERIODS) {
|
||||
const raw = limitInputs[key].trim();
|
||||
budget_limits[key] = raw !== "" ? parseInt(raw, 10) : null;
|
||||
}
|
||||
try {
|
||||
const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, {
|
||||
budget_limit: parsedLimit,
|
||||
});
|
||||
const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, { budget_limits });
|
||||
setBudget(updated);
|
||||
setLimitInput(updated.budget_limit != null ? String(updated.budget_limit) : "");
|
||||
// Clear exceeded state if the save succeeded (limit was raised or removed)
|
||||
syncInputs(updated);
|
||||
setBudgetExceeded(false);
|
||||
} catch (e) {
|
||||
if (isApiError402(e)) {
|
||||
@@ -103,24 +162,15 @@ export function BudgetSection({ workspaceId }: Props) {
|
||||
}
|
||||
};
|
||||
|
||||
// ── Progress calculation ──────────────────────────────────────────────────
|
||||
|
||||
const progressPct =
|
||||
budget && budget.budget_limit != null && budget.budget_limit > 0
|
||||
? Math.min(100, Math.round(((budget.budget_used ?? 0) / budget.budget_limit) * 100))
|
||||
: 0;
|
||||
|
||||
// ── Render ────────────────────────────────────────────────────────────────
|
||||
const periods = periodsFrom(budget);
|
||||
|
||||
return (
|
||||
<div className="space-y-3" data-testid="budget-section">
|
||||
{/* Section header */}
|
||||
<div>
|
||||
<h3 className="text-xs font-semibold text-ink-mid uppercase tracking-wider">
|
||||
Budget
|
||||
</h3>
|
||||
<h3 className="text-xs font-semibold text-ink-mid uppercase tracking-wider">Budget</h3>
|
||||
<p className="text-[11px] text-ink-mid mt-0.5">
|
||||
Limit total message credits for this workspace
|
||||
Cap LLM spend for this workspace per period — crossing any limit pauses new work
|
||||
</p>
|
||||
</div>
|
||||
|
||||
@@ -131,32 +181,14 @@ export function BudgetSection({ workspaceId }: Props) {
|
||||
data-testid="budget-exceeded-banner"
|
||||
className="flex items-center gap-2 px-3 py-2 rounded-lg bg-surface border border-amber-700/50 text-warm text-xs font-medium"
|
||||
>
|
||||
<svg
|
||||
width="13"
|
||||
height="13"
|
||||
viewBox="0 0 13 13"
|
||||
fill="none"
|
||||
aria-hidden="true"
|
||||
className="shrink-0"
|
||||
>
|
||||
<path
|
||||
d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z"
|
||||
stroke="currentColor"
|
||||
strokeWidth="1.4"
|
||||
strokeLinejoin="round"
|
||||
/>
|
||||
<path
|
||||
d="M6.5 5.5V7.5M6.5 9.5h.01"
|
||||
stroke="currentColor"
|
||||
strokeWidth="1.4"
|
||||
strokeLinecap="round"
|
||||
/>
|
||||
<svg width="13" height="13" viewBox="0 0 13 13" fill="none" aria-hidden="true" className="shrink-0">
|
||||
<path d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z" stroke="currentColor" strokeWidth="1.4" strokeLinejoin="round" />
|
||||
<path d="M6.5 5.5V7.5M6.5 9.5h.01" stroke="currentColor" strokeWidth="1.4" strokeLinecap="round" />
|
||||
</svg>
|
||||
Budget exceeded — messages blocked
|
||||
Budget exceeded — new work paused
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Usage stats */}
|
||||
{loading ? (
|
||||
<p className="text-xs text-ink-mid" data-testid="budget-loading">
|
||||
Loading…
|
||||
@@ -165,89 +197,78 @@ export function BudgetSection({ workspaceId }: Props) {
|
||||
<p className="text-xs text-bad" data-testid="budget-fetch-error">
|
||||
{fetchError}
|
||||
</p>
|
||||
) : budget ? (
|
||||
<div className="space-y-2">
|
||||
{/* Stats row */}
|
||||
<div className="flex items-baseline justify-between" data-testid="budget-stats-row">
|
||||
<span className="text-xs text-ink-mid">Credits used</span>
|
||||
<span className="text-xs font-mono text-ink-mid">
|
||||
<span data-testid="budget-used-value">{(budget.budget_used ?? 0).toLocaleString()}</span>
|
||||
<span className="text-ink-mid mx-1">/</span>
|
||||
<span data-testid="budget-limit-value">
|
||||
{budget.budget_limit != null
|
||||
? budget.budget_limit.toLocaleString()
|
||||
: "Unlimited"}
|
||||
</span>
|
||||
</span>
|
||||
</div>
|
||||
) : (
|
||||
<div className="space-y-3">
|
||||
{PERIODS.map(({ key, label }) => {
|
||||
const p = periods[key];
|
||||
const pct =
|
||||
p.limit != null && p.limit > 0 ? Math.min(100, Math.round((p.spend / p.limit) * 100)) : 0;
|
||||
const over = p.limit != null && p.spend >= p.limit;
|
||||
return (
|
||||
<div key={key} className="space-y-1" data-testid={`budget-period-${key}`}>
|
||||
<div className="flex items-baseline justify-between">
|
||||
<label htmlFor={`budget-${key}-${workspaceId}`} className="text-xs text-ink-mid">
|
||||
{label}
|
||||
</label>
|
||||
<span className="text-[11px] font-mono text-ink-mid">
|
||||
<span data-testid={`budget-${key}-spend`}>{fmtUSD(p.spend)}</span>
|
||||
<span className="mx-1">/</span>
|
||||
<span data-testid={`budget-${key}-limit`}>{p.limit != null ? fmtUSD(p.limit) : "∞"}</span>
|
||||
</span>
|
||||
</div>
|
||||
{p.limit != null && (
|
||||
<div
|
||||
role="progressbar"
|
||||
aria-label={`${label} budget usage`}
|
||||
aria-valuenow={pct}
|
||||
aria-valuemin={0}
|
||||
aria-valuemax={100}
|
||||
className="h-1.5 w-full rounded-full bg-surface-card overflow-hidden"
|
||||
>
|
||||
<div
|
||||
data-testid={`budget-${key}-fill`}
|
||||
className={`h-full rounded-full transition-all duration-300 ${over ? "bg-bad" : "bg-accent"}`}
|
||||
style={{ width: `${pct}%` }}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
<input
|
||||
id={`budget-${key}-${workspaceId}`}
|
||||
type="number"
|
||||
min="0"
|
||||
step="1"
|
||||
value={limitInputs[key]}
|
||||
onChange={(e) => setLimitInputs((s) => ({ ...s, [key]: e.target.value }))}
|
||||
placeholder="USD cents — blank for unlimited"
|
||||
data-testid={`budget-${key}-input`}
|
||||
className="w-full bg-surface-card border border-line rounded-lg px-3 py-1.5 text-xs text-ink-mid placeholder-zinc-500 focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent/30 transition-colors"
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
|
||||
{/* Progress bar (only when limit is set) */}
|
||||
{budget.budget_limit != null && (
|
||||
<p className="text-[11px] text-ink-mid">Limits are USD cents (e.g. 500 = $5.00). Blank = unlimited.</p>
|
||||
|
||||
{saveError && (
|
||||
<div
|
||||
role="progressbar"
|
||||
aria-label="Budget usage"
|
||||
aria-valuenow={progressPct}
|
||||
aria-valuemin={0}
|
||||
aria-valuemax={100}
|
||||
className="h-1.5 w-full rounded-full bg-surface-card overflow-hidden"
|
||||
role="alert"
|
||||
data-testid="budget-save-error"
|
||||
className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-bad"
|
||||
>
|
||||
<div
|
||||
data-testid="budget-progress-fill"
|
||||
className="h-full rounded-full bg-accent transition-all duration-300"
|
||||
style={{ width: `${progressPct}%` }}
|
||||
/>
|
||||
{saveError}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Remaining credits */}
|
||||
{budget.budget_remaining != null && (
|
||||
<p className="text-[11px] text-ink-mid" data-testid="budget-remaining">
|
||||
{budget.budget_remaining.toLocaleString()} credits remaining
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
) : null}
|
||||
|
||||
{/* Input + Save */}
|
||||
<div className="space-y-1.5 pt-1">
|
||||
<label
|
||||
htmlFor={`budget-limit-input-${workspaceId}`}
|
||||
className="text-[11px] text-ink-mid block"
|
||||
>
|
||||
Budget limit (credits)
|
||||
</label>
|
||||
<input
|
||||
id={`budget-limit-input-${workspaceId}`}
|
||||
type="number"
|
||||
min="0"
|
||||
step="1"
|
||||
value={limitInput}
|
||||
onChange={(e) => setLimitInput(e.target.value)}
|
||||
placeholder="e.g. 1000 — blank for unlimited"
|
||||
data-testid="budget-limit-input"
|
||||
className="w-full bg-surface-card border border-line rounded-lg px-3 py-2 text-sm text-ink-mid placeholder-zinc-500 focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent/30 transition-colors"
|
||||
/>
|
||||
<p className="text-xs text-ink-mid">Leave blank for unlimited</p>
|
||||
|
||||
{saveError && (
|
||||
<div
|
||||
role="alert"
|
||||
data-testid="budget-save-error"
|
||||
className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-bad"
|
||||
<button
|
||||
onClick={handleSave}
|
||||
disabled={saving}
|
||||
data-testid="budget-save-btn"
|
||||
className="px-4 py-1.5 bg-accent-strong hover:bg-accent active:bg-accent-strong rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-900"
|
||||
>
|
||||
{saveError}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={handleSave}
|
||||
disabled={saving}
|
||||
data-testid="budget-save-btn"
|
||||
className="px-4 py-1.5 bg-accent-strong hover:bg-accent active:bg-accent-strong rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-900"
|
||||
>
|
||||
{saving ? "Saving…" : "Save"}
|
||||
</button>
|
||||
</div>
|
||||
{saving ? "Saving…" : "Save"}
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -11,8 +11,12 @@ import { ExternalConnectionSection } from "./ExternalConnectionSection";
|
||||
import {
|
||||
ProviderModelSelector,
|
||||
buildProviderCatalog,
|
||||
buildProviderCatalogFromRegistry,
|
||||
findProviderForModel,
|
||||
type SelectorValue,
|
||||
type ProviderEntry,
|
||||
type RegistryProvider,
|
||||
type RegistryModel,
|
||||
} from "../ProviderModelSelector";
|
||||
import { isExternalLikeRuntime } from "@/lib/externalRuntimes";
|
||||
|
||||
@@ -258,6 +262,17 @@ interface RuntimeOption {
|
||||
// canvas falls back to deriving unique vendor prefixes from
|
||||
// models[].id (still adapter-driven, just inferred).
|
||||
providers: string[];
|
||||
// registryBacked / registryProviders / registryModels come from the
|
||||
// registry-served GET /templates fields (internal#718 P3). When
|
||||
// registryBacked is true, the selectable provider+model list is built from
|
||||
// the registry (registryProviders/registryModels) — display labels +
|
||||
// billing mode + derived provider come from the provider-registry SSOT, not
|
||||
// the canvas VENDOR_LABELS / billingModeForProvider vocabularies. When
|
||||
// false (non-registry runtime / older backend), the canvas falls back to
|
||||
// the template-served models[] + its inferVendor heuristic.
|
||||
registryBacked: boolean;
|
||||
registryProviders: RegistryProvider[];
|
||||
registryModels: RegistryModel[];
|
||||
}
|
||||
|
||||
// deriveProvidersFromModels — when a template doesn't ship an explicit
|
||||
@@ -322,6 +337,32 @@ export function billingModeForProvider(provider: string): LLMBillingMode {
|
||||
return "byok";
|
||||
}
|
||||
|
||||
// billingModeForSelectedProvider — internal#718 P3 (retire-list #5): the
|
||||
// billing mode the Config tab shows/sends for the selected PROVIDER, sourced
|
||||
// from the registry-served catalog when available rather than the hardcoded
|
||||
// billingModeForProvider rule.
|
||||
//
|
||||
// When the runtime is registry-backed, GET /templates serves each provider's
|
||||
// DERIVED billing_mode (platform_managed for the closed platform provider,
|
||||
// byok otherwise) on the ProviderEntry. We read it off the catalog so the UI
|
||||
// reflects the registry SSOT — the same predicate billing/credential emission
|
||||
// keys off the derived provider.
|
||||
//
|
||||
// Falls back to billingModeForProvider when: no catalog (non-registry runtime
|
||||
// / older backend), or the provider string isn't carried by the catalog
|
||||
// (e.g. a stale saved value). The fallback keeps the legacy behavior intact
|
||||
// for everything the registry doesn't yet speak to.
|
||||
export function billingModeForSelectedProvider(
  provider: string,
  catalog?: ProviderEntry[],
): LLMBillingMode {
  // Registry path: only consulted when a non-empty catalog was supplied.
  const entries = catalog ? catalog : [];
  if (entries.length > 0) {
    // Catalog vendors are compared against the whitespace-trimmed provider.
    const wanted = provider.trim();
    for (const candidate of entries) {
      if (candidate.vendor !== wanted) continue;
      // Only the FIRST vendor match counts; if it carries no billing mode we
      // fall through to the legacy rule rather than scanning further.
      if (candidate.billingMode) {
        return candidate.billingMode;
      }
      break;
    }
  }
  // Legacy path: no catalog, provider not in the catalog, or the matching
  // entry has no billing mode — defer to the hardcoded provider rule.
  return billingModeForProvider(provider);
}
|
||||
|
||||
// Fallback used when /templates can't be fetched (offline, older backend).
|
||||
// Keep in sync with manifest.json workspace_templates as a defensive default.
|
||||
// Model + env suggestions only flow when the backend is reachable.
|
||||
@@ -336,13 +377,20 @@ export function billingModeForProvider(provider: string): LLMBillingMode {
|
||||
// config.yaml` on the container is a separate runtime-internal file,
|
||||
// not this one.
|
||||
const RUNTIMES_WITH_OWN_CONFIG = new Set<string>(["external", "kimi", "kimi-cli", "openclaw"]);
|
||||
const SUPPORTED_RUNTIME_VALUES = new Set(["claude-code", "codex", "openclaw", "hermes"]);
|
||||
// The runtime picker is SSOT-driven: options come from GET /templates,
|
||||
// which workspace-server already gates to the manifest.json maintained set
|
||||
// (loadRuntimesFromManifest). A hand-maintained frontend allowlist silently
|
||||
// dropped runtimes the backend added (google-adk shipped in manifest but was
|
||||
// filtered out, so its workspaces rendered the wrong default option). A
|
||||
// template may still opt OUT of the picker via `displayable: false` on its
|
||||
// /templates row. See project_canvas_runtime_dropdown_ssot_fix.
|
||||
|
||||
const FALLBACK_RUNTIME_OPTIONS: RuntimeOption[] = [
|
||||
{ value: "claude-code", label: "Claude Code", models: [], providers: [] },
|
||||
{ value: "codex", label: "Codex", models: [], providers: [] },
|
||||
{ value: "openclaw", label: "OpenClaw", models: [], providers: [] },
|
||||
{ value: "hermes", label: "Hermes", models: [], providers: [] },
|
||||
{ value: "claude-code", label: "Claude Code", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
|
||||
{ value: "codex", label: "Codex", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
|
||||
{ value: "google-adk", label: "Google ADK", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
|
||||
{ value: "openclaw", label: "OpenClaw", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
|
||||
{ value: "hermes", label: "Hermes", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
|
||||
];
|
||||
|
||||
export function ConfigTab({ workspaceId }: Props) {
|
||||
@@ -355,15 +403,24 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
const [rawMode, setRawMode] = useState(false);
|
||||
const [rawDraft, setRawDraft] = useState("");
|
||||
const [runtimeOptions, setRuntimeOptions] = useState<RuntimeOption[]>(FALLBACK_RUNTIME_OPTIONS);
|
||||
// Provider override (Option B PR-5): stored separately from config.yaml
|
||||
// because the value lives in workspace_secrets (encrypted), not in the
|
||||
// platform-managed config.yaml. The two endpoints are GET/PUT
|
||||
// /workspaces/:id/provider on workspace-server (handlers/secrets.go).
|
||||
// Empty = "auto-derive from model slug prefix" — pre-Option-B behavior
|
||||
// and what most users want. Setting to a non-empty value writes
|
||||
// LLM_PROVIDER into workspace_secrets and triggers an auto-restart so
|
||||
// the workspace boots with the new provider in env (and via CP user-
|
||||
// data, written into /configs/config.yaml on next provision too).
|
||||
// internal#718 P4 closure: the explicit provider override
|
||||
// (LLM_PROVIDER workspace_secret, surfaced via GET/PUT
|
||||
// /workspaces/:id/provider) has been RETIRED. The provider is
|
||||
// derived at every decision point from (runtime, model) via the
|
||||
// registry — no stored row remains. The `provider` / `originalProvider`
|
||||
// state and the provider dropdown survive in this component for
|
||||
// backwards-compat (display only) but are no longer persisted:
|
||||
// - loadConfig no longer GETs /workspaces/:id/provider (the
|
||||
// endpoint returns 410 Gone). The state initializes to ""
|
||||
// and stays there.
|
||||
// - handleSave no longer PUTs /workspaces/:id/provider.
|
||||
// - The dropdown still updates the local `provider` state so the
|
||||
// user can preview the derived value; the value never leaves
|
||||
// the browser.
|
||||
// This is the canvas-side complement to the backend retirement of
|
||||
// SetProvider/GetProvider/setProviderSecret. Older canvases that
|
||||
// still call PUT /provider hit the 410 Gone with a structured
|
||||
// PROVIDER_ENDPOINT_RETIRED code — loud failure, no silent miss.
|
||||
const [provider, setProvider] = useState("");
|
||||
const [originalProvider, setOriginalProvider] = useState("");
|
||||
// Track the model the form first rendered, so handleSave can detect
|
||||
@@ -414,26 +471,23 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
//
|
||||
// See GH #1894 for the workspace-row-as-source-of-truth rationale
|
||||
// that motivated splitting from a single config.yaml read.
|
||||
const [wsRes, modelRes, providerRes] = await Promise.all([
|
||||
// internal#718 P4 closure: the GET /workspaces/:id/provider leg is
|
||||
// RETIRED — the endpoint returns 410 Gone. Provider is now derived
|
||||
// from (runtime, model) via the registry; no stored value exists
|
||||
// to load. Always seed the local state to "" so the dropdown
|
||||
// initializes to "auto-derive".
|
||||
const [wsRes, modelRes] = await Promise.all([
|
||||
api.get<{ runtime?: string; tier?: number }>(`/workspaces/${workspaceId}`)
|
||||
.catch(() => ({} as { runtime?: string; tier?: number })),
|
||||
api.get<{ model?: string }>(`/workspaces/${workspaceId}/model`)
|
||||
.catch(() => ({} as { model?: string })),
|
||||
api.get<{ provider?: string }>(`/workspaces/${workspaceId}/provider`)
|
||||
.catch(() => null),
|
||||
]);
|
||||
const wsMetadataRuntime = (wsRes.runtime || "").trim();
|
||||
const wsMetadataModel = (modelRes.model || "").trim();
|
||||
const wsMetadataTier: number | null =
|
||||
typeof wsRes.tier === "number" ? wsRes.tier : null;
|
||||
if (providerRes !== null) {
|
||||
const loadedProvider = (providerRes.provider || "").trim();
|
||||
setProvider(loadedProvider);
|
||||
setOriginalProvider(loadedProvider);
|
||||
} else {
|
||||
setProvider("");
|
||||
setOriginalProvider("");
|
||||
}
|
||||
setProvider("");
|
||||
setOriginalProvider("");
|
||||
// originalModel is set further down once the YAML has been parsed —
|
||||
// we want it to reflect what the form ACTUALLY rendered, which may
|
||||
// be the YAML's runtime_config.model fallback when MODEL_PROVIDER
|
||||
@@ -527,20 +581,49 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
|
||||
useEffect(() => {
|
||||
let cancelled = false;
|
||||
api.get<Array<{ id: string; name?: string; runtime?: string; models?: ModelSpec[]; providers?: string[] }>>("/templates")
|
||||
api.get<Array<{
|
||||
id: string;
|
||||
name?: string;
|
||||
runtime?: string;
|
||||
models?: ModelSpec[];
|
||||
providers?: string[];
|
||||
// internal#718 P3 registry-served fields (additive; absent on older
|
||||
// backends and for non-registry runtimes).
|
||||
registry_backed?: boolean;
|
||||
registry_providers?: RegistryProvider[];
|
||||
registry_models?: RegistryModel[];
|
||||
displayable?: boolean;
|
||||
}>>("/templates")
|
||||
.then((rows) => {
|
||||
if (cancelled || !Array.isArray(rows)) return;
|
||||
const byRuntime = new Map<string, RuntimeOption>();
|
||||
for (const r of rows) {
|
||||
const v = (r.runtime || "").trim();
|
||||
if (!SUPPORTED_RUNTIME_VALUES.has(v)) continue;
|
||||
if (!v) continue;
|
||||
// Honor an explicit opt-out; absent/true means show it.
|
||||
if (r.displayable === false) continue;
|
||||
// The richer template wins if two templates share a runtime — rare. On a
|
||||
// tie the row seen first is kept (a later row replaces it only when strictly richer).
|
||||
const existing = byRuntime.get(v);
|
||||
const models = Array.isArray(r.models) ? r.models : [];
|
||||
const providers = Array.isArray(r.providers) ? r.providers : [];
|
||||
if (!existing || models.length > existing.models.length) {
|
||||
byRuntime.set(v, { value: v, label: r.name || v, models, providers });
|
||||
const registryProviders = Array.isArray(r.registry_providers) ? r.registry_providers : [];
|
||||
const registryModels = Array.isArray(r.registry_models) ? r.registry_models : [];
|
||||
const registryBacked = r.registry_backed === true && registryModels.length > 0;
|
||||
// Prefer the richer payload: a registry-backed entry, then more
|
||||
// template models. Keeps the "last/richer template wins" intent.
|
||||
const score = (o: RuntimeOption) => (o.registryBacked ? 1000 : 0) + o.models.length;
|
||||
const candidate: RuntimeOption = {
|
||||
value: v,
|
||||
label: r.name || v,
|
||||
models,
|
||||
providers,
|
||||
registryBacked,
|
||||
registryProviders,
|
||||
registryModels,
|
||||
};
|
||||
if (!existing || score(candidate) > score(existing)) {
|
||||
byRuntime.set(v, candidate);
|
||||
}
|
||||
}
|
||||
if (byRuntime.size > 0) setRuntimeOptions(Array.from(byRuntime.values()));
|
||||
@@ -551,7 +634,13 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
|
||||
// Models + env hints for the currently-selected runtime.
|
||||
const selectedRuntime = runtimeOptions.find((o) => o.value === (config.runtime || "")) ?? null;
|
||||
const availableModels: ModelSpec[] = selectedRuntime?.models ?? [];
|
||||
// Memoised so its identity is stable across renders — it feeds several
|
||||
// useMemo dependency arrays below (registry/legacy catalog, selector models)
|
||||
// and a fresh `[]` literal each render would defeat their memoisation.
|
||||
const availableModels: ModelSpec[] = useMemo(
|
||||
() => selectedRuntime?.models ?? [],
|
||||
[selectedRuntime?.models],
|
||||
);
|
||||
// Provider suggestions for the legacy free-text input fallback (used
|
||||
// when /templates returned no models for this runtime, e.g. hermes
|
||||
// workspaces). Prefer the runtime's declarative providers list,
|
||||
@@ -565,9 +654,37 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
|
||||
// Vendor-aware catalog shared with the selector. Memoised so the
|
||||
// catalog identity is stable across renders (selector relies on it).
|
||||
//
|
||||
// internal#718 P3: when the runtime is registry-backed, build the catalog
|
||||
// FROM the registry-served providers/models (display labels + billing +
|
||||
// derived provider from the provider-registry SSOT) instead of re-inferring
|
||||
// vendor from model-id prefixes. Falls back to the inferVendor heuristic
|
||||
// for non-registry runtimes / older backends.
|
||||
const registryBacked = selectedRuntime?.registryBacked ?? false;
|
||||
const providerCatalog = useMemo(
|
||||
() => buildProviderCatalog(availableModels),
|
||||
[availableModels],
|
||||
() =>
|
||||
registryBacked
|
||||
? buildProviderCatalogFromRegistry(
|
||||
selectedRuntime?.registryProviders ?? [],
|
||||
selectedRuntime?.registryModels ?? [],
|
||||
)
|
||||
: buildProviderCatalog(availableModels),
|
||||
[registryBacked, selectedRuntime?.registryProviders, selectedRuntime?.registryModels, availableModels],
|
||||
);
|
||||
// Models fed to the selector dropdown: the registry-served native set for a
|
||||
// registry-backed runtime (so the dropdown can render no unregistered
|
||||
// option), else the template-served models.
|
||||
const selectorModels: ModelSpec[] = useMemo(
|
||||
() =>
|
||||
registryBacked
|
||||
? (selectedRuntime?.registryModels ?? []).map((m) => ({
|
||||
id: m.id,
|
||||
name: m.name,
|
||||
// carry the derived provider so the selector buckets correctly
|
||||
...(m.provider ? { provider: m.provider } : {}),
|
||||
}))
|
||||
: availableModels,
|
||||
[registryBacked, selectedRuntime?.registryModels, availableModels],
|
||||
);
|
||||
|
||||
// Derive the selector's current value from the form state. Provider
|
||||
@@ -718,53 +835,27 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
}
|
||||
}
|
||||
|
||||
// Provider override save (Option B PR-5). PUT only when the user
|
||||
// changed the dropdown — otherwise an unrelated Save (e.g. tier
|
||||
// edit) would re-write the provider unchanged and the server-
|
||||
// side auto-restart would fire on every Save, costing the user a
|
||||
// ~30s reboot for a no-op change. Server endpoint accepts an
|
||||
// empty string to clear the override (deletes the
|
||||
// workspace_secrets row); we forward whatever the form holds.
|
||||
let providerSaveError: string | null = null;
|
||||
const providerChanged = provider !== originalProvider;
|
||||
if (providerChanged) {
|
||||
try {
|
||||
await api.put(`/workspaces/${workspaceId}/provider`, { provider });
|
||||
setOriginalProvider(provider);
|
||||
} catch (e) {
|
||||
providerSaveError = e instanceof Error ? e.message : "Provider update was rejected";
|
||||
}
|
||||
}
|
||||
// internal#718 P4 closure: provider override save is RETIRED. The
|
||||
// /workspaces/:id/provider endpoint returns 410 Gone; the provider
|
||||
// is derived from (runtime, model) at every decision point via the
|
||||
// registry. The local dropdown state still updates so the user can
|
||||
// see the predicted provider, but it never round-trips to the
|
||||
// server. Variables retained as locals (set to constants) so the
|
||||
// downstream restart-suppress logic below has clear semantics
|
||||
// and the diff against the prior shape stays small.
|
||||
const providerSaveError: string | null = null;
|
||||
const providerChanged = false;
|
||||
|
||||
// Provider → billing_mode linkage (internal#703 Gap 2). When the
|
||||
// provider actually changed AND its implied billing_mode differs
|
||||
// from the previously-selected provider's, push the new mode to
|
||||
// the per-tenant llm-billing-mode endpoint (same path the LLM
|
||||
// Billing section uses). Without this, selecting a non-Platform
|
||||
// provider leaves billing_mode=platform_managed → CP keeps
|
||||
// injecting the platform proxy → BYOK never takes.
|
||||
//
|
||||
// Gated on (a) the provider PUT having succeeded — no point setting
|
||||
// byok if the credential write failed — and (b) the mode actually
|
||||
// changing, so an unrelated provider tweak between two BYOK vendors
|
||||
// (e.g. minimax → openrouter) doesn't re-issue a redundant
|
||||
// platform_managed→byok PUT and trigger a needless restart.
|
||||
let billingModeSaveError: string | null = null;
|
||||
if (providerChanged && !providerSaveError) {
|
||||
const nextMode = billingModeForProvider(provider);
|
||||
const prevMode = billingModeForProvider(originalProvider);
|
||||
if (nextMode !== prevMode) {
|
||||
try {
|
||||
await api.put(
|
||||
`/admin/workspaces/${workspaceId}/llm-billing-mode`,
|
||||
{ mode: nextMode },
|
||||
);
|
||||
} catch (e) {
|
||||
billingModeSaveError =
|
||||
e instanceof Error ? e.message : "Billing mode update was rejected";
|
||||
}
|
||||
}
|
||||
}
|
||||
// internal#718 P4 closure: provider → billing_mode linkage is also
|
||||
// RETIRED. P2-B (#1972) moved the billing decision to
|
||||
// ResolveLLMBillingModeDerived, which DERIVES the provider from
|
||||
// (runtime, model) at every read. The canvas can no longer
|
||||
// override it via a separate PUT, by design — the runtime+model
|
||||
// selection IS the billing-mode selection. The
|
||||
// /admin/workspaces/:id/llm-billing-mode endpoint still exists
|
||||
// as the operator override surface (workspaces.llm_billing_mode
|
||||
// column); it is no longer driven by the provider dropdown.
|
||||
const billingModeSaveError: string | null = null;
|
||||
|
||||
setOriginalYaml(content);
|
||||
if (rawMode) {
|
||||
@@ -773,27 +864,22 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
} else {
|
||||
setRawDraft(content);
|
||||
}
|
||||
// SetProvider on the server already triggers an auto-restart for
|
||||
// the workspace whenever the value actually changed (see
|
||||
// workspace-server/internal/handlers/secrets.go:SetProvider). If
|
||||
// the user also clicked Save+Restart we'd kick off a SECOND
|
||||
// restart here and the two would race in the canvas store —
|
||||
// suppress the redundant call and rely on the server-side one.
|
||||
const providerWillAutoRestart = providerChanged && !providerSaveError;
|
||||
// internal#718 P4 closure: providerWillAutoRestart is always
|
||||
// false now (provider PUT is retired; no server-side auto-restart
|
||||
// can fire). Save+Restart flows through the canvas store
|
||||
// restart path the same way it did pre-#718 for non-provider
|
||||
// edits.
|
||||
const providerWillAutoRestart = providerChanged && !providerSaveError
|
||||
if (restart && !providerWillAutoRestart) {
|
||||
await useCanvasStore.getState().restartWorkspace(workspaceId);
|
||||
} else if (!restart) {
|
||||
useCanvasStore.getState().updateNodeData(workspaceId, { needsRestart: !providerWillAutoRestart });
|
||||
}
|
||||
// Aggregate partial-save errors. modelSaveError, providerSaveError,
|
||||
// and billingModeSaveError describe rejected updates from
|
||||
// independent endpoints — show whichever fired so the user knows
|
||||
// which field reverts on next reload (otherwise they'd see "Saved"
|
||||
// and be confused why Provider snapped back). The billing-mode case
|
||||
// is the most important to surface: the provider credential saved
|
||||
// but BYOK won't actually take until billing_mode flips, so a
|
||||
// silent failure here is exactly the #703 "selecting a provider has
|
||||
// no effect" symptom.
|
||||
// Aggregate partial-save errors. With provider+billing-mode PUTs
|
||||
// retired, only modelSaveError can fire from the secret-mint side
|
||||
// — the provider/billing branches are dead code retained as
|
||||
// constant nulls to keep the diff small. They are surfaced
|
||||
// defensively in case a future re-enablement needs the wiring.
|
||||
const partialError = providerSaveError
|
||||
? `Other fields saved, but provider update failed: ${providerSaveError}`
|
||||
: billingModeSaveError
|
||||
@@ -918,9 +1004,10 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
— empty = "auto-derive from model slug" was the pre-PR-5
|
||||
behavior. Since internal#718 P4 the selection is display-only:
|
||||
it no longer writes LLM_PROVIDER or triggers an auto-restart. */}
|
||||
{availableModels.length > 0 ? (
|
||||
{selectorModels.length > 0 ? (
|
||||
<ProviderModelSelector
|
||||
models={availableModels}
|
||||
models={selectorModels}
|
||||
catalog={registryBacked ? providerCatalog : undefined}
|
||||
value={selectorValue}
|
||||
onChange={(next) => {
|
||||
setSelectorValue(next);
|
||||
@@ -933,7 +1020,7 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
setConfig((prev) => {
|
||||
const v = next.model;
|
||||
const prevModelId = prev.runtime_config?.model || prev.model || "";
|
||||
const prevSpec = availableModels.find((m) => m.id === prevModelId) ?? null;
|
||||
const prevSpec = selectorModels.find((m) => m.id === prevModelId) ?? null;
|
||||
const prevRequired = prev.runtime_config?.required_env ?? [];
|
||||
const wasTemplateDriven =
|
||||
prevRequired.length === 0 ||
|
||||
|
||||
@@ -29,8 +29,15 @@ type FormState = {
|
||||
displayMode: string;
|
||||
displayProtocol: string;
|
||||
resolution: string;
|
||||
dataPersistence: string; // "" (auto) | "persist" | "ephemeral" — internal#734
|
||||
};
|
||||
|
||||
// internal#734: per-workspace durable-data choice. "" = auto (desktop-control
|
||||
// keeps data, others follow the org default). Human labels for the selector.
|
||||
const DATA_PERSISTENCE_OPTIONS = ["", "persist", "ephemeral"];
|
||||
const dataPersistenceLabel = (v: string): string =>
|
||||
v === "persist" ? "Always keep (persist)" : v === "ephemeral" ? "Don't keep (ephemeral)" : "Auto";
|
||||
|
||||
export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
const runtime = data.runtime;
|
||||
const instanceType = data.compute?.instance_type;
|
||||
@@ -39,9 +46,10 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
const displayProtocol = data.compute?.display?.protocol;
|
||||
const displayWidth = data.compute?.display?.width;
|
||||
const displayHeight = data.compute?.display?.height;
|
||||
const dataPersistence = data.compute?.data_persistence;
|
||||
const initial = useMemo(
|
||||
() => formFromData({ runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight }),
|
||||
[runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight],
|
||||
() => formFromData({ runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence }),
|
||||
[runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence],
|
||||
);
|
||||
const [form, setForm] = useState<FormState>(initial);
|
||||
const [saving, setSaving] = useState(false);
|
||||
@@ -84,6 +92,8 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
display: form.displayEnabled
|
||||
? { mode: form.displayMode, protocol: form.displayProtocol, width, height }
|
||||
: { mode: "none" },
|
||||
// internal#734: omit when "auto" so the wire/default behavior is unchanged.
|
||||
...(form.dataPersistence ? { data_persistence: form.dataPersistence } : {}),
|
||||
};
|
||||
|
||||
const resp = await api.patch<{ needs_restart?: boolean }>(`/workspaces/${workspaceId}`, {
|
||||
@@ -176,6 +186,18 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
onChange={(resolution) => setForm((s) => ({ ...s, resolution }))}
|
||||
/>
|
||||
)}
|
||||
<SelectField
|
||||
id="data-persistence"
|
||||
label="Saved data (cookies, downloads, memory)"
|
||||
value={form.dataPersistence}
|
||||
options={DATA_PERSISTENCE_OPTIONS}
|
||||
optionLabel={dataPersistenceLabel}
|
||||
onChange={(dataPersistence) => setForm((s) => ({ ...s, dataPersistence }))}
|
||||
/>
|
||||
<p className="-mt-1 text-[10px] leading-snug text-ink-soft">
|
||||
Whether this workspace's data survives a restart/recreate. Auto keeps it for
|
||||
browser (desktop) workspaces; Ephemeral never keeps it (privacy).
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="mt-4 flex items-center justify-end gap-2">
|
||||
@@ -231,6 +253,7 @@ function formFromData(data: {
|
||||
displayProtocol?: string;
|
||||
displayWidth?: number;
|
||||
displayHeight?: number;
|
||||
dataPersistence?: string;
|
||||
}): FormState {
|
||||
const width = data.displayWidth ?? 1920;
|
||||
const height = data.displayHeight ?? 1080;
|
||||
@@ -243,6 +266,7 @@ function formFromData(data: {
|
||||
displayMode: data.displayMode && data.displayMode !== "none" ? data.displayMode : "desktop-control",
|
||||
displayProtocol: data.displayProtocol || "novnc",
|
||||
resolution,
|
||||
dataPersistence: data.dataPersistence || "",
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ export function DetailsTab({ workspaceId, data }: Props) {
|
||||
const [peers, setPeers] = useState<PeerData[]>([]);
|
||||
const [saving, setSaving] = useState(false);
|
||||
const [confirmDelete, setConfirmDelete] = useState(false);
|
||||
const [eraseData, setEraseData] = useState(false); // internal#734: erase saved data on delete
|
||||
const [peersError, setPeersError] = useState<string | null>(null);
|
||||
const [saveError, setSaveError] = useState<string | null>(null);
|
||||
const [deleteError, setDeleteError] = useState<string | null>(null);
|
||||
@@ -93,7 +94,10 @@ export function DetailsTab({ workspaceId, data }: Props) {
|
||||
const handleDelete = async () => {
|
||||
setDeleteError(null);
|
||||
try {
|
||||
await api.del(`/workspaces/${workspaceId}?confirm=true`, {
|
||||
// internal#734: erase_data=true asks the server to prune this workspace's
|
||||
// durable data volume (cookies / downloads / memory). Default off keeps it
|
||||
// for the orphan-sweeper grace.
|
||||
await api.del(`/workspaces/${workspaceId}?confirm=true${eraseData ? "&erase_data=true" : ""}`, {
|
||||
headers: { "X-Confirm-Name": name },
|
||||
});
|
||||
// Mirror the server-side cascade — drop the row + every
|
||||
@@ -323,6 +327,19 @@ export function DetailsTab({ workspaceId, data }: Props) {
|
||||
<h3 id="delete-confirm-title" className="text-xs font-medium text-bad">
|
||||
Confirm deletion
|
||||
</h3>
|
||||
<label className="flex items-start gap-2 text-[11px] text-ink-mid">
|
||||
<input
|
||||
type="checkbox"
|
||||
aria-label="Also erase saved data"
|
||||
checked={eraseData}
|
||||
onChange={(e) => setEraseData(e.target.checked)}
|
||||
className="mt-0.5 h-3.5 w-3.5 accent-red-600"
|
||||
/>
|
||||
<span>
|
||||
Also erase saved data (cookies, downloads, agent memory). Cannot be undone.
|
||||
Unchecked keeps it recoverable briefly.
|
||||
</span>
|
||||
</label>
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
type="button"
|
||||
@@ -339,6 +356,7 @@ export function DetailsTab({ workspaceId, data }: Props) {
|
||||
onClick={() => {
|
||||
setConfirmDelete(false);
|
||||
setDeleteError(null);
|
||||
setEraseData(false);
|
||||
// Return focus to the trigger so keyboard users aren't stranded
|
||||
deleteButtonRef.current?.focus();
|
||||
}}
|
||||
|
||||
@@ -5,9 +5,10 @@ import React from "react";
|
||||
import { BudgetSection } from "../BudgetSection";
|
||||
import { api } from "@/lib/api";
|
||||
|
||||
// Queue-based mock for the api module. Each api call shifts from the queue.
|
||||
// Tests push with qGet/qPatch and the module-level mockImplementation
|
||||
// reads from the queue.
|
||||
// Multi-period budget (#49): the API now returns a `periods` map
|
||||
// (hourly/daily/weekly/monthly), each {limit, spend, remaining} in USD cents.
|
||||
// The UI renders one row per period and PATCHes {budget_limits:{period:cents|null}}.
|
||||
|
||||
type QueueEntry = { body?: unknown; err?: Error };
|
||||
const apiQueue: QueueEntry[] = [];
|
||||
|
||||
@@ -40,45 +41,49 @@ const WS_ID = "budget-test-ws";
|
||||
function qGet(body: unknown) {
|
||||
apiQueue.push({ body });
|
||||
}
|
||||
|
||||
function qGetErr(status: number, msg: string) {
|
||||
apiQueue.push({ err: new Error(`${msg}: ${status}`) });
|
||||
}
|
||||
|
||||
function qPatch(body: unknown) {
|
||||
apiQueue.push({ body });
|
||||
}
|
||||
|
||||
function qPatchErr(status: number, msg: string) {
|
||||
apiQueue.push({ err: new Error(`${msg}: ${status}`) });
|
||||
}
|
||||
|
||||
function makeBudget(overrides: Partial<{
|
||||
budget_limit: number | null;
|
||||
budget_used: number;
|
||||
budget_remaining: number | null;
|
||||
}> = {}) {
|
||||
type P = { limit: number | null; spend: number; remaining: number | null };
|
||||
|
||||
// makeBudget builds the periods response. Override any subset of periods.
|
||||
function makeBudget(overrides: Partial<Record<"hourly" | "daily" | "weekly" | "monthly", Partial<P>>> = {}) {
|
||||
const blank: P = { limit: null, spend: 0, remaining: null };
|
||||
const mk = (o?: Partial<P>): P => {
|
||||
const p = { ...blank, ...(o ?? {}) };
|
||||
if (p.limit != null && p.remaining == null) p.remaining = p.limit - p.spend;
|
||||
return p;
|
||||
};
|
||||
const periods = {
|
||||
hourly: mk(overrides.hourly),
|
||||
daily: mk(overrides.daily),
|
||||
weekly: mk(overrides.weekly),
|
||||
monthly: mk(overrides.monthly),
|
||||
};
|
||||
return {
|
||||
budget_limit: 10_000,
|
||||
budget_used: 3_500,
|
||||
budget_remaining: 6_500,
|
||||
...overrides,
|
||||
periods,
|
||||
budget_limit: periods.monthly.limit,
|
||||
monthly_spend: periods.monthly.spend,
|
||||
budget_remaining: periods.monthly.remaining,
|
||||
};
|
||||
}
|
||||
|
||||
describe("BudgetSection", () => {
|
||||
describe("BudgetSection (multi-period)", () => {
|
||||
describe("loading state", () => {
|
||||
it("shows loading indicator while fetching", async () => {
|
||||
let resolveGet: (v: unknown) => void;
|
||||
vi.mocked(api.get).mockImplementationOnce(
|
||||
async () => new Promise((r) => { resolveGet = r as (v: unknown) => void; }),
|
||||
);
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
expect(screen.getByTestId("budget-loading")).toBeTruthy();
|
||||
|
||||
// Resolve after render to verify state clears
|
||||
resolveGet!(makeBudget());
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.queryByTestId("budget-loading")).toBeNull();
|
||||
@@ -89,21 +94,16 @@ describe("BudgetSection", () => {
|
||||
describe("fetch error state", () => {
|
||||
it("shows error message on non-402 fetch failure", async () => {
|
||||
qGetErr(500, "Internal Server Error");
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-fetch-error")).toBeTruthy();
|
||||
});
|
||||
expect(screen.getByTestId("budget-fetch-error")!.textContent).toContain("500");
|
||||
});
|
||||
|
||||
it("shows 402 as exceeded banner, not fetch error", async () => {
|
||||
// 402 means the budget limit was hit — different UX from a network/API error.
|
||||
it("shows the exceeded banner (not a fetch error) on a 402", async () => {
|
||||
qGetErr(402, "Payment Required");
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
|
||||
});
|
||||
@@ -111,220 +111,105 @@ describe("BudgetSection", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("budget loaded — display", () => {
|
||||
it("renders used / limit stats row", async () => {
|
||||
qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500 }));
|
||||
|
||||
describe("rendering periods", () => {
|
||||
it("renders all four period rows", async () => {
|
||||
qGet(makeBudget());
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-used-value")!.textContent).toBe("3,500");
|
||||
});
|
||||
expect(screen.getByTestId("budget-limit-value")!.textContent).toBe("10,000");
|
||||
});
|
||||
|
||||
it("renders 'Unlimited' when budget_limit is null", async () => {
|
||||
qGet(makeBudget({ budget_limit: null, budget_used: 1_000, budget_remaining: null }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-limit-value")!.textContent).toBe("Unlimited");
|
||||
for (const k of ["hourly", "daily", "weekly", "monthly"]) {
|
||||
expect(screen.getByTestId(`budget-period-${k}`)).toBeTruthy();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
it("renders remaining credits when present", async () => {
|
||||
qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500, budget_remaining: 6_500 }));
|
||||
|
||||
it("formats spend and limit as USD per period", async () => {
|
||||
qGet(makeBudget({ monthly: { limit: 10_000, spend: 3_500 } }));
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-remaining")!.textContent).toContain("6,500");
|
||||
expect(screen.getByTestId("budget-remaining")!.textContent).toContain("credits remaining");
|
||||
expect(screen.getByTestId("budget-monthly-spend")!.textContent).toBe("$35.00");
|
||||
});
|
||||
expect(screen.getByTestId("budget-monthly-limit")!.textContent).toBe("$100.00");
|
||||
});
|
||||
|
||||
it("shows ∞ for a period with no limit", async () => {
|
||||
qGet(makeBudget({ hourly: { limit: null, spend: 1_000 } }));
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-hourly-limit")!.textContent).toBe("∞");
|
||||
});
|
||||
});
|
||||
|
||||
it("omits remaining credits when budget_remaining is null", async () => {
|
||||
qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500, budget_remaining: null }));
|
||||
|
||||
it("renders the progress bar only for periods with a limit", async () => {
|
||||
qGet(makeBudget({ monthly: { limit: 10_000, spend: 12_000 }, hourly: { limit: null, spend: 5_000 } }));
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.queryByTestId("budget-remaining")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
it("caps progress bar at 100% when used > limit", async () => {
|
||||
// Over-limit: 12000 used of 10000 limit should show 100%, not 120%.
|
||||
qGet(makeBudget({ budget_limit: 10_000, budget_used: 12_000, budget_remaining: null }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
const fill = screen.getByTestId("budget-progress-fill");
|
||||
expect(fill.getAttribute("style")).toContain("100%");
|
||||
});
|
||||
});
|
||||
|
||||
it("omits progress bar when budget_limit is null (unlimited)", async () => {
|
||||
qGet(makeBudget({ budget_limit: null, budget_used: 5_000, budget_remaining: null }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.queryByTestId("budget-progress-fill")).toBeNull();
|
||||
expect(screen.getByTestId("budget-monthly-fill")).toBeTruthy();
|
||||
});
|
||||
expect(screen.queryByTestId("budget-hourly-fill")).toBeNull();
|
||||
// over-budget fill caps at 100%
|
||||
const fill = screen.getByTestId("budget-monthly-fill") as HTMLElement;
|
||||
expect(fill.style.width).toBe("100%");
|
||||
});
|
||||
});
|
||||
|
||||
describe("budget exceeded (402)", () => {
|
||||
it("shows exceeded banner when load returns 402", async () => {
|
||||
qGetErr(402, "Payment Required");
|
||||
|
||||
describe("save", () => {
|
||||
it("PATCHes budget_limits for all four periods and clears the exceeded banner", async () => {
|
||||
qGet(makeBudget({ monthly: { limit: 10_000, spend: 3_500 } }));
|
||||
qPatch(makeBudget({ hourly: { limit: 500, spend: 0 }, monthly: { limit: 20_000, spend: 0 } }));
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-hourly-input")).toBeTruthy();
|
||||
});
|
||||
|
||||
fireEvent.change(screen.getByTestId("budget-hourly-input"), { target: { value: "500" } });
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
|
||||
expect(screen.getByTestId("budget-exceeded-banner")!.textContent).toContain("Budget exceeded");
|
||||
expect(vi.mocked(api.patch)).toHaveBeenCalled();
|
||||
});
|
||||
const [, body] = vi.mocked(api.patch).mock.calls[0];
|
||||
expect((body as { budget_limits: Record<string, number | null> }).budget_limits).toMatchObject({
|
||||
hourly: 500,
|
||||
monthly: 10_000, // unchanged input echoes the loaded limit
|
||||
});
|
||||
});
|
||||
|
||||
it("clears exceeded banner after successful save", async () => {
|
||||
qGetErr(402, "Payment Required");
|
||||
qPatch(makeBudget({ budget_limit: 50_000, budget_used: 0, budget_remaining: 50_000 }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
|
||||
});
|
||||
|
||||
const input = screen.getByTestId("budget-limit-input");
|
||||
fireEvent.change(input, { target: { value: "50000" } });
|
||||
|
||||
const saveBtn = screen.getByTestId("budget-save-btn");
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("save flow", () => {
|
||||
it("shows save error on non-402 patch failure", async () => {
|
||||
it("shows a save error on non-402 PATCH failure", async () => {
|
||||
qGet(makeBudget());
|
||||
qPatchErr(500, "Internal Server Error");
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
|
||||
expect(screen.getByTestId("budget-save-btn")).toBeTruthy();
|
||||
});
|
||||
|
||||
const saveBtn = screen.getByTestId("budget-save-btn");
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-save-error")).toBeTruthy();
|
||||
expect(screen.getByTestId("budget-save-error")!.textContent).toContain("500");
|
||||
});
|
||||
expect(screen.getByTestId("budget-save-error")!.textContent).toContain("500");
|
||||
});
|
||||
|
||||
it("updates input to new limit value after successful save", async () => {
|
||||
qGet(makeBudget({ budget_limit: 10_000 }));
|
||||
qPatch(makeBudget({ budget_limit: 20_000 }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
// Wait for the input to appear (loading → loaded)
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.queryByTestId("budget-loading")).toBeNull();
|
||||
});
|
||||
|
||||
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
|
||||
// Debug: check what values are rendered
|
||||
const limitValue = screen.getByTestId("budget-limit-value")?.textContent;
|
||||
expect(input.value).toBe("10000"); // initial value from API
|
||||
expect(limitValue).toBe("10,000");
|
||||
|
||||
fireEvent.change(input, { target: { value: "20000" } });
|
||||
expect(input.value).toBe("20000");
|
||||
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect((screen.getByTestId("budget-limit-input") as HTMLInputElement).value).toBe("20000");
|
||||
});
|
||||
});
|
||||
|
||||
it("sends null when input is cleared (unlimited)", async () => {
|
||||
qGet(makeBudget({ budget_limit: 10_000 }));
|
||||
qPatch(makeBudget({ budget_limit: null }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
|
||||
});
|
||||
|
||||
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
|
||||
fireEvent.change(input, { target: { value: "" } });
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await vi.waitFor(() => {
|
||||
// After save with null limit, input should show empty (unlimited)
|
||||
expect(input.value).toBe("");
|
||||
});
|
||||
});
|
||||
|
||||
it("shows saving state on button while patch is in flight", async () => {
|
||||
it("surfaces the exceeded banner on a 402 PATCH", async () => {
|
||||
qGet(makeBudget());
|
||||
let resolvePatch: (v: unknown) => void;
|
||||
vi.mocked(api.patch).mockImplementationOnce(
|
||||
async () => new Promise((r) => { resolvePatch = r as (v: unknown) => void; }),
|
||||
);
|
||||
|
||||
qPatchErr(402, "Payment Required");
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
|
||||
expect(screen.getByTestId("budget-save-btn")).toBeTruthy();
|
||||
});
|
||||
|
||||
fireEvent.change(screen.getByTestId("budget-limit-input"), { target: { value: "50000" } });
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
const btn = screen.getByTestId("budget-save-btn");
|
||||
expect(btn.textContent).toContain("Saving");
|
||||
|
||||
resolvePatch!(makeBudget({ budget_limit: 50_000 }));
|
||||
await vi.waitFor(() => {
|
||||
expect(btn.textContent).toContain("Save");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("isApiError402 — regression coverage", () => {
|
||||
it("classifies ': 402' with space as 402", async () => {
|
||||
qGetErr(402, "Payment Required");
|
||||
qPatch(makeBudget());
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
it("classifies non-402 error messages as regular fetch errors", async () => {
|
||||
qGetErr(503, "Service Unavailable");
|
||||
|
||||
describe("legacy payload back-compat", () => {
|
||||
it("maps a pre-multi-period {budget_limit, monthly_spend} response to the monthly row", async () => {
|
||||
qGet({ budget_limit: 5_000, monthly_spend: 1_000, budget_remaining: 4_000 });
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-fetch-error")).toBeTruthy();
|
||||
expect(screen.getByTestId("budget-monthly-limit")!.textContent).toBe("$50.00");
|
||||
});
|
||||
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
|
||||
expect(screen.getByTestId("budget-monthly-spend")!.textContent).toBe("$10.00");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,255 +1,35 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Tests for the provider → llm_billing_mode linkage (internal#703 Gap 2).
|
||||
// internal#718 P4 closure — ConfigTab.billingMode.test.tsx is retired.
|
||||
//
|
||||
// What this pins: when the operator changes the PROVIDER in the Config
|
||||
// tab, the workspace's llm_billing_mode must follow — a non-Platform
|
||||
// provider sets billing_mode=byok; Platform sets platform_managed. Before
|
||||
// this wiring, selecting "Claude Code subscription (OAuth)" or any vendor
|
||||
// key wrote the credential env but left billing_mode=platform_managed, so
|
||||
// CP kept injecting the platform proxy base URL and the OAuth token /
|
||||
// vendor key was never used — BYOK silently no-op'd (the live jrs-auto
|
||||
// SEO-Agent symptom in #703).
|
||||
// This suite (255 lines, 8 tests) pinned the canvas-side provider →
|
||||
// llm_billing_mode linkage from internal#703 Gap 2: when the operator
|
||||
// changed the PROVIDER in the Config tab, ConfigTab.handleSave would
|
||||
// PUT /admin/workspaces/:id/llm-billing-mode so the platform-vs-byok
|
||||
// decision tracked the dropdown.
|
||||
//
|
||||
// The billing-mode PUT targets the same per-tenant endpoint the LLM
|
||||
// Billing section uses: PUT /admin/workspaces/:id/llm-billing-mode with
|
||||
// body {mode: "byok" | "platform_managed"}.
|
||||
// That linkage is retired together with the LLM_PROVIDER override flow
|
||||
// (see ConfigTab.provider.test.tsx retirement note). P2-B (#1972)
|
||||
// moved the platform-vs-byok decision to
|
||||
// `ResolveLLMBillingModeDerived(runtime, model, authEnv)` in
|
||||
// workspace-server — the canvas can no longer override it via the
|
||||
// provider dropdown, by design. The runtime+model selection IS the
|
||||
// billing-mode selection now.
|
||||
//
|
||||
// The `/admin/workspaces/:id/llm-billing-mode` endpoint still exists
|
||||
// as the operator override surface (`workspaces.llm_billing_mode`
|
||||
// column); it is no longer driven by the provider dropdown.
|
||||
// Coverage for the derived billing flow lives in
|
||||
// workspace-server/internal/handlers/llm_billing_mode_derived_test.go.
|
||||
//
|
||||
// Restore from git history if the canvas-side provider→billing linkage
|
||||
// needs to be revisited (it should not — the derived resolver is the
|
||||
// single decision point).
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
import { describe, it } from "vitest";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
const apiGet = vi.fn();
|
||||
const apiPatch = vi.fn();
|
||||
const apiPut = vi.fn();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
patch: (path: string, body: unknown) => apiPatch(path, body),
|
||||
put: (path: string, body: unknown) => apiPut(path, body),
|
||||
post: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
const storeUpdateNodeData = vi.fn();
|
||||
const storeRestartWorkspace = vi.fn();
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: Object.assign(
|
||||
(selector: (s: unknown) => unknown) =>
|
||||
selector({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }),
|
||||
{
|
||||
getState: () => ({
|
||||
restartWorkspace: storeRestartWorkspace,
|
||||
updateNodeData: storeUpdateNodeData,
|
||||
}),
|
||||
},
|
||||
),
|
||||
}));
|
||||
|
||||
vi.mock("../AgentCardSection", () => ({
|
||||
AgentCardSection: () => <div data-testid="agent-card-stub" />,
|
||||
}));
|
||||
|
||||
import { ConfigTab, billingModeForProvider } from "../ConfigTab";
|
||||
|
||||
function wireApi(opts: { providerValue?: string | "missing" }) {
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === `/workspaces/ws-test`) {
|
||||
return Promise.resolve({ runtime: "hermes" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/model`) {
|
||||
return Promise.resolve({ model: "nousresearch/hermes-4-70b" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/provider`) {
|
||||
if (opts.providerValue === "missing") return Promise.reject(new Error("404"));
|
||||
return Promise.resolve({
|
||||
provider: opts.providerValue ?? "",
|
||||
source: opts.providerValue ? "workspace_secrets" : "default",
|
||||
});
|
||||
}
|
||||
if (path === `/workspaces/ws-test/files/config.yaml`) {
|
||||
return Promise.resolve({ content: "name: ws\nruntime: hermes\n" });
|
||||
}
|
||||
if (path === "/templates") return Promise.resolve([]);
|
||||
return Promise.reject(new Error(`unmocked api.get: ${path}`));
|
||||
});
|
||||
}
|
||||
|
||||
function billingModeCalls() {
|
||||
return apiPut.mock.calls.filter(
|
||||
([path]) => path === "/admin/workspaces/ws-test/llm-billing-mode",
|
||||
);
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockReset();
|
||||
apiPatch.mockReset();
|
||||
apiPut.mockReset();
|
||||
storeUpdateNodeData.mockReset();
|
||||
storeRestartWorkspace.mockReset();
|
||||
});
|
||||
|
||||
describe("billingModeForProvider — pure mapping (internal#703 Gap 2)", () => {
|
||||
// Platform / empty → platform_managed. Empty means "no explicit
|
||||
// override → inherit", which resolves to platform on the backend, so
|
||||
// it must NOT flip the workspace into byok.
|
||||
it("maps Platform and empty to platform_managed", () => {
|
||||
expect(billingModeForProvider("platform")).toBe("platform_managed");
|
||||
expect(billingModeForProvider("")).toBe("platform_managed");
|
||||
expect(billingModeForProvider(" ")).toBe("platform_managed");
|
||||
expect(billingModeForProvider("PLATFORM")).toBe("platform_managed");
|
||||
});
|
||||
|
||||
// Every non-Platform provider → byok. If this regresses to returning
|
||||
// platform_managed for a vendor, BYOK silently no-ops again (#703).
|
||||
it("maps non-Platform providers to byok", () => {
|
||||
expect(billingModeForProvider("anthropic-oauth")).toBe("byok"); // Claude Code subscription
|
||||
expect(billingModeForProvider("anthropic")).toBe("byok"); // Anthropic API key
|
||||
expect(billingModeForProvider("minimax")).toBe("byok");
|
||||
expect(billingModeForProvider("openrouter")).toBe("byok");
|
||||
expect(billingModeForProvider("openai")).toBe("byok");
|
||||
});
|
||||
});
|
||||
|
||||
describe("ConfigTab — provider change drives billing_mode (internal#703 Gap 2)", () => {
|
||||
// The core fix: picking a non-Platform provider (here "anthropic-oauth"
|
||||
// = Claude Code subscription OAuth) from a fresh/empty provider must
|
||||
// PUT mode=byok to the per-tenant llm-billing-mode endpoint. This is
|
||||
// the exact path that was missing — the credential env saved but the
|
||||
// billing mode never followed, so the proxy stayed engaged.
|
||||
it("PUTs mode=byok when switching to a non-Platform provider", async () => {
|
||||
wireApi({ providerValue: "" });
|
||||
apiPut.mockResolvedValue({ status: "saved" });
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
fireEvent.change(input, { target: { value: "anthropic-oauth" } });
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
const calls = billingModeCalls();
|
||||
expect(calls.length).toBe(1);
|
||||
expect(calls[0][1]).toEqual({ mode: "byok" });
|
||||
});
|
||||
// Provider credential PUT still happens too (independent endpoint).
|
||||
expect(
|
||||
apiPut.mock.calls.some(([path]) => path === "/workspaces/ws-test/provider"),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
// Switching FROM a byok provider back TO Platform must PUT
|
||||
// mode=platform_managed so the workspace re-engages the proxy and stops
|
||||
// expecting a (now-absent) vendor key.
|
||||
it("PUTs mode=platform_managed when switching back to Platform", async () => {
|
||||
wireApi({ providerValue: "anthropic-oauth" });
|
||||
apiPut.mockResolvedValue({ status: "saved" });
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
await waitFor(() => expect((input as HTMLInputElement).value).toBe("anthropic-oauth"));
|
||||
fireEvent.change(input, { target: { value: "platform" } });
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
const calls = billingModeCalls();
|
||||
expect(calls.length).toBe(1);
|
||||
expect(calls[0][1]).toEqual({ mode: "platform_managed" });
|
||||
});
|
||||
});
|
||||
|
||||
// Changing between two BYOK vendors (minimax → openrouter) keeps
|
||||
// billing_mode=byok — the implied mode is unchanged, so re-PUTing it
|
||||
// would be a wasteful no-op that risks an extra restart. Must NOT fire.
|
||||
it("does NOT PUT billing-mode when the implied mode is unchanged", async () => {
|
||||
wireApi({ providerValue: "minimax" });
|
||||
apiPut.mockResolvedValue({ status: "saved" });
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
await waitFor(() => expect((input as HTMLInputElement).value).toBe("minimax"));
|
||||
fireEvent.change(input, { target: { value: "openrouter" } });
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
// Provider PUT fires (vendor changed)...
|
||||
expect(
|
||||
apiPut.mock.calls.some(([path]) => path === "/workspaces/ws-test/provider"),
|
||||
).toBe(true);
|
||||
});
|
||||
// ...but billing-mode does NOT (byok → byok is a no-op).
|
||||
expect(billingModeCalls().length).toBe(0);
|
||||
});
|
||||
|
||||
// A Save that doesn't touch the provider must not PUT billing-mode —
|
||||
// editing tier/name shouldn't disturb the workspace's billing mode.
|
||||
it("does NOT PUT billing-mode on a Save that leaves provider unchanged", async () => {
  // Arrange: a stored vendor provider; every PUT succeeds.
  wireApi({ providerValue: "anthropic-oauth" });
  apiPut.mockResolvedValue({ status: "saved" });
  render(<ConfigTab workspaceId="ws-test" />);
  await screen.findByTestId("provider-input");

  // Act: change only the tier so Save becomes available, then save.
  const tierField = screen.getByLabelText(/tier/i) as HTMLSelectElement;
  fireEvent.change(tierField, { target: { value: "3" } });
  const saveButton = screen.getByRole("button", { name: /^save$/i });
  fireEvent.click(saveButton);

  // Other PUTs (e.g. /model) are allowed; only billing-mode must stay silent.
  // NOTE(review): waitFor resolves as soon as its callback stops throwing, so
  // this negative-only check can pass on the first poll before the save has
  // finished — consider waiting on a positive save signal first.
  await waitFor(() => {
    const recorded = billingModeCalls();
    expect(recorded.length).toBe(0);
  });
});
|
||||
|
||||
// If the provider credential PUT itself fails, we must NOT set byok —
|
||||
// flipping billing_mode while the credential write failed would leave
|
||||
// the workspace expecting a key it doesn't have (worse than no-op).
|
||||
it("does NOT PUT billing-mode when the provider PUT fails", async () => {
  // Arrange: no stored provider; the provider PUT rejects, every other PUT
  // succeeds.
  wireApi({ providerValue: "" });
  apiPut.mockImplementation((path: string) =>
    path === "/workspaces/ws-test/provider"
      ? Promise.reject(new Error("boom"))
      : Promise.resolve({ status: "saved" }),
  );
  render(<ConfigTab workspaceId="ws-test" />);

  // Act: pick a vendor provider and save.
  const providerField = await screen.findByTestId("provider-input");
  fireEvent.change(providerField, { target: { value: "anthropic-oauth" } });
  const saveButton = screen.getByRole("button", { name: /^save$/i });
  fireEvent.click(saveButton);

  // The failure must be shown to the user; getByText throws while the message
  // is absent, which keeps waitFor polling.
  await waitFor(() => {
    const failureNotice = screen.getByText(/provider update failed/i);
    expect(failureNotice).toBeTruthy();
  });

  // With the credential write rejected, the billing mode must not be flipped.
  expect(billingModeCalls().length).toBe(0);
});
|
||||
|
||||
// If the credential saved but the billing-mode PUT is rejected, the
|
||||
// user must be warned that BYOK may not take — a silent failure here
|
||||
// is precisely the #703 symptom we're fixing.
|
||||
it("surfaces an error when billing-mode PUT fails after a successful provider save", async () => {
|
||||
wireApi({ providerValue: "" });
|
||||
apiPut.mockImplementation((path: string) => {
|
||||
if (path === "/admin/workspaces/ws-test/llm-billing-mode") {
|
||||
return Promise.reject(new Error("403 forbidden"));
|
||||
}
|
||||
return Promise.resolve({ status: "saved" });
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
fireEvent.change(input, { target: { value: "anthropic-oauth" } });
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText(/switching billing mode failed/i)).toBeTruthy();
|
||||
});
|
||||
// Placeholder suite for the retired provider → llm_billing_mode linkage.
// It registers a single skipped test with an empty body, so nothing is
// executed or asserted here.
describe("ConfigTab — provider → llm_billing_mode linkage (retired internal#718 P4)", () => {
|
||||
it.skip("LLM_PROVIDER → billing_mode wiring is retired; see file header for the replacement coverage", () => {
|
||||
// intentionally empty — the test is skipped and asserts nothing
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,87 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Regression: project_canvas_runtime_dropdown_ssot_fix — a google-adk
|
||||
// workspace's Config tab showed the wrong runtime ("LangGraph (default)"
|
||||
// / first option) because a hardcoded frontend allowlist
|
||||
// (SUPPORTED_RUNTIME_VALUES) dropped google-adk from the /templates-derived
|
||||
// options even though the backend served it. A Save from that state would
|
||||
// PATCH runtime to the wrong value and break the ADK agent.
|
||||
//
|
||||
// The fix: the dropdown is SSOT-driven — it trusts GET /templates (which the
|
||||
// backend already gates to the manifest maintained set) and hides a runtime
|
||||
// only when its row carries `displayable: false`. This pins: a google-adk
|
||||
// workspace shows "google-adk" selected, and a displayable:false template is
|
||||
// not offered.
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
// Spies standing in for the HTTP client. Tests program them (mockImplementation /
// mockResolvedValue) and inspect their recorded calls.
const apiGet = vi.fn();
|
||||
const apiPatch = vi.fn();
|
||||
const apiPut = vi.fn();
|
||||
// Replace "@/lib/api" with an object whose get/patch/put forward to the spies
// above. Each method is an arrow wrapper, so the spy is looked up when the
// method is called rather than when this factory runs. post/del are inert
// vi.fn() placeholders.
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
patch: (path: string, body: unknown) => apiPatch(path, body),
|
||||
put: (path: string, body: unknown) => apiPut(path, body),
|
||||
post: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
// Stub the canvas store hook. The replacement is callable with a selector and
// also exposes getState(); both hand back an object holding fresh vi.fn()
// placeholders for restartWorkspace and updateNodeData on every invocation.
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: Object.assign(
|
||||
(selector: (s: unknown) => unknown) => selector({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }),
|
||||
{ getState: () => ({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }) },
|
||||
),
|
||||
}));
|
||||
|
||||
// Swap AgentCardSection for an empty marker <div> (data-testid
// "agent-card-stub") so the real component is never rendered in these tests.
vi.mock("../AgentCardSection", () => ({
|
||||
AgentCardSection: () => <div data-testid="agent-card-stub" />,
|
||||
}));
|
||||
|
||||
import { ConfigTab } from "../ConfigTab";
|
||||
|
||||
/**
 * Program the `apiGet` spy for the fixed "ws-adk" workspace.
 *
 * The workspace, model and config.yaml responses are constant (a google-adk
 * workspace); only the `/templates` payload varies per test. Any other path
 * rejects so an unexpected request fails loudly.
 *
 * @param templates rows to serve from GET /templates
 */
function wireApi(templates: Array<{ id: string; name?: string; runtime?: string; models?: unknown[]; displayable?: boolean }>) {
  apiGet.mockImplementation((path: string) => {
    switch (path) {
      case "/workspaces/ws-adk":
        return Promise.resolve({ runtime: "google-adk" });
      case "/workspaces/ws-adk/model":
        return Promise.resolve({ model: "vertex:gemini-2.5-pro" });
      case "/workspaces/ws-adk/files/config.yaml":
        return Promise.resolve({ content: "name: adk\nruntime: google-adk\n" });
      case "/templates":
        return Promise.resolve(templates);
      default:
        return Promise.reject(new Error(`unmocked api.get: ${path}`));
    }
  });
}
|
||||
|
||||
// Start every test with clean spies: mockReset clears both the recorded calls
// and any implementation a previous test installed.
beforeEach(() => {
  for (const spy of [apiGet, apiPatch, apiPut]) {
    spy.mockReset();
  }
});
|
||||
|
||||
describe("ConfigTab — google-adk runtime (SSOT dropdown)", () => {
  // Resolve with the runtime <select> once it is in the document.
  const findRuntimeSelect = async (): Promise<HTMLSelectElement> => {
    const element = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
    return element as HTMLSelectElement;
  };

  // Collect the value of every <option> offered by a <select>.
  const optionValuesOf = (select: HTMLSelectElement): string[] =>
    Array.from(select.options, (option) => option.value);

  it("shows google-adk selected in the runtime dropdown (#ssot-fix)", async () => {
    // /templates serves google-adk alongside another runtime.
    wireApi([
      { id: "claude-code", name: "Claude Code", runtime: "claude-code", models: [] },
      { id: "google-adk", name: "Google ADK", runtime: "google-adk", models: [] },
    ]);
    render(<ConfigTab workspaceId="ws-adk" />);

    const runtimeSelect = await findRuntimeSelect();

    // The workspace's own runtime must be both selected and offered.
    expect(runtimeSelect.value).toBe("google-adk");
    expect(optionValuesOf(runtimeSelect)).toContain("google-adk");
  });

  it("hides a template flagged displayable:false", async () => {
    // One ordinary row and one row explicitly marked as not displayable.
    wireApi([
      { id: "google-adk", name: "Google ADK", runtime: "google-adk", models: [] },
      { id: "legacy", name: "Legacy", runtime: "legacy", models: [], displayable: false },
    ]);
    render(<ConfigTab workspaceId="ws-adk" />);

    const runtimeSelect = await findRuntimeSelect();
    const offered = optionValuesOf(runtimeSelect);

    expect(offered).toContain("google-adk");
    expect(offered).not.toContain("legacy");
  });
});
|
||||
@@ -1,574 +1,45 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Regression tests for ConfigTab Provider override (Option B PR-5).
|
||||
// internal#718 P4 closure — ConfigTab.provider.test.tsx is retired.
|
||||
//
|
||||
// What this pins: a free-text Provider combobox in the Runtime section
|
||||
// that lets the operator override the model→provider derivation hermes-
|
||||
// agent does internally. Without this UI, a fresh signup whose Hermes
|
||||
// workspace defaults to a model with no clean vendor prefix (e.g.
|
||||
// `nousresearch/hermes-4-70b`) hits the runtime's own preflight error:
|
||||
// "No LLM provider configured. Run `hermes model` to select a
|
||||
// provider, or run `hermes setup` for first-time configuration."
|
||||
// — even though tasks #195-198 wired the entire downstream pipe so a
|
||||
// non-empty provider WOULD flow through canvas → workspace-server →
|
||||
// CP user-data → workspace config.yaml → hermes adapter.
|
||||
// This 574-line suite exercised the canvas-side LLM provider override
|
||||
// flow: load the existing override from GET /workspaces/:id/provider,
|
||||
// edit the dropdown, Save → PUT /workspaces/:id/provider, and the
|
||||
// provider→billing_mode linkage on Save. All three server endpoints
|
||||
// behind those flows are retired in internal#718 P4 closure:
|
||||
//
|
||||
// Hongming Wang hit this on hongming.moleculesai.app at signup
|
||||
// 2026-05-01T17:35Z. Backend PRs were green, the gap was the missing
|
||||
// UI to set the value.
|
||||
// - workspace-server SetProvider / GetProvider (PUT/GET
|
||||
// /workspaces/:id/provider) → both return 410 Gone with a
|
||||
// PROVIDER_ENDPOINT_RETIRED structured body.
|
||||
// - workspace-server setProviderSecret (the writer into
|
||||
// workspace_secrets.LLM_PROVIDER) — removed; row never written.
|
||||
// - The LLM_PROVIDER workspace_secret itself — migrated away in
|
||||
// 20260528000000_drop_llm_provider_workspace_secret.up.sql.
|
||||
//
|
||||
// Each test pins one invariant. If any fails, the bug is back.
|
||||
// ConfigTab still renders the provider dropdown for display (the user
|
||||
// can preview the derived provider locally), but Save no longer
|
||||
// round-trips the value. The replacement contract is that the provider
|
||||
// is DERIVED at every decision point from (runtime, model) via the
|
||||
// registry — see internal/providers/derive_provider.go.
|
||||
//
|
||||
// The original suite's coverage is replaced by:
|
||||
//
|
||||
// - workspace-server: TestPutProvider_410Gone +
|
||||
// TestGetProvider_410Gone + TestProviderEndpointGone_BodyShape in
|
||||
// internal/handlers/llm_provider_removal_p4_test.go.
|
||||
// - workspace-server: TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL
|
||||
// in internal/handlers/workspace_provision_shared_test.go.
|
||||
// - registry: TestDeriveProvider_RealManifest in
|
||||
// internal/providers/derive_provider_test.go.
|
||||
//
|
||||
// Restore from git history if any aspect of the legacy LLM_PROVIDER
|
||||
// flow needs to be revisited (it should not — the retirement is
|
||||
// permanent).
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
import { describe, it } from "vitest";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
// Spies standing in for the HTTP client. Tests program them and inspect
// `mock.calls` to see which endpoints were hit and with what body.
const apiGet = vi.fn();
|
||||
const apiPatch = vi.fn();
|
||||
const apiPut = vi.fn();
|
||||
// Replace "@/lib/api" with an object whose get/patch/put forward to the spies
// above. Each method is an arrow wrapper, so the spy is looked up when the
// method is called rather than when this factory runs. post/del are inert
// vi.fn() placeholders.
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
patch: (path: string, body: unknown) => apiPatch(path, body),
|
||||
put: (path: string, body: unknown) => apiPut(path, body),
|
||||
post: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
// Shared store stub — `updateNodeData` is exposed so a test can assert the
|
||||
// node-data flush happens after a successful PATCH (regression: previously
|
||||
// the DB updated but the canvas badge stayed stale until full hydrate).
|
||||
// Module-level spies so that every selector call and every getState() call
// hands out the same two functions, letting tests assert on them afterwards.
const storeUpdateNodeData = vi.fn();
|
||||
const storeRestartWorkspace = vi.fn();
|
||||
// Stub the canvas store hook: callable with a selector, and also exposing
// getState(); both return the shared spies declared above.
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: Object.assign(
|
||||
(selector: (s: unknown) => unknown) => selector({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }),
|
||||
{ getState: () => ({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }) },
|
||||
),
|
||||
}));
|
||||
|
||||
// Swap AgentCardSection for an empty marker <div> (data-testid
// "agent-card-stub") so the real component is never rendered in these tests.
vi.mock("../AgentCardSection", () => ({
|
||||
AgentCardSection: () => <div data-testid="agent-card-stub" />,
|
||||
}));
|
||||
|
||||
import { ConfigTab } from "../ConfigTab";
|
||||
|
||||
// wireApi — same shape as ConfigTab.hermes.test.tsx, extended with the
|
||||
// /provider endpoint. Each test sets `providerValue` to the value the
|
||||
// GET endpoint returns; "missing" means the endpoint rejects (older
|
||||
// workspace-server pre-PR-2 — must not crash the tab).
|
||||
/**
 * Program the `apiGet` spy for the fixed "ws-test" workspace.
 *
 * Every field of `opts` is optional and falls back to an empty value:
 * - `workspaceRuntime` / `workspaceModel`: served from the workspace and
 *   model endpoints.
 * - `providerValue`: served from the provider endpoint; the literal
 *   "missing" makes that endpoint reject instead. `source` is reported as
 *   "workspace_secrets" for a non-empty value and "default" otherwise.
 * - `configYamlContent`: served as the config.yaml content; `null` makes the
 *   file request reject.
 * - `templates`: served from GET /templates.
 *
 * Any path not listed here rejects so an unexpected request fails loudly.
 */
function wireApi(opts: {
  workspaceRuntime?: string;
  workspaceModel?: string;
  configYamlContent?: string | null;
  templates?: Array<{ id: string; name?: string; runtime?: string; models?: unknown[]; providers?: string[] }>;
  providerValue?: string | "missing";
}) {
  apiGet.mockImplementation((path: string) => {
    switch (path) {
      case "/workspaces/ws-test":
        return Promise.resolve({ runtime: opts.workspaceRuntime ?? "" });

      case "/workspaces/ws-test/model":
        return Promise.resolve({ model: opts.workspaceModel ?? "" });

      case "/workspaces/ws-test/provider": {
        if (opts.providerValue === "missing") {
          return Promise.reject(new Error("404"));
        }
        const provider = opts.providerValue ?? "";
        const source = opts.providerValue ? "workspace_secrets" : "default";
        return Promise.resolve({ provider, source });
      }

      case "/workspaces/ws-test/files/config.yaml":
        return opts.configYamlContent === null
          ? Promise.reject(new Error("not found"))
          : Promise.resolve({ content: opts.configYamlContent ?? "" });

      case "/templates":
        return Promise.resolve(opts.templates ?? []);

      default:
        return Promise.reject(new Error(`unmocked api.get: ${path}`));
    }
  });
}
|
||||
|
||||
// Start every test with clean spies: mockReset clears both the recorded calls
// and any implementation a previous test installed.
beforeEach(() => {
  const spies = [apiGet, apiPatch, apiPut, storeUpdateNodeData, storeRestartWorkspace];
  for (const spy of spies) {
    spy.mockReset();
  }
});
|
||||
|
||||
describe("ConfigTab — Provider override (Option B PR-5)", () => {
|
||||
// Empty provider on load is the legitimate default ("auto-derive
|
||||
// from model slug prefix"), NOT an error. The endpoint returning
|
||||
// {provider: "", source: "default"} is the documented happy-path
|
||||
// shape — if the form treated that as "load failed" we'd lose the
|
||||
// ability to render the input at all on fresh workspaces.
|
||||
it("renders an empty Provider input when no override is set", async () => {
  // A hermes workspace whose provider endpoint reports no override.
  const workspaceWithoutOverride = {
    workspaceRuntime: "hermes",
    workspaceModel: "nousresearch/hermes-4-70b",
    configYamlContent: "name: ws\nruntime: hermes\n",
    providerValue: "",
  };
  wireApi(workspaceWithoutOverride);

  render(<ConfigTab workspaceId="ws-test" />);

  // The field must render, and it must be blank.
  const providerField = (await screen.findByTestId("provider-input")) as HTMLInputElement;
  expect(providerField.value).toBe("");
});
|
||||
|
||||
// Pre-existing override loads back into the field on mount. Without
|
||||
// this, an operator who set provider=openrouter yesterday would see
|
||||
// the field blank today, conclude the value didn't stick, and
|
||||
// re-save — the resulting PUT-with-same-value would auto-restart
|
||||
// the workspace for nothing.
|
||||
it("loads an existing provider override from the server", async () => {
  // A hermes workspace whose provider endpoint reports "openrouter".
  const workspaceWithOverride = {
    workspaceRuntime: "hermes",
    workspaceModel: "nousresearch/hermes-4-70b",
    configYamlContent: "name: ws\nruntime: hermes\n",
    providerValue: "openrouter",
  };
  wireApi(workspaceWithOverride);

  render(<ConfigTab workspaceId="ws-test" />);

  // The stored value arrives asynchronously, so poll until the field shows it.
  const providerField = (await screen.findByTestId("provider-input")) as HTMLInputElement;
  await waitFor(() => {
    expect(providerField.value).toBe("openrouter");
  });
});
|
||||
|
||||
// Old workspace-server (pre-PR-2) returns a 404 on /provider. The
|
||||
// tab must keep loading — the fallback is "" (auto-derive), same as
|
||||
// a fresh workspace.
|
||||
it("falls back to empty provider when the endpoint is missing", async () => {
  // "missing" makes the mocked provider endpoint reject.
  const workspaceWithoutEndpoint = {
    workspaceRuntime: "hermes",
    workspaceModel: "nousresearch/hermes-4-70b",
    configYamlContent: "name: ws\nruntime: hermes\n",
    providerValue: "missing",
  };
  wireApi(workspaceWithoutEndpoint);

  render(<ConfigTab workspaceId="ws-test" />);

  // The field still renders, blank.
  const providerField = (await screen.findByTestId("provider-input")) as HTMLInputElement;
  expect(providerField.value).toBe("");

  // The tab has left its loading state rather than getting stuck on it.
  const loadingNotice = screen.queryByText(/Loading config/i);
  expect(loadingNotice).toBeNull();
});
|
||||
|
||||
// Setting a value + Save must PUT to the right endpoint with the
|
||||
// right body shape. Server-side handler (workspace-server
|
||||
// handlers/secrets.go:SetProvider) reads body.provider — any other
|
||||
// key gets silently ignored and the workspace_secrets row stays
|
||||
// unset. This regression would manifest as "Save → Restart →
|
||||
// workspace still says No LLM provider configured."
|
||||
it("PUTs the new provider to /workspaces/:id/provider on Save", async () => {
  // Arrange: no override stored yet; the PUT succeeds.
  wireApi({
    workspaceRuntime: "hermes",
    workspaceModel: "nousresearch/hermes-4-70b",
    configYamlContent: "name: ws\nruntime: hermes\n",
    providerValue: "",
  });
  apiPut.mockResolvedValue({ status: "saved", provider: "anthropic" });
  render(<ConfigTab workspaceId="ws-test" />);

  // Act: type a provider, confirm the field took it, then save.
  const providerField = (await screen.findByTestId("provider-input")) as HTMLInputElement;
  fireEvent.change(providerField, { target: { value: "anthropic" } });
  expect(providerField.value).toBe("anthropic");
  fireEvent.click(screen.getByRole("button", { name: /^save$/i }));

  // Assert: exactly one PUT to the provider endpoint, with the `provider` key.
  await waitFor(() => {
    const providerPuts = apiPut.mock.calls.filter(
      (call) => call[0] === "/workspaces/ws-test/provider",
    );
    expect(providerPuts.length).toBe(1);
    const [, body] = providerPuts[0];
    expect(body).toEqual({ provider: "anthropic" });
  });
});
|
||||
|
||||
// No-change Save must NOT PUT /provider. The server-side SetProvider
|
||||
// auto-restarts the workspace on every successful PUT — re-writing
|
||||
// an unchanged value would cost the user a ~30s reboot every time
|
||||
// they tweak some other field.
|
||||
it("does not PUT /provider when the value is unchanged", async () => {
  // Arrange: an override is already stored; PUT and PATCH both succeed.
  wireApi({
    workspaceRuntime: "hermes",
    workspaceModel: "nousresearch/hermes-4-70b",
    configYamlContent: "name: ws\nruntime: hermes\ntier: 2\n",
    providerValue: "openrouter",
  });
  apiPut.mockResolvedValue({});
  apiPatch.mockResolvedValue({});

  render(<ConfigTab workspaceId="ws-test" />);

  // Wait for the stored provider to land in the field so the "unchanged"
  // baseline really is the server value, not the pre-load blank.
  const providerField = (await screen.findByTestId("provider-input")) as HTMLInputElement;
  await waitFor(() => {
    expect(providerField.value).toBe("openrouter");
  });

  // Act: leave the provider alone and dirty the tier instead so Save is
  // enabled — the test is about NOT touching /provider, not about Save
  // being disabled.
  const tierSelect = screen.getByLabelText(/tier/i) as HTMLSelectElement;
  fireEvent.change(tierSelect, { target: { value: "3" } });
  fireEvent.click(screen.getByRole("button", { name: /^save$/i }));

  // waitFor resolves as soon as its callback stops throwing, so a
  // negative-only assertion inside it would pass on the first poll, before
  // the async save has done anything. Wait for a positive sign that the save
  // ran (the workspace PATCH carrying the tier edit) and only then check
  // that /provider stayed untouched.
  // NOTE(review): assumes the tier edit is persisted via PATCH
  // /workspaces/ws-test, as the sibling tier tests in this suite expect; a
  // provider PUT issued later in the same save would still go unnoticed.
  await waitFor(() => {
    const sawWorkspacePatch = apiPatch.mock.calls.some(
      ([path]) => path === "/workspaces/ws-test",
    );
    expect(sawWorkspacePatch).toBe(true);
  });

  // Other PUT(s) may fire (e.g. /model); /provider must not be among them.
  const providerCalls = apiPut.mock.calls.filter(
    ([path]) => path === "/workspaces/ws-test/provider",
  );
  expect(providerCalls.length).toBe(0);
});
|
||||
|
||||
// The dropdown's suggestion list MUST come from the runtime's own
|
||||
// template (via /templates → runtime_config.providers), not a
|
||||
// hardcoded canvas-side enum. This is the "Native + pluggable
|
||||
// runtime" invariant: a new runtime declaring its own provider
|
||||
// taxonomy in its config.yaml gets a working dropdown without ANY
|
||||
// canvas-side change.
|
||||
//
|
||||
// Pinned by checking that suggestions surfaced in the datalist
|
||||
// exactly mirror what the templates endpoint returned for the
|
||||
// matching runtime. If a future contributor reintroduces a
|
||||
// PROVIDER_SUGGESTIONS-style hardcoded list and the datalist
|
||||
// contents don't follow the template, this test fails.
|
||||
it("populates the provider datalist from the matched runtime's templates entry", async () => {
  // The hermes template declares its own provider list; the canvas is expected
  // to surface exactly this list rather than one of its own.
  const hermesTemplate = {
    id: "hermes",
    name: "Hermes",
    runtime: "hermes",
    models: [],
    providers: ["nous", "openrouter", "anthropic", "minimax-cn"],
  };
  wireApi({
    workspaceRuntime: "hermes",
    workspaceModel: "nousresearch/hermes-4-70b",
    configYamlContent: "name: ws\nruntime: hermes\n",
    providerValue: "",
    templates: [hermesTemplate],
  });

  render(<ConfigTab workspaceId="ws-test" />);

  // The input points at its suggestion list through the `list` attribute.
  const providerField = (await screen.findByTestId("provider-input")) as HTMLInputElement;
  const datalistId = providerField.getAttribute("list");
  expect(datalistId).toBeTruthy();

  await waitFor(() => {
    const datalist = document.getElementById(datalistId!);
    expect(datalist).not.toBeNull();

    const suggestions: string[] = [];
    datalist!.querySelectorAll("option").forEach((option) => {
      suggestions.push((option as HTMLOptionElement).value);
    });

    // toEqual on arrays is order-sensitive: the template's ordering is part
    // of what is pinned here.
    expect(suggestions).toEqual(["nous", "openrouter", "anthropic", "minimax-cn"]);
  });
});
|
||||
|
||||
// Fallback path: when a template hasn't migrated to the explicit
|
||||
// `providers:` field yet, suggestions are derived from model slug
|
||||
// prefixes. Still adapter-driven (the slugs come from the template's
|
||||
// `models:` list), just inferred. This keeps existing templates
|
||||
// working while the platform team migrates them one at a time.
|
||||
it("renders vendor-grouped provider dropdown when template ships models", async () => {
  // Four models across three vendor prefixes (anthropic appears twice) and no
  // explicit `providers:` field on the template.
  const hermesModels = [
    { id: "anthropic/claude-opus-4-7", required_env: ["ANTHROPIC_API_KEY"] },
    { id: "openai/gpt-4o", required_env: ["OPENROUTER_API_KEY"] },
    { id: "anthropic/claude-sonnet-4-5", required_env: ["ANTHROPIC_API_KEY"] },
    { id: "nousresearch/hermes-4-70b", required_env: ["HERMES_API_KEY"] },
  ];
  wireApi({
    workspaceRuntime: "hermes",
    workspaceModel: "anthropic/claude-opus-4-7",
    configYamlContent: "name: ws\nruntime: hermes\n",
    providerValue: "",
    templates: [{ id: "hermes", name: "Hermes", runtime: "hermes", models: hermesModels }],
  });

  render(<ConfigTab workspaceId="ws-test" />);

  const vendorSelect = (await screen.findByTestId("provider-select")) as HTMLSelectElement;
  await waitFor(() => {
    // Ignore the placeholder entry, whose label starts with an em dash.
    const vendorLabels = Array.from(vendorSelect.options, (option) => option.text).filter(
      (label) => !label.startsWith("—"),
    );

    // Each expected vendor shows up under its display name...
    for (const prefix of ["Anthropic API", "OpenAI", "Nous Research"]) {
      expect(vendorLabels.some((label) => label.startsWith(prefix))).toBe(true);
    }
    // ...and the duplicated anthropic vendor collapses to a single entry.
    expect(vendorLabels.length).toBe(3);
  });
});
|
||||
|
||||
// Empty string is a legitimate save target — it clears the override
|
||||
// (the server-side endpoint deletes the workspace_secrets row).
|
||||
// Operators who picked "anthropic" yesterday and want to revert to
|
||||
// auto-derive today should be able to do so by clearing the field
|
||||
// and clicking Save. Without this PUT path, the only way to clear
|
||||
// would be a direct DB edit.
|
||||
it("PUTs an empty string when the operator clears a previously-set provider", async () => {
  // Arrange: an override is stored; the PUT succeeds.
  wireApi({
    workspaceRuntime: "hermes",
    workspaceModel: "anthropic:claude-opus-4-7",
    configYamlContent: "name: ws\nruntime: hermes\n",
    providerValue: "openrouter",
  });
  apiPut.mockResolvedValue({ status: "cleared" });
  render(<ConfigTab workspaceId="ws-test" />);

  // Wait for the stored value to load before clearing it.
  const providerField = (await screen.findByTestId("provider-input")) as HTMLInputElement;
  await waitFor(() => {
    expect(providerField.value).toBe("openrouter");
  });

  // Act: blank the field and save.
  fireEvent.change(providerField, { target: { value: "" } });
  fireEvent.click(screen.getByRole("button", { name: /^save$/i }));

  // Assert: exactly one provider PUT, carrying the empty string.
  await waitFor(() => {
    const providerPuts = apiPut.mock.calls.filter(
      (call) => call[0] === "/workspaces/ws-test/provider",
    );
    expect(providerPuts.length).toBe(1);
    const [, body] = providerPuts[0];
    expect(body).toEqual({ provider: "" });
  });
});
|
||||
|
||||
// Display-vs-storage drift regression (2026-05-03 incident, workspace
|
||||
// e13aebd8…). User deployed claude-code with MiniMax-M2 stored in
|
||||
// MODEL_PROVIDER. The container env (MODEL=MiniMax-M2) and chat
|
||||
// worked correctly, but the Config tab showed "Claude Code
|
||||
// subscription / Claude Sonnet (OAuth)" — i.e. the template's
|
||||
// runtime_config.model: sonnet default — because currentModelId
|
||||
// reads runtime_config.model first and loadConfig was overriding
|
||||
// only the top-level config.model field. The merged shape was:
|
||||
// { model: "MiniMax-M2", runtime_config: { model: "sonnet" } }
|
||||
// and currentModelId picked "sonnet". Fix: loadConfig propagates
|
||||
// wsMetadataModel into BOTH places so the form is a single source
|
||||
// of truth (DB-backed MODEL_PROVIDER). Pinning the merged-path
|
||||
// branch with the exact reproducing shape: claude-code template
|
||||
// YAML has runtime_config.model: sonnet; live workspace's
|
||||
// MODEL_PROVIDER is MiniMax-M2; tab must show the latter.
|
||||
it("prefers MODEL_PROVIDER over the template's runtime_config.model on load", async () => {
  // The YAML carries the template default ("sonnet") while the model endpoint
  // reports "MiniMax-M2"; both ids exist in the template's model list.
  const claudeCodeTemplate = {
    id: "claude-code-default",
    name: "Claude Code",
    runtime: "claude-code",
    models: [
      { id: "sonnet", name: "Claude Sonnet (OAuth)", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
      { id: "MiniMax-M2", name: "MiniMax M2", required_env: ["MINIMAX_API_KEY"] },
      { id: "MiniMax-M2.7", name: "MiniMax M2.7", required_env: ["MINIMAX_API_KEY"] },
    ],
  };
  wireApi({
    workspaceRuntime: "claude-code",
    workspaceModel: "MiniMax-M2",
    configYamlContent: "name: ws\nruntime: claude-code\nruntime_config:\n  model: sonnet\n",
    providerValue: "",
    templates: [claudeCodeTemplate],
  });

  render(<ConfigTab workspaceId="ws-test" />);

  // The model selector must settle on the value from the model endpoint.
  const modelSelect = (await screen.findByTestId("model-select")) as HTMLSelectElement;
  await waitFor(() => {
    expect(modelSelect.value).toBe("MiniMax-M2");
  });

  // With no provider override stored, the provider selector is expected to
  // follow the model and show a MiniMax entry.
  const providerSelect = screen.getByTestId("provider-select") as HTMLSelectElement;
  const chosen = providerSelect.options[providerSelect.selectedIndex];
  const chosenLabel = chosen.textContent ?? "";
  expect(chosenLabel).toMatch(/MiniMax/);
});
|
||||
|
||||
// Sibling pin to the display-fix above. The display fix mirrors
|
||||
// wsMetadataModel into runtime_config.model so the selector renders
|
||||
// the live value; that mirror means handleSave's old YAML-vs-form
|
||||
// diff would always be non-zero on a no-op save (YAML default
|
||||
// "sonnet" vs. mirrored "MiniMax-M2") and PUT /model — which
|
||||
// server-side SetModel chains into an auto-restart. handleSave now
|
||||
// diffs against the loaded MODEL_PROVIDER instead. Pin: an
|
||||
// unrelated edit (tier change) must NOT touch /model when the
|
||||
// model itself didn't change.
|
||||
it("does not PUT /model on a no-op save when only an unrelated field changed", async () => {
  // The YAML default ("sonnet") differs from the model the endpoint reports
  // ("MiniMax-M2"), and the YAML starts at tier 2.
  const claudeCodeTemplate = {
    id: "claude-code-default",
    name: "Claude Code",
    runtime: "claude-code",
    models: [
      { id: "sonnet", name: "Claude Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
      { id: "MiniMax-M2", name: "MiniMax M2", required_env: ["MINIMAX_API_KEY"] },
    ],
  };
  wireApi({
    workspaceRuntime: "claude-code",
    workspaceModel: "MiniMax-M2",
    configYamlContent: "name: ws\nruntime: claude-code\ntier: 2\nruntime_config:\n  model: sonnet\n",
    providerValue: "",
    templates: [claudeCodeTemplate],
  });
  apiPut.mockResolvedValue({});
  apiPatch.mockResolvedValue({});

  render(<ConfigTab workspaceId="ws-test" />);

  // Act: change only the tier, then save.
  const tierField = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
  fireEvent.change(tierField, { target: { value: "3" } });
  fireEvent.click(screen.getByRole("button", { name: /^save$/i }));

  // Positive signal first: the tier edit reaches the workspace PATCH once.
  await waitFor(() => {
    const tierPatches = apiPatch.mock.calls.filter((call) => {
      const [path, body] = call;
      return path === "/workspaces/ws-test" && (body as { tier?: number }).tier === 3;
    });
    expect(tierPatches.length).toBe(1);
  });

  // The model itself was not edited, so the model endpoint must see no PUT.
  const modelPuts = apiPut.mock.calls.filter((call) => call[0] === "/workspaces/ws-test/model");
  expect(modelPuts.length).toBe(0);
});
|
||||
|
||||
// Save-then-stale-badge regression (2026-05-03 incident). User
|
||||
// selected T3 in the Tier dropdown, hit Save & Restart, the workspace
|
||||
// PATCH succeeded (`tier: 3` in DB), but the canvas header pill kept
|
||||
// showing "TIER T2" until a full hydrate. Root cause: handleSave
|
||||
// sent the PATCH to workspace-server but never pushed the same
|
||||
// change into useCanvasStore.updateNodeData, so every UI surface
|
||||
// reading from the store kept its stale value. Pin: a successful
|
||||
// tier PATCH must mirror into the store so the badge updates
|
||||
// synchronously with the response.
|
||||
it("flushes the dbPatch into useCanvasStore.updateNodeData after a successful PATCH", async () => {
  // Arrange: a tier-2 workspace whose PATCH succeeds.
  const claudeCodeTemplate = {
    id: "claude-code-default",
    name: "Claude Code",
    runtime: "claude-code",
    models: [{ id: "sonnet", name: "Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] }],
  };
  wireApi({
    workspaceRuntime: "claude-code",
    workspaceModel: "MiniMax-M2",
    configYamlContent: "name: ws\nruntime: claude-code\ntier: 2\nruntime_config:\n  model: sonnet\n",
    providerValue: "",
    templates: [claudeCodeTemplate],
  });
  apiPatch.mockResolvedValue({ status: "updated" });

  render(<ConfigTab workspaceId="ws-test" />);

  // Act: raise the tier and save.
  const tierField = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
  fireEvent.change(tierField, { target: { value: "3" } });
  fireEvent.click(screen.getByRole("button", { name: /^save$/i }));

  // Wait until the workspace PATCH has been issued.
  await waitFor(() => {
    const patchedWorkspace = apiPatch.mock.calls.some((call) => call[0] === "/workspaces/ws-test");
    expect(patchedWorkspace).toBe(true);
  });

  // The same tier must then be mirrored into the canvas store for this
  // workspace id; objectContaining tolerates additional fields in the payload.
  const expectedFields = expect.objectContaining({ tier: 3 });
  expect(storeUpdateNodeData).toHaveBeenCalledWith("ws-test", expectedFields);
});
|
||||
|
||||
// Failure-gating sibling pin to the store-flush test above. The
|
||||
// production code places `updateNodeData` AFTER `await api.patch(...)`
|
||||
// inside the same `if (Object.keys(dbPatch).length > 0)` block, so a
|
||||
// PATCH rejection should throw before the store call. Without this
|
||||
// pin, a future refactor that wraps the PATCH in try/catch and
|
||||
// unconditionally calls updateNodeData would ship green — and then
|
||||
// the badge would lie when the server actually rejected the change.
|
||||
// Codified review feedback from PR #2545 (Agent 2).
|
||||
it("does NOT flush into useCanvasStore.updateNodeData when the PATCH rejects", async () => {
  // Same fixture as the success-path store-flush test: claude-code
  // workspace, YAML at tier 2, one template with the `sonnet` model.
  wireApi({
    workspaceRuntime: "claude-code",
    workspaceModel: "MiniMax-M2",
    configYamlContent: "name: ws\nruntime: claude-code\ntier: 2\nruntime_config:\n model: sonnet\n",
    providerValue: "",
    templates: [
      {
        id: "claude-code-default",
        name: "Claude Code",
        runtime: "claude-code",
        models: [{ id: "sonnet", name: "Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] }],
      },
    ],
  });
  // The server refuses the write.
  apiPatch.mockRejectedValue(new Error("500 from workspace-server"));

  render(<ConfigTab workspaceId="ws-test" />);

  // Switch the Tier dropdown to 3, then press Save.
  const tierDropdown = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
  fireEvent.change(tierDropdown, { target: { value: "3" } });
  fireEvent.click(screen.getByRole("button", { name: /^save$/i }));

  // Wait until the PATCH has at least been attempted; the save handler is
  // expected to settle on its own after the rejection.
  const workspacePatchAttempted = () =>
    apiPatch.mock.calls.some(([requestPath]) => requestPath === "/workspaces/ws-test");
  await waitFor(() => {
    expect(workspacePatchAttempted()).toBe(true);
  });

  // The store must never hear about a tier the server rejected — otherwise
  // the badge would advertise a write that did not happen.
  const storeCallsCarryingTier = storeUpdateNodeData.mock.calls.filter(
    ([, payload]) => typeof (payload as { tier?: number }).tier === "number",
  );
  expect(storeCallsCarryingTier).toHaveLength(0);
});
|
||||
|
||||
// Pin the hermes/pre-#240 edge case: workspace where MODEL_PROVIDER
|
||||
// was never written but YAML has runtime_config.model: "something".
|
||||
// originalModel must reflect the rendered baseline (the YAML value),
|
||||
// not the empty MODEL_PROVIDER, so an unrelated save (tier change)
|
||||
// doesn't fire a /model PUT and trigger an auto-restart. Codified
|
||||
// review feedback from PR #2545 (Agent 1, "Important").
|
||||
it("does not PUT /model when MODEL_PROVIDER is empty and the user only edited an unrelated field", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "", // legacy workspace — never went through the picker
|
||||
configYamlContent:
|
||||
"name: ws\nruntime: hermes\ntier: 2\nruntime_config:\n model: nousresearch/hermes-4-70b\n",
|
||||
providerValue: "",
|
||||
templates: [
|
||||
{
|
||||
id: "hermes",
|
||||
name: "Hermes",
|
||||
runtime: "hermes",
|
||||
models: [{ id: "nousresearch/hermes-4-70b", name: "Hermes 4 70B", required_env: ["HERMES_API_KEY"] }],
|
||||
providers: ["nous"],
|
||||
},
|
||||
],
|
||||
});
|
||||
apiPut.mockResolvedValue({});
|
||||
apiPatch.mockResolvedValue({});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
|
||||
fireEvent.change(tierSelect, { target: { value: "3" } });
|
||||
|
||||
const saveBtn = screen.getByRole("button", { name: /^save$/i });
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(apiPatch.mock.calls.some(([p]) => p === "/workspaces/ws-test")).toBe(true);
|
||||
});
|
||||
const modelPuts = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/model");
|
||||
expect(modelPuts.length).toBe(0);
|
||||
describe("ConfigTab provider override — retired (internal#718 P4)", () => {
  // Skipped placeholder for the retired flow; the callback is empty on purpose.
  it.skip("LLM_PROVIDER override flow is retired; see file header for the replacement coverage", () => {});
});
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// internal#718 P3 (retire-list #5) — the billing-mode the Config tab shows /
|
||||
// sends must reflect the DERIVED provider per the registry, not the hardcoded
|
||||
// billingModeForProvider("" | "platform" → platform_managed else byok) rule.
|
||||
// When the runtime is registry-backed, billingModeForSelectedProvider reads the
|
||||
// registry-served billing_mode off the provider catalog entry. The hardcoded
|
||||
// rule remains only as the fallback for non-registry runtimes / older backends.
|
||||
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { billingModeForSelectedProvider, billingModeForProvider } from "../ConfigTab";
|
||||
import {
|
||||
buildProviderCatalogFromRegistry,
|
||||
type RegistryProvider,
|
||||
type RegistryModel,
|
||||
} from "../../ProviderModelSelector";
|
||||
|
||||
// Registry provider fixtures: one BYOK provider, the platform provider, and
// one provider chosen to contradict the legacy name-based billing rule.
const REGISTRY_PROVIDERS: RegistryProvider[] = [
  {
    name: "anthropic-oauth",
    display_name: "Claude Code subscription",
    auth_env: ["CLAUDE_CODE_OAUTH_TOKEN"],
    billing_mode: "byok",
  },
  {
    name: "platform",
    display_name: "Platform",
    auth_env: ["ANTHROPIC_API_KEY"],
    billing_mode: "platform_managed",
  },
  // DISCRIMINATING fixture (review #7790). The name is neither "platform"
  // nor "", so the hardcoded billingModeForProvider() rule would classify it
  // as "byok" — yet the registry entry declares "platform_managed" (a managed
  // provider that is not literally named "platform"). Only an implementation
  // that honors the catalog can return "platform_managed" for it, so a
  // regression to the hardcoded rule is caught by this entry.
  {
    name: "managed-federated",
    display_name: "Managed (federated)",
    auth_env: [],
    billing_mode: "platform_managed",
  },
];
|
||||
// Registry model fixtures — one model per provider fixture.
const REGISTRY_MODELS: RegistryModel[] = [
  {
    id: "sonnet",
    provider: "anthropic-oauth",
    billing_mode: "byok",
  },
  {
    id: "anthropic/claude-opus-4-7",
    provider: "platform",
    billing_mode: "platform_managed",
  },
  // Owned by the "managed-federated" provider so the catalog gets an entry
  // for it: per the original fixture note, buildProviderCatalogFromRegistry
  // only emits providers that own at least one model.
  {
    id: "managed/some-model",
    provider: "managed-federated",
    billing_mode: "platform_managed",
  },
];
|
||||
|
||||
describe("billingModeForSelectedProvider (registry-driven)", () => {
  // One catalog, built from the registry fixtures, shared by every case.
  const registryCatalog = buildProviderCatalogFromRegistry(REGISTRY_PROVIDERS, REGISTRY_MODELS);

  it("reads platform_managed from the registry for the platform provider", () => {
    const mode = billingModeForSelectedProvider("platform", registryCatalog);
    expect(mode).toBe("platform_managed");
  });

  it("reads byok from the registry for a BYOK provider", () => {
    // The legacy name-based rule also answers "byok" for this name, so this
    // case alone cannot show that the registry is authoritative; the
    // disagreement case below is the one that does.
    const mode = billingModeForSelectedProvider("anthropic-oauth", registryCatalog);
    expect(mode).toBe("byok");
  });

  it("lets the registry billing_mode WIN when it disagrees with the hardcoded rule", () => {
    // Sanity first: the legacy rule really does say "byok" for this name…
    const legacyMode = billingModeForProvider("managed-federated");
    expect(legacyMode).toBe("byok");

    // …while the registry fixture declares platform_managed. The registry
    // value must win; an implementation that fell back to
    // billingModeForProvider would return "byok" and fail here.
    const registryMode = billingModeForSelectedProvider("managed-federated", registryCatalog);
    expect(registryMode).toBe("platform_managed");
  });

  it("falls back to the hardcoded rule when no registry catalog is supplied", () => {
    // No catalog → legacy mapping: "" and "platform" are platform_managed,
    // any other provider name is byok.
    const legacyExpectations = [
      ["", "platform_managed"],
      ["platform", "platform_managed"],
      ["minimax", "byok"],
    ] as const;
    for (const [providerName, expectedMode] of legacyExpectations) {
      expect(billingModeForSelectedProvider(providerName, undefined)).toBe(expectedMode);
    }
  });

  it("falls back to the hardcoded rule when the provider is not in the registry catalog", () => {
    // A provider string the catalog does not carry (e.g. a stale saved value)
    // is resolved by the legacy rule.
    const mode = billingModeForSelectedProvider("some-byo-vendor", registryCatalog);
    expect(mode).toBe("byok");
  });
});
|
||||
@@ -297,6 +297,25 @@ describe("DetailsTab — delete workflow", () => {
|
||||
expect(mockSelectNode).toHaveBeenCalledWith(null);
|
||||
});
|
||||
|
||||
// internal#734: checking "also erase saved data" adds &erase_data=true so the
|
||||
// server prunes the data volume. Default (unchecked) must NOT send it.
|
||||
it("checking erase-saved-data sends erase_data=true on delete", async () => {
  mockApi.del.mockResolvedValue(undefined);
  render(<DetailsTab workspaceId="ws-1" data={data()} />);
  await flush();

  // Open the delete confirmation, then tick the "erase saved data" checkbox.
  const deleteButton = screen.getByRole("button", { name: /delete workspace/i });
  fireEvent.click(deleteButton);
  await flush();
  const eraseCheckbox = screen.getByRole("checkbox", { name: /erase saved data/i });
  fireEvent.click(eraseCheckbox);

  // Locate the confirm button by its exact label and dispatch a bubbling click.
  const allButtons = Array.from(document.querySelectorAll("button"));
  const confirmButton = allButtons.find(
    (candidate) => candidate.textContent === "Confirm Delete",
  ) as HTMLButtonElement;
  fireEvent(confirmButton, new MouseEvent("click", { bubbles: true }));
  await flush();

  // The DELETE call must carry erase_data=true alongside confirm=true.
  const expectedUrl = "/workspaces/ws-1?confirm=true&erase_data=true";
  expect(mockApi.del).toHaveBeenCalledWith(expectedUrl, {
    headers: { "X-Confirm-Name": "Test Workspace" },
  });
});
|
||||
|
||||
it("cancelling delete returns to view mode", async () => {
|
||||
mockApi.del.mockResolvedValue(undefined);
|
||||
render(<DetailsTab workspaceId="ws-1" data={data()} />);
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
const RUNTIME_NAMES: Record<string, string> = {
|
||||
"claude-code": "Claude Code",
|
||||
codex: "Codex",
|
||||
"google-adk": "Google ADK",
|
||||
hermes: "Hermes",
|
||||
openclaw: "OpenClaw",
|
||||
kimi: "Kimi",
|
||||
|
||||
@@ -368,6 +368,9 @@ export interface WorkspaceCompute {
|
||||
width?: number;
|
||||
height?: number;
|
||||
};
|
||||
// internal#734: per-workspace durable-data choice. "persist" | "ephemeral" |
|
||||
// undefined (auto). Controls whether the data volume survives recreate.
|
||||
data_persistence?: string;
|
||||
}
|
||||
|
||||
let socket: ReconnectingSocket | null = null;
|
||||
|
||||
@@ -1,5 +1,16 @@
|
||||
# Running a Gemini CLI Workspace on Molecule AI
|
||||
|
||||
> **⚠️ Accuracy correction (2026-05-29):** this page is **aspirational, not
|
||||
> shipped.** There is **no `gemini-cli` runtime** in `manifest.json` or the
|
||||
> provisioner's `knownRuntimes`, and the "PR #379" cited below is unrelated (a
|
||||
> CI-workflow-cleanup PR, not a gemini-cli adapter). Do not follow this as-is.
|
||||
>
|
||||
> **For Gemini on Molecule, use the real `google-adk` runtime instead** — see
|
||||
> [`google-adk-runtime.md`](./google-adk-runtime.md) (ADK engine + Gemini on
|
||||
> Vertex AI/AI Studio), implemented in PR
|
||||
> [`molecule-ai-workspace-template-google-adk#1`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-google-adk) per RFC `internal#730`.
|
||||
> This gemini-cli page is retained only until it's either implemented for real or removed.
|
||||
|
||||
Molecule AI now ships a `gemini-cli` runtime adapter alongside the existing `claude-code` adapter. This tutorial walks you from zero to a running Gemini agent workspace in under five minutes.
|
||||
|
||||
## What you'll need
|
||||
|
||||
@@ -1,74 +1,69 @@
|
||||
# Running a Google ADK Workspace on Molecule AI
|
||||
|
||||
Google's Agent Development Kit (ADK) is now a first-class runtime on Molecule AI. This tutorial walks you from zero to a running ADK agent workspace — one that persists per-conversation session state and sits alongside your Claude Code and Gemini CLI workers in the same A2A network.
|
||||
> **Status (2026-05-29):** the `google-adk` runtime is **landing**, not yet on
|
||||
> `main`. It's implemented in the template repo
|
||||
> [`molecule-ai-workspace-template-google-adk`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-google-adk)
|
||||
> (PR **#1**) with platform registration in molecule-core PR **#2003** and the
|
||||
> validator allowlist in molecule-ci PR **#26**. Design + approval: RFC
|
||||
> [`internal#730`](https://git.moleculesai.app/molecule-ai/internal/issues/730).
|
||||
> Remove this banner once those PRs merge.
|
||||
>
|
||||
> **Doc-accuracy note:** a prior version of this page claimed ADK was "already
|
||||
> first-class" and cited "PR #550" — that PR is unrelated (a MemoryTab test
|
||||
> suite). No `google-adk` adapter existed at that time. This rewrite reflects
|
||||
> the real implementation.
|
||||
|
||||
## What you'll need
|
||||
Google's Agent Development Kit (ADK) runs as a Molecule AI workspace runtime:
|
||||
ADK is the **agent engine** (`LlmAgent` + `Runner`), and the workspace
|
||||
participates in Molecule's A2A org like any other runtime.
|
||||
|
||||
- A Molecule AI account with at least one provisioned tenant
|
||||
- A `GOOGLE_API_KEY` from [aistudio.google.com](https://aistudio.google.com) (or Vertex AI credentials — see below)
|
||||
- `curl` + `jq`
|
||||
## How it actually works
|
||||
|
||||
## Setup
|
||||
- **ADK = engine only.** The adapter builds an ADK `LlmAgent` from the
|
||||
workspace config (model + system prompt + tools) and drives its `Runner`.
|
||||
It installs `google-adk[mcp]==2.1.0` and **never** the `[a2a]` extra — ADK's
|
||||
a2a layer pins `a2a-sdk<0.4`, which is incompatible with the platform's
|
||||
`a2a-sdk>=1.0`. (Verified: `google-adk[mcp]==2.1.0` + `a2a-sdk 1.0.3` coexist.)
|
||||
- **A2A** is provided by the platform's a2a-1.x server; a Molecule-authored
|
||||
executor bridges ADK's `Runner` event stream onto it, one ADK session per
|
||||
A2A `context_id`.
|
||||
- **Tools** reach the agent via ADK's native `McpToolset` pointed at the
|
||||
workspace's `a2a_mcp_server` — the same MCP surface the CLI runtimes use
|
||||
(`delegate_task`, `commit_memory`, `list_peers`, …). No LangChain.
|
||||
|
||||
## Auth — Vertex AI via ADC (keyless), or an AI Studio key
|
||||
|
||||
The runtime supports both google-genai auth paths:
|
||||
|
||||
- **Vertex AI + Application Default Credentials (recommended; required if your
|
||||
org disallows API keys).** Set `model: vertex:gemini-2.5-pro` and provide
|
||||
`GOOGLE_CLOUD_PROJECT`; the adapter sets `GOOGLE_GENAI_USE_VERTEXAI=1` and
|
||||
google-genai authenticates via ADC — no API key. (Locally:
|
||||
`gcloud auth application-default login`.)
|
||||
- **AI Studio API key** (where your org permits API keys): set
|
||||
`model: google_genai:gemini-2.5-pro` and `GOOGLE_API_KEY`.
|
||||
|
||||
## Create a workspace
|
||||
|
||||
```bash
|
||||
# 1. Store your Google API key as a global secret
|
||||
curl -s -X PUT http://localhost:8080/settings/secrets \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"key":"GOOGLE_API_KEY","value":"YOUR-AI-STUDIO-KEY"}' | jq .
|
||||
|
||||
# 2. Create a google-adk workspace
|
||||
WS=$(curl -s -X POST http://localhost:8080/workspaces \
|
||||
# Vertex AI + ADC (keyless)
|
||||
curl -s -X POST http://localhost:8080/workspaces \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"name": "adk-agent",
|
||||
"role": "Google ADK inference worker",
|
||||
"runtime": "google-adk",
|
||||
"model": "google:gemini-2.0-flash"
|
||||
}' | jq -r '.id')
|
||||
echo "Workspace: $WS"
|
||||
|
||||
# 3. Wait for ready (~30s)
|
||||
until curl -s http://localhost:8080/workspaces/$WS | jq -r '.status' | grep -q ready; do
|
||||
echo "Waiting..."; sleep 5
|
||||
done
|
||||
|
||||
# 4. Send your first task
|
||||
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"jsonrpc":"2.0","id":"1","method":"message/send",
|
||||
"params":{"message":{"role":"user","parts":[{"kind":"text",
|
||||
"text":"Summarise the ADK architecture in 3 bullet points."}]}}}' \
|
||||
| jq '.result.parts[0].text'
|
||||
|
||||
# 5. Multi-turn — session state is preserved across calls
|
||||
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"jsonrpc":"2.0","id":"2","method":"message/send",
|
||||
"params":{"message":{"role":"user","parts":[{"kind":"text",
|
||||
"text":"Now give me a one-line TL;DR of what you just said."}]}}}' \
|
||||
| jq '.result.parts[0].text'
|
||||
|
||||
# 6. Vertex AI alternative — set these instead of GOOGLE_API_KEY
|
||||
# curl -X PUT .../secrets -d '{"key":"GOOGLE_GENAI_USE_VERTEXAI","value":"1"}'
|
||||
# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_PROJECT","value":"my-project"}'
|
||||
# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_LOCATION","value":"us-central1"}'
|
||||
"model": "vertex:gemini-2.5-pro",
|
||||
"runtime_config": {"required_env": ["GOOGLE_CLOUD_PROJECT"]}
|
||||
}'
|
||||
```
|
||||
|
||||
## Expected output
|
||||
|
||||
After step 4, ADK streams the Gemini response through its event bus, filters for `is_final_response()` events, and returns the agent's reply as a standard A2A text part. Step 5 should reference the prior answer — the adapter ties each A2A `context_id` to an `InMemorySessionService` session, so conversation state is isolated per task context and survives across calls within the same session.
|
||||
|
||||
## How it works
|
||||
|
||||
The `google-adk` adapter wraps Google ADK's runner/session model behind the same `AgentExecutor` interface used by every other Molecule AI runtime. On each turn, `GoogleADKA2AExecutor` calls `runner.run_async()` with the incoming message wrapped in a `google.genai.types.Content` object, then drains the event stream until it collects a final-response event. The `google:` model prefix is stripped before being passed to ADK — so `google:gemini-2.0-flash` in your workspace config becomes `gemini-2.0-flash` in the ADK `LlmAgent`. Error class names are sanitized before leaving the executor; raw Google SDK stack traces never reach the A2A caller.
|
||||
|
||||
## Mixed-runtime teams
|
||||
|
||||
ADK workspaces participate in the same A2A network as Claude Code, Gemini CLI, Hermes, and LangGraph workers. An orchestrator can delegate long-context summarisation to a `google-adk` worker (Gemini 1.5 Pro's 1M token window) while routing tool-use tasks to a `claude-code` worker — with no provider-specific code in the orchestrator itself. Add an ADK peer with `POST /workspaces`, set `GOOGLE_API_KEY`, and it's available for `delegate_task` immediately.
|
||||
Send it a task via the A2A proxy (`POST /workspaces/:id/a2a`, JSON-RPC
|
||||
`message/send`) and it replies through the ADK `Runner`. Verified end-to-end:
|
||||
a Gemini 2.5 round-trip on Vertex via ADC returns through the built image.
|
||||
|
||||
## Related
|
||||
|
||||
- PR #550: [feat(adapters): add google-adk runtime adapter](https://git.moleculesai.app/molecule-ai/molecule-core/pull/550)
|
||||
- Template + adapter: [`molecule-ai-workspace-template-google-adk`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-google-adk) (PR #1)
|
||||
- Platform registration: molecule-core PR #2003 · validator: molecule-ci PR #26
|
||||
- Design/approval: RFC [`internal#730`](https://git.moleculesai.app/molecule-ai/internal/issues/730)
|
||||
- [Google ADK (adk-python)](https://github.com/google/adk-python)
|
||||
- [Gemini CLI runtime tutorial](./gemini-cli-runtime.md)
|
||||
- [Platform API reference](../api-reference.md)
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
{"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "main"},
|
||||
{"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"},
|
||||
{"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"},
|
||||
{"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "main"},
|
||||
{"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "main"}
|
||||
],
|
||||
"org_templates": [
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
#!/usr/bin/env bash
|
||||
# E2E test: A2A round-trip parity across all four runtimes.
|
||||
# E2E test: A2A round-trip parity across all five runtimes.
|
||||
#
|
||||
# Validates that for each of {claude-code, hermes, codex, openclaw}:
|
||||
# Validates that for each of {claude-code, hermes, codex, openclaw, google-adk}:
|
||||
# 1. A workspace can be provisioned + brought online
|
||||
# 2. The adapter responds to A2A message/send
|
||||
# 3. The reply contains expected content (echo of the prompt)
|
||||
# 4. A SECOND message preserves session state where the runtime
|
||||
# supports it (currently: hermes via plugin path)
|
||||
# supports it (currently: hermes via plugin path; google-adk via
|
||||
# ADK InMemorySessionService keyed on A2A context_id)
|
||||
#
|
||||
# Targets a SaaS tenant subdomain. Provisions workspaces in the calling
|
||||
# tenant, runs the round-trip, deletes them on success.
|
||||
@@ -16,6 +17,10 @@
|
||||
# (e.g. https://demo-tenant.staging.moleculesai.app)
|
||||
# - $OPENROUTER_API_KEY (or $HERMES_API_KEY) for non-claude runtimes
|
||||
# - $OPENAI_API_KEY for claude-code peer
|
||||
# - $GOOGLE_API_KEY (AI Studio) for google-adk — the org disallows API
|
||||
# keys in PROD (Vertex+ADC there), but CI auths Gemini with an
|
||||
# AI-Studio key (config model google_genai:gemini-2.5-pro). Vertex
|
||||
# stays supported; this is the keyed CI path only.
|
||||
# - SaaS edge requires Origin header — see auto-memory
|
||||
# reference_saas_waf_origin_header.md
|
||||
#
|
||||
@@ -24,12 +29,13 @@
|
||||
# ./scripts/test-all-runtimes-a2a-e2e.sh
|
||||
#
|
||||
# Skip individual runtimes:
|
||||
# SKIP_HERMES=1 SKIP_OPENCLAW=1 ./scripts/test-all-runtimes-a2a-e2e.sh
|
||||
# SKIP_HERMES=1 SKIP_OPENCLAW=1 SKIP_GOOGLE_ADK=1 ./scripts/test-all-runtimes-a2a-e2e.sh
|
||||
set -euo pipefail
|
||||
|
||||
PLATFORM="${PLATFORM:-${1:-http://localhost:8080}}"
|
||||
HERMES_PROVIDER_KEY="${OPENROUTER_API_KEY:-${HERMES_API_KEY:-}}"
|
||||
PEER_OPENAI_KEY="${OPENAI_API_KEY:-}"
|
||||
GOOGLE_ADK_KEY="${GOOGLE_API_KEY:-}"
|
||||
# SaaS auth chain — TENANT_ADMIN_TOKEN + TENANT_ORG_ID required when
|
||||
# hitting *.moleculesai.app (per-tenant ADMIN_TOKEN, NOT
|
||||
# CP_ADMIN_API_TOKEN). Optional for localhost.
|
||||
@@ -48,6 +54,10 @@ if [ -z "$HERMES_PROVIDER_KEY" ] && [ -z "${SKIP_HERMES:-}${SKIP_CODEX:-}${SKIP_
|
||||
echo "FAIL: set OPENROUTER_API_KEY or HERMES_API_KEY for non-claude runtimes"
|
||||
exit 2
|
||||
fi
|
||||
if [ -z "$GOOGLE_ADK_KEY" ] && [ -z "${SKIP_GOOGLE_ADK:-}" ]; then
|
||||
echo "FAIL: set GOOGLE_API_KEY (AI Studio) for google-adk, or SKIP_GOOGLE_ADK=1"
|
||||
exit 2
|
||||
fi
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
@@ -143,7 +153,7 @@ echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# -------------------------------------------------------
|
||||
# 1. Provision the four runtimes (skip via SKIP_* flags)
|
||||
# 1. Provision the five runtimes (skip via SKIP_* flags)
|
||||
# -------------------------------------------------------
|
||||
echo "--- 1. Provision workspaces ---"
|
||||
if [ -z "${SKIP_CLAUDE_CODE:-}" ]; then
|
||||
@@ -162,6 +172,10 @@ if [ -z "${SKIP_OPENCLAW:-}" ]; then
|
||||
WS_IDS[openclaw]=$(provision "ParityOpenClaw" "openclaw" "openclaw peer")
|
||||
echo " openclaw: ${WS_IDS[openclaw]}"
|
||||
fi
|
||||
if [ -z "${SKIP_GOOGLE_ADK:-}" ]; then
|
||||
WS_IDS[google-adk]=$(provision "ParityGoogleADK" "google-adk" "google-adk peer")
|
||||
echo " google-adk: ${WS_IDS[google-adk]}"
|
||||
fi
|
||||
|
||||
# -------------------------------------------------------
|
||||
# 2. Set provider keys
|
||||
@@ -177,6 +191,12 @@ if [ -n "${WS_IDS[claude-code]:-}" ] && [ -n "$PEER_OPENAI_KEY" ]; then
|
||||
set_secret "${WS_IDS[claude-code]}" "OPENAI_API_KEY" "$PEER_OPENAI_KEY"
|
||||
echo " claude-code: OPENAI_API_KEY set"
|
||||
fi
|
||||
if [ -n "${WS_IDS[google-adk]:-}" ] && [ -n "$GOOGLE_ADK_KEY" ]; then
|
||||
# AI-Studio path: the adapter reads GOOGLE_API_KEY natively when the
|
||||
# config model is google_genai:gemini-2.5-pro (see _routing.resolve_model).
|
||||
set_secret "${WS_IDS[google-adk]}" "GOOGLE_API_KEY" "$GOOGLE_ADK_KEY"
|
||||
echo " google-adk: GOOGLE_API_KEY set"
|
||||
fi
|
||||
|
||||
# -------------------------------------------------------
|
||||
# 3. Wait for online
|
||||
@@ -188,6 +208,9 @@ for runtime in "${!WS_IDS[@]}"; do
|
||||
[ -z "$id" ] && continue
|
||||
max=60
|
||||
[ "$runtime" = "hermes" ] && max=120
|
||||
# google-adk's first cold boot pulls a large fresh ADK image — give it
|
||||
# a larger budget than hermes (180 vs 120) so a slow first pull doesn't read as "failed".
|
||||
[ "$runtime" = "google-adk" ] && max=180
|
||||
if wait_online "$id" "$runtime" "$max"; then
|
||||
check "$runtime online" "ok" "ok"
|
||||
else
|
||||
@@ -200,7 +223,7 @@ done
|
||||
# -------------------------------------------------------
|
||||
echo ""
|
||||
echo "--- 4. A2A round-trip (first message) ---"
|
||||
for runtime in claude-code hermes codex openclaw; do
|
||||
for runtime in claude-code hermes codex openclaw google-adk; do
|
||||
id="${WS_IDS[$runtime]:-}"
|
||||
[ -z "$id" ] && continue
|
||||
reply=$(a2a_send "$id" "Reply with just the word OK so we know you got this.")
|
||||
@@ -213,7 +236,7 @@ done
|
||||
# -------------------------------------------------------
|
||||
echo ""
|
||||
echo "--- 5. Session continuity (second message recalls first) ---"
|
||||
for runtime in claude-code hermes codex openclaw; do
|
||||
for runtime in claude-code hermes codex openclaw google-adk; do
|
||||
id="${WS_IDS[$runtime]:-}"
|
||||
[ -z "$id" ] && continue
|
||||
# Set up: tell the agent a name.
|
||||
|
||||
Executable
+229
@@ -0,0 +1,229 @@
|
||||
#!/usr/bin/env bash
|
||||
# Real-completion + per-provider liveness + byok-routing assertion helpers
|
||||
# for the staging full-SaaS E2E (tests/e2e/test_staging_full_saas.sh).
|
||||
#
|
||||
# WHY THIS LIB EXISTS (molecule-core#1995 / #1994 follow-on):
|
||||
# The A2A e2e historically asserted only response SHAPE — e.g.
|
||||
# test_a2a_e2e.sh:`check "SEO response has text" '"kind":"text"'`. A fully
|
||||
# BROKEN agent returns its error AS a text part:
|
||||
# {"kind":"text","text":"Agent error (Exception) — see workspace logs..."}
|
||||
# which STILL matches `"kind":"text"` → the shape check PASSES on a broken
|
||||
# agent. That is exactly why the 2026-05-2x drained-key / byok-misroute
|
||||
# failures (agents-team PM + reno marketing erroring on every LLM call)
|
||||
# sailed through CI. "Channel returns text shape" != "agent actually
|
||||
# completed an LLM round-trip".
|
||||
#
|
||||
# These helpers add three load-bearing gates ON TOP of (never replacing) the
|
||||
# existing shape + PONG checks:
|
||||
# 1. a2a_assert_real_completion — deterministic known-answer round-trip
|
||||
# (CONTAINS the expected token AND NOT an error-as-text payload).
|
||||
# 2. provider_liveness_matrix — per-offered-provider cheap completion
|
||||
# probe, providers sourced from the providers.yaml SSOT runtimes block.
|
||||
# 3. assert_byok_not_platform_proxy — #1994 regression guard: a
|
||||
# byok-resolving workspace must NOT resolve to platform_managed.
|
||||
#
|
||||
# Conventions: reuses the host script's fail()/ok()/log() + tenant_call().
|
||||
# Source this AFTER those are defined. BASH 4+.
|
||||
|
||||
# Error-as-text trap markers. If the agent's text part contains ANY of
|
||||
# these, the "round-trip" did not really complete — the agent surfaced an
|
||||
# error AS text. This is the negative assertion that makes a broken agent
|
||||
# FAIL instead of slipping through the shape check.
|
||||
#
|
||||
# Kept as an array (not a single regex) so a new failure signature is a
|
||||
# one-line append + the failure message can name which marker matched.
|
||||
# Substrings that mark an agent reply as an error surfaced as text.
# One append per marker; order is preserved, and the scanner reports the
# first marker (in this order) that it finds.
A2A_ERROR_AS_TEXT_MARKERS=()
A2A_ERROR_AS_TEXT_MARKERS+=("Agent error")
A2A_ERROR_AS_TEXT_MARKERS+=("Exception")
A2A_ERROR_AS_TEXT_MARKERS+=("error result")
A2A_ERROR_AS_TEXT_MARKERS+=("MISSING_BYOK_CREDENTIAL")
|
||||
|
||||
# a2a_completion_error_marker <agent_text>
|
||||
# Echoes the first error-as-text marker found in <agent_text> (case-
|
||||
# insensitive), or nothing if clean. Exit 0 if a marker matched, 1 if not.
|
||||
# Pure string scan — no LLM, no network — so it is deterministic and is the
|
||||
# unit under the fail-direction proof in test_completion_assert_unit.sh.
|
||||
a2a_completion_error_marker() {
  # $1: agent reply text to scan.
  # Prints the first entry of A2A_ERROR_AS_TEXT_MARKERS that occurs in the
  # text (compared case-insensitively, as a fixed string) and returns 0;
  # prints nothing and returns 1 when no marker occurs.
  local haystack needle candidate

  # Upper-case the reply once; each marker is upper-cased the same way so
  # the fixed-string comparison ignores case.
  haystack=$(printf '%s' "$1" | tr '[:lower:]' '[:upper:]')

  for candidate in "${A2A_ERROR_AS_TEXT_MARKERS[@]}"; do
    needle=$(printf '%s' "$candidate" | tr '[:lower:]' '[:upper:]')
    printf '%s' "$haystack" | grep -qF -- "$needle" || continue
    # Report the marker in its original spelling, not the upper-cased one.
    printf '%s' "$candidate"
    return 0
  done

  return 1
}
|
||||
|
||||
# a2a_assert_real_completion <agent_text> <expected_token> <context_label>
|
||||
# The CORE gate. Asserts the agent text:
|
||||
# (a) does NOT contain any error-as-text marker (broken-agent trap), AND
|
||||
# (b) CONTAINS <expected_token> (case-insensitive) — proving a real LLM
|
||||
# round-trip produced the deterministic known answer.
|
||||
# Calls fail() (which exits) on either violation. This MUST fail on an
|
||||
# error-as-text payload — that is the property test_completion_assert_unit.sh
|
||||
# pins.
|
||||
a2a_assert_real_completion() {
  # $1: agent reply text
  # $2: known-answer token the reply must contain (case-insensitive)
  # $3: context label used in messages (defaults to "A2A")
  #
  # Reports through the host script's fail()/ok(). Three checks, in order:
  # non-empty reply, no error-as-text marker, expected token present.
  local text="$1" expected="$2" ctx="${3:-A2A}"
  local hit expected_upper

  # 1. An empty reply means there was no round-trip at all.
  [ -n "$text" ] || fail "$ctx — real-completion gate: agent returned EMPTY text (no round-trip)."

  # 2. Broken-agent trap: an error reported as a text part is not a completion.
  if hit=$(a2a_completion_error_marker "$text"); then
    fail "$ctx — real-completion gate: agent returned an ERROR-AS-TEXT payload (matched '$hit'). A broken agent that surfaces its error as a text part is NOT a completed round-trip. This is the trap the shape-only check missed (#1994). Raw: ${text:0:200}"
  fi

  # 3. Known-answer check: both sides are upper-cased so the fixed-string
  #    match ignores case.
  expected_upper=$(printf '%s' "$expected" | tr '[:lower:]' '[:upper:]')
  if ! printf '%s' "$text" | tr '[:lower:]' '[:upper:]' | grep -qF -- "$expected_upper"; then
    fail "$ctx — real-completion gate: reply did NOT contain expected known-answer token '$expected'. The channel returned a text shape but no real completion. Raw: ${text:0:200}"
  fi

  ok "$ctx — real completion verified (contains '$expected', no error-as-text). Reply: \"${text:0:80}\""
}
|
||||
|
||||
# offered_platform_models_for_runtime <runtime>
|
||||
# Emits, one per line, the platform-servable model ids the providers.yaml
|
||||
# SSOT (runtimes.<runtime>.providers[name=platform].models) declares for
|
||||
# <runtime>. This is the SSOT-driven offered/platform-servable matrix — NOT
|
||||
# a hardcoded provider list — so a provider added/removed in providers.yaml
|
||||
# automatically changes the matrix this probe exercises.
|
||||
#
|
||||
# Reads the embedded copy at workspace-server/internal/providers/providers.yaml
|
||||
# (the same file go:embed compiles into the binary). Requires python3 +
|
||||
# PyYAML (already a test-harness dep). Returns 1 if the SSOT file is missing and
|
||||
# exits 2 from the embedded Python if PyYAML is missing; any failure is non-zero so the caller can fail loud rather than silently skip.
|
||||
offered_platform_models_for_runtime() {
  # $1: runtime name to look up under the YAML's top-level `runtimes` key.
  # Prints, one per line, every entry of `models` for each of that runtime's
  # providers whose name is "platform". Returns 1 when the YAML file does not
  # exist; otherwise returns the embedded Python's exit status.
  local runtime="$1"
  local yaml_path="${PROVIDERS_YAML_PATH:-}"
  local lib_dir repo_root

  if [ -z "$yaml_path" ]; then
    # No override given: resolve the default relative to this file, whose
    # directory sits three levels below the repo root
    # (lib -> e2e -> tests -> repo-root).
    lib_dir="$(dirname "${BASH_SOURCE[0]}")"
    repo_root="$(cd "$lib_dir/../../.." && pwd)"
    yaml_path="$repo_root/workspace-server/internal/providers/providers.yaml"
  fi

  if [ ! -f "$yaml_path" ]; then
    log " [provider-matrix] providers.yaml SSOT not found at $yaml_path"
    return 1
  fi

  # The runtime name travels via the environment and the YAML path via argv;
  # the quoted heredoc delimiter keeps the shell from expanding the script.
  RUNTIME_REF="$runtime" python3 - "$yaml_path" <<'PY'
import os
import sys

try:
    import yaml
except Exception as e:  # PyYAML missing — fail loud, do not silently skip.
    sys.stderr.write(f"PyYAML required for provider-matrix SSOT read: {e}\n")
    sys.exit(2)

wanted_runtime = os.environ["RUNTIME_REF"]
with open(sys.argv[1]) as handle:
    ssot = yaml.safe_load(handle)

# Missing or null keys collapse to empty containers, so an unknown runtime
# simply prints nothing.
runtime_entry = (ssot.get("runtimes") or {}).get(wanted_runtime) or {}
for provider in runtime_entry.get("providers", []) or []:
    if provider.get("name") != "platform":
        continue
    for model_id in provider.get("models", []) or []:
        print(model_id)
PY
}
|
||||
|
||||
# provider_liveness_matrix <runtime> <probe_fn>
#
# For each platform-servable model the SSOT lists for <runtime>, calls
# <probe_fn> <model_id> and interprets its exit status:
#   0      non-error completion          -> PASS
#   75     (EX_TEMPFAIL convention) probe unavailable in this lane
#                                        -> SKIP, excluded from the pass/fail count
#   other  anything else                 -> FAIL
# Logs the offered model list and a per-model result matrix.
#
# Returns 0 only if EVERY probed (non-skipped) model passed, or if the SSOT
# offers no platform models for the runtime; returns 1 otherwise. Calls
# fail() when the SSOT itself cannot be read.
#
# Purpose: exercise each offered provider's AUTH + ROUTING path so a drained
# key / wrong base-URL / byok-misroute fails the gate (the #1994 class). The
# probe_fn is expected to use minimal max_tokens.
#
# This helper does the SSOT read + matrix bookkeeping; the host script
# supplies probe_fn (it owns workspace ids + tenant_call wiring).
provider_liveness_matrix() {
  local runtime="$1"
  local probe_fn="$2"
  local models model rc line
  local total=0 passed=0
  local -a offered=() results=()

  models=$(offered_platform_models_for_runtime "$runtime") || {
    fail "provider-liveness: could not read offered-provider matrix from providers.yaml SSOT for runtime=$runtime"
  }

  # Materialise the model list up front. Probes then run outside any
  # `while read` redirection, so a probe that reads stdin cannot swallow
  # the remaining model ids.
  while IFS= read -r model; do
    if [ -n "$model" ]; then
      offered+=("$model")
    fi
  done <<<"$models"

  if [ "${#offered[@]}" -eq 0 ]; then
    log " [provider-matrix] runtime=$runtime offers no platform-servable models in the SSOT — nothing to probe (not a failure)."
    return 0
  fi

  log " [provider-matrix] SSOT offered platform models for $runtime:"
  for model in "${offered[@]}"; do
    log " - $model"
  done

  for model in "${offered[@]}"; do
    # Capture the status via `||` rather than `set +e … set -e`: the latter
    # would leave errexit switched ON for callers that never enabled it.
    rc=0
    "$probe_fn" "$model" || rc=$?
    case "$rc" in
      0)
        total=$((total + 1))
        passed=$((passed + 1))
        results+=("PASS $model")
        ;;
      75)
        # Probe skipped (key/runtime not available in this lane). Not
        # counted toward pass/fail — logged only.
        results+=("SKIP $model (probe unavailable in this lane)")
        ;;
      *)
        total=$((total + 1))
        results+=("FAIL $model")
        ;;
    esac
  done

  log " [provider-matrix] result matrix (runtime=$runtime):"
  for line in "${results[@]}"; do
    log " $line"
  done
  log " [provider-matrix] $passed/$total probed providers completed without error"

  if [ "$passed" != "$total" ]; then
    return 1
  fi
  return 0
}
|
||||
|
||||
# assert_byok_not_platform_proxy <billing_mode_json> <context_label>
#
# #1994 regression guard. Takes the JSON body returned by
# GET /admin/workspaces/:id/llm-billing-mode and asserts that its
# `resolved_mode` is exactly "byok".
#
#   resolved_mode = byok              -> ok()
#   resolved_mode unreadable / empty  -> fail() (body was not usable JSON, or
#                                        the field is absent)
#   resolved_mode = platform_managed  -> fail() — the #1994 regression: a byok
#                                        workspace routed through the platform
#                                        proxy, draining the platform LLM key
#   anything else                     -> fail() (unexpected mode)
#
# `provider_selection` is read only to enrich the messages. <context_label>
# defaults to "byok-guard". fail() is expected to exit.
assert_byok_not_platform_proxy() {
  local body="$1"
  local ctx="${2:-byok-guard}"
  local mode prov

  # Both extractions degrade to an empty string on any parse problem; the
  # empty-mode branch below turns that into a loud failure.
  mode=$(printf '%s' "$body" | python3 -c "
import json, sys
try:
    print(json.load(sys.stdin).get('resolved_mode', ''))
except Exception:
    print('')
" 2>/dev/null || echo "")

  prov=$(printf '%s' "$body" | python3 -c "
import json, sys
try:
    selection = json.load(sys.stdin).get('provider_selection')
    print('' if selection is None else selection)
except Exception:
    print('')
" 2>/dev/null || echo "")

  case "$mode" in
    byok)
      ok "$ctx — byok-routing guard: workspace resolves byok (provider_selection=$prov), NOT platform-proxy. #1994 stays fixed."
      ;;
    "")
      fail "$ctx — byok-routing guard: could not read resolved_mode from billing-mode response. Raw: ${body:0:200}"
      ;;
    platform_managed)
      fail "$ctx — byok-routing guard TRIPPED (#1994 regression): a byok-configured workspace resolved to 'platform_managed' (provider_selection=$prov) → it would route through the platform proxy and drain the platform LLM key. Expected resolved_mode=byok. Raw: ${body:0:200}"
      ;;
    *)
      fail "$ctx — byok-routing guard: unexpected resolved_mode='$mode' (expected 'byok'). provider_selection=$prov. Raw: ${body:0:200}"
      ;;
  esac
}
|
||||
@@ -8,6 +8,34 @@ TIMEOUT="${A2A_TIMEOUT:-120}" # seconds per A2A call (override via A2A_TIMEOUT
|
||||
|
||||
# shellcheck source=_lib.sh
|
||||
source "$(dirname "$0")/_lib.sh"
|
||||
# molecule-core#1995 (#1994 follow-on): real-completion assertion helpers.
|
||||
# Adds a NEGATIVE error-as-text check on top of the shape checks below, so a
|
||||
# broken agent that returns its error AS a text part
|
||||
# ({"kind":"text","text":"Agent error (Exception) ..."}) — which STILL
|
||||
# matches the shape check `"kind":"text"` — now FAILS instead of passing.
|
||||
# shellcheck source=lib/completion_assert.sh
|
||||
source "$(dirname "$0")/lib/completion_assert.sh"
|
||||
|
||||
# check_no_error_as_text <desc> <agent_text>
#
# Additive negative gate. Runs <agent_text> through
# a2a_completion_error_marker (from lib/completion_assert.sh):
#   - scanner succeeds (a marker was found) -> prints a FAIL report including
#     the matched marker and the first three lines of the text, and
#     increments the global FAIL counter;
#   - scanner fails (no marker)             -> prints PASS and increments the
#     global PASS counter.
# Never exits; the caller tallies PASS/FAIL.
check_no_error_as_text() {
  local desc="$1"
  local text="$2"
  local hit

  if ! hit=$(a2a_completion_error_marker "$text"); then
    # Clean reply: no error-as-text marker present.
    echo "PASS: $desc"
    PASS=$((PASS + 1))
    return 0
  fi

  echo "FAIL: $desc"
  echo " agent returned an error-AS-text payload (matched '$hit') — a broken"
  echo " agent that surfaces its error as a text part is NOT a real reply."
  echo " got: $(echo "$text" | head -3)"
  FAIL=$((FAIL + 1))
}
|
||||
|
||||
check() {
|
||||
local desc="$1"
|
||||
@@ -81,6 +109,8 @@ check "JSON-RPC response has result" '"result"' "$R"
|
||||
check "Response has agent role" '"role":"agent"' "$R"
|
||||
check "Response has text part" '"kind":"text"' "$R"
|
||||
TEXT=$(echo "$R" | python3 -c "import sys,json; r=json.load(sys.stdin); print(r['result']['parts'][0]['text'][:200])" 2>/dev/null || echo "PARSE_ERROR")
|
||||
# Negative gate (#1994): the text part must not BE an error.
|
||||
check_no_error_as_text "Echo reply is not an error-as-text payload" "$TEXT"
|
||||
echo " Agent said: $TEXT"
|
||||
echo ""
|
||||
|
||||
@@ -92,6 +122,11 @@ R=$(a2a_send "$SEO_ID" "What SEO skills do you have?")
|
||||
check "SEO agent responds" '"result"' "$R"
|
||||
check "SEO response has text" '"kind":"text"' "$R"
|
||||
TEXT=$(echo "$R" | python3 -c "import sys,json; r=json.load(sys.stdin); print(r['result']['parts'][0]['text'][:200])" 2>/dev/null || echo "PARSE_ERROR")
|
||||
# Negative gate (#1994): a broken SEO agent that returns "Agent error
|
||||
# (Exception) ..." AS text still matches the `"kind":"text"` shape check
|
||||
# above — THAT is the gap that let drained-key/byok-misroute failures pass
|
||||
# CI. This makes that case FAIL.
|
||||
check_no_error_as_text "SEO reply is not an error-as-text payload" "$TEXT"
|
||||
echo " SEO Agent said: $TEXT"
|
||||
echo ""
|
||||
|
||||
|
||||
+43
-25
@@ -73,7 +73,15 @@ else
|
||||
fi
|
||||
|
||||
# Test 4: Create workspace B (needs bearer — tokens now exist in DB)
|
||||
R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Summarizer Agent","tier":1,"runtime":"external","external":true}')
|
||||
# #1953 cross-tenant isolation: Summarizer is created as a CHILD of Echo so the
|
||||
# two live in the SAME org (Echo is the org root; Summarizer hangs off it via
|
||||
# parent_id). The peer-discovery tests below assert same-org peer enumeration
|
||||
# (Echo sees its child, the child sees its parent). Previously both were created
|
||||
# parent_id=NULL — two DISTINCT org roots — and "peers" only listed each other
|
||||
# via the `WHERE parent_id IS NULL` branch that returned every tenant's org root.
|
||||
# That branch WAS the cross-tenant leak (#1953) and is now removed, so two org
|
||||
# roots no longer see each other; the assertions must run inside one org.
|
||||
R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d "{\"name\":\"Summarizer Agent\",\"tier\":1,\"runtime\":\"external\",\"external\":true,\"parent_id\":\"$ECHO_ID\"}")
|
||||
check "POST /workspaces (create summarizer)" '"status":"awaiting_agent"' "$R"
|
||||
SUM_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])")
|
||||
|
||||
@@ -133,21 +141,23 @@ check "Heartbeat updated uptime" '"uptime_seconds":120' "$R"
|
||||
R=$(curl -s "$BASE/registry/discover/$ECHO_ID")
|
||||
check "GET /registry/discover/:id (missing caller rejected)" 'X-Workspace-ID header is required' "$R"
|
||||
|
||||
# Test 12: Discover (from sibling — allowed)
|
||||
# Test 12: Discover (from same-org child — allowed)
|
||||
R=$(curl -s "$BASE/registry/discover/$ECHO_ID" -H "X-Workspace-ID: $SUM_ID" -H "Authorization: Bearer $SUM_TOKEN")
|
||||
check "GET /registry/discover/:id (sibling)" '"url"' "$R"
|
||||
check "GET /registry/discover/:id (same-org)" '"url"' "$R"
|
||||
|
||||
# Test 13: Peers (root siblings see each other)
|
||||
# Test 13: Peers — same-org parent/child see each other (#1953). Echo is the org
|
||||
# root and lists its child Summarizer; Summarizer lists its parent Echo. A
|
||||
# cross-org workspace would NOT appear here (see cross_tenant_isolation_test.go).
|
||||
R=$(curl -s "$BASE/registry/$ECHO_ID/peers" -H "Authorization: Bearer $ECHO_TOKEN")
|
||||
check "GET /registry/:id/peers (has summarizer)" '"Summarizer' "$R"
|
||||
|
||||
R=$(curl -s "$BASE/registry/$SUM_ID/peers" -H "Authorization: Bearer $SUM_TOKEN")
|
||||
check "GET /registry/:id/peers (has echo)" '"Echo Agent"' "$R"
|
||||
|
||||
# Test 14: Check access (root siblings)
|
||||
# Test 14: Check access (same-org parent↔child — allowed)
|
||||
R=$(curl -s -X POST "$BASE/registry/check-access" -H "Content-Type: application/json" \
|
||||
-d "{\"caller_id\":\"$ECHO_ID\",\"target_id\":\"$SUM_ID\"}")
|
||||
check "POST /registry/check-access (siblings allowed)" '"allowed":true' "$R"
|
||||
check "POST /registry/check-access (same-org allowed)" '"allowed":true' "$R"
|
||||
|
||||
# Test 15: PATCH workspace (update position)
|
||||
R=$(acurl -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"x":100,"y":200}')
|
||||
@@ -289,32 +299,40 @@ R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN")
|
||||
check "current_task in list response" '"current_task"' "$R"
|
||||
|
||||
# Test 21: Delete
|
||||
R=$(acurl -X DELETE "$BASE/workspaces/$ECHO_ID?confirm=true" \
|
||||
-H "Authorization: Bearer $ECHO_TOKEN" \
|
||||
-H "X-Confirm-Name: Echo Agent v2")
|
||||
check "DELETE /workspaces/:id" '"status":"removed"' "$R"
|
||||
|
||||
R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $SUM_TOKEN")
|
||||
COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))")
|
||||
check "List after delete (count=1)" "1" "$COUNT"
|
||||
|
||||
# Test 22: Bundle round-trip — export → delete → import → verify same config
|
||||
echo ""
|
||||
echo "--- Bundle Round-Trip Test ---"
|
||||
|
||||
# Export the summarizer workspace (#165 / PR #167 — admin-gated)
|
||||
# #1953: Summarizer is now a CHILD of Echo (same-org, for the peer-discovery
|
||||
# tests above). DELETE on the *parent* (Echo) cascade-removes its descendants
|
||||
# (CascadeDelete walks the recursive `parent_id` CTE), so deleting Echo first
|
||||
# would also remove Summarizer and the "one survives" assertion would see 0.
|
||||
# Delete the CHILD (Summarizer) here instead: a child delete does NOT cascade
|
||||
# upward, so the parent Echo survives and count=1 holds. The bundle round-trip
|
||||
# below needs Summarizer's exported config, so capture it BEFORE this delete.
|
||||
BUNDLE=$(curl -s "$BASE/bundles/export/$SUM_ID" -H "Authorization: Bearer $SUM_TOKEN")
|
||||
check "GET /bundles/export/:id" '"name":"Summarizer Agent"' "$BUNDLE"
|
||||
|
||||
# Capture original config for comparison
|
||||
ORIG_NAME=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['name'])")
|
||||
ORIG_TIER=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['tier'])")
|
||||
|
||||
# Delete the workspace — use SUM_TOKEN (per-workspace) for WorkspaceAuth
|
||||
# and ADMIN_TOKEN for the AdminAuth layer.
|
||||
R=$(curl -s -X DELETE "$BASE/workspaces/$SUM_ID?confirm=true" \
|
||||
R=$(acurl -X DELETE "$BASE/workspaces/$SUM_ID?confirm=true" \
|
||||
-H "Authorization: Bearer $SUM_TOKEN" \
|
||||
-H "X-Confirm-Name: Summarizer Agent")
|
||||
check "DELETE /workspaces/:id" '"status":"removed"' "$R"
|
||||
|
||||
# Parent Echo must survive a child delete — list as Echo and expect count=1.
|
||||
R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN")
|
||||
COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))")
|
||||
check "List after delete (count=1)" "1" "$COUNT"
|
||||
|
||||
# Test 22: Bundle round-trip — export → delete → import → verify same config.
|
||||
# Summarizer's bundle was captured above; now delete the parent Echo (the only
|
||||
# remaining workspace) so the import lands in a clean org, then re-import the
|
||||
# Summarizer bundle.
|
||||
echo ""
|
||||
echo "--- Bundle Round-Trip Test ---"
|
||||
|
||||
# Delete the remaining parent Echo — use ECHO_TOKEN (per-workspace) for
|
||||
# WorkspaceAuth and ADMIN_TOKEN for the AdminAuth layer.
|
||||
R=$(acurl -X DELETE "$BASE/workspaces/$ECHO_ID?confirm=true" \
|
||||
-H "Authorization: Bearer $ECHO_TOKEN" \
|
||||
-H "X-Confirm-Name: Echo Agent v2")
|
||||
check "Delete before re-import" '"status":"removed"' "$R"
|
||||
|
||||
# After deleting both workspaces, all per-workspace tokens are revoked.
|
||||
|
||||
Executable
+111
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env bash
|
||||
# Fail-direction / load-bearing proof for lib/completion_assert.sh.
|
||||
#
|
||||
# This is the watch-it-FAIL counterpart the dev-SOP Phase 3 requires: it
|
||||
# proves the new real-completion + byok gates actually CATCH a broken agent,
|
||||
# not just pass on a good one. It runs entirely offline (no LLM, no network,
|
||||
# no provisioning) — pure assertion logic — so it can run on every PR in the
|
||||
# fast lane (e2e-api.yml unit-shell step) and locally via `bash`.
|
||||
#
|
||||
# The decisive case is `error-as-text payload MUST FAIL`: that is the exact
|
||||
# trap (#1994) the historical shape-only check missed. If a refactor weakens
|
||||
# a2a_assert_real_completion to a substring/shape check, THIS test goes red.
|
||||
set -uo pipefail
|
||||
|
||||
HERE="$(cd "$(dirname "$0")" && pwd)"
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# Minimal stand-ins for the host script's helpers. fail() must NOT exit the
|
||||
# whole harness here — we want to assert that it WAS called. We trap it by
|
||||
# running the assertion in a subshell and checking the subshell's exit code:
|
||||
# the real fail() exits 1, ok() exits 0 implicitly.
|
||||
log() { echo "[unit] $*"; }
|
||||
ok() { echo "[unit] OK: $*"; }
|
||||
fail() { echo "[unit] FAIL-CALLED: $*" >&2; exit 1; }
|
||||
|
||||
# shellcheck source=lib/completion_assert.sh
|
||||
source "$HERE/lib/completion_assert.sh"
|
||||
|
||||
# expect_pass <desc> <command> [args...]
#
# Runs the command in a subshell (so an `exit` inside it cannot terminate
# the harness) with all output discarded. A zero exit status counts as the
# assertion ACCEPTING and bumps PASS; any non-zero status bumps FAIL.
expect_pass() {
  local desc="$1"
  shift

  local status=0
  ( "$@" ) >/dev/null 2>&1 || status=$?

  if [ "$status" -ne 0 ]; then
    echo "FAIL: $desc — expected the assertion to ACCEPT, but it rejected"
    FAIL=$((FAIL + 1))
    return 0
  fi

  echo "PASS: $desc (assertion accepted, as expected)"
  PASS=$((PASS + 1))
}
|
||||
|
||||
# expect_fail <desc> <command> [args...]
#
# Mirror image of expect_pass: runs the command in a subshell with output
# discarded and treats a NON-zero exit status as the expected outcome (the
# assertion rejected -> PASS). A zero status means the gate let a bad input
# through and is counted as FAIL.
expect_fail() {
  local desc="$1"
  shift

  local status=0
  ( "$@" ) >/dev/null 2>&1 || status=$?

  if [ "$status" -ne 0 ]; then
    echo "PASS: $desc (assertion rejected, as expected)"
    PASS=$((PASS + 1))
    return 0
  fi

  echo "FAIL: $desc — expected the assertion to REJECT, but it accepted (gate NOT load-bearing!)"
  FAIL=$((FAIL + 1))
}
|
||||
|
||||
echo "=== completion_assert.sh fail-direction proof ==="
|
||||
|
||||
# ---- a2a_assert_real_completion ----
|
||||
# Good: real known-answer reply passes.
|
||||
expect_pass "real PINEAPPLE reply passes" \
|
||||
a2a_assert_real_completion "PINEAPPLE" "PINEAPPLE" "unit"
|
||||
expect_pass "case-insensitive known answer passes" \
|
||||
a2a_assert_real_completion "pineapple" "PINEAPPLE" "unit"
|
||||
expect_pass "known answer with minor wrapping passes" \
|
||||
a2a_assert_real_completion "Sure: PINEAPPLE" "PINEAPPLE" "unit"
|
||||
|
||||
# DECISIVE: the error-as-text trap. Each MUST fail — these are the payloads a
|
||||
# broken agent returns that the old shape-only `"kind":"text"` check passed.
|
||||
expect_fail "Agent error as text payload MUST fail" \
|
||||
a2a_assert_real_completion "Agent error (Exception) — see workspace logs for details." "PINEAPPLE" "unit"
|
||||
expect_fail "bare Exception as text MUST fail" \
|
||||
a2a_assert_real_completion "Traceback ... Exception: boom" "PINEAPPLE" "unit"
|
||||
expect_fail "error result as text MUST fail" \
|
||||
a2a_assert_real_completion "tool returned error result" "PINEAPPLE" "unit"
|
||||
expect_fail "MISSING_BYOK_CREDENTIAL as text MUST fail" \
|
||||
a2a_assert_real_completion "MISSING_BYOK_CREDENTIAL: set your own key" "PINEAPPLE" "unit"
|
||||
# Error-as-text that ALSO happens to contain the token still fails (error
|
||||
# marker takes precedence — a real completion never carries these markers).
|
||||
expect_fail "error-as-text containing the token still fails" \
|
||||
a2a_assert_real_completion "Agent error: could not produce PINEAPPLE" "PINEAPPLE" "unit"
|
||||
# Empty text fails.
|
||||
expect_fail "empty text fails" \
|
||||
a2a_assert_real_completion "" "PINEAPPLE" "unit"
|
||||
# Wrong/echoed content (no token, no error) fails — shape-OK but not a real
|
||||
# completion.
|
||||
expect_fail "wrong content without token fails" \
|
||||
a2a_assert_real_completion "Reply with exactly the word PINEAPPLE and nothing else." "BANANA" "unit"
|
||||
|
||||
# ---- assert_byok_not_platform_proxy (#1994 guard) ----
|
||||
expect_pass "byok resolution passes the guard" \
|
||||
assert_byok_not_platform_proxy '{"resolved_mode":"byok","provider_selection":"minimax","source":"derived_provider"}' "unit"
|
||||
# DECISIVE: a platform_managed resolution on a byok workspace = the #1994
|
||||
# regression. MUST fail.
|
||||
expect_fail "platform_managed resolution trips the #1994 guard" \
|
||||
assert_byok_not_platform_proxy '{"resolved_mode":"platform_managed","provider_selection":"platform","source":"derived_provider"}' "unit"
|
||||
expect_fail "missing resolved_mode trips the guard" \
|
||||
assert_byok_not_platform_proxy '{"provider_selection":"x"}' "unit"
|
||||
expect_fail "disabled mode trips the guard (not byok)" \
|
||||
assert_byok_not_platform_proxy '{"resolved_mode":"disabled"}' "unit"
|
||||
|
||||
# ---- a2a_completion_error_marker (the scanner under the gate) ----
|
||||
if hit=$(a2a_completion_error_marker "all good PINEAPPLE"); then
|
||||
echo "FAIL: clean text wrongly flagged as error marker ($hit)"; FAIL=$((FAIL + 1))
|
||||
else
|
||||
echo "PASS: clean text has no error marker"; PASS=$((PASS + 1))
|
||||
fi
|
||||
if hit=$(a2a_completion_error_marker "An Exception occurred"); then
|
||||
echo "PASS: error marker detected ($hit)"; PASS=$((PASS + 1))
|
||||
else
|
||||
echo "FAIL: error marker NOT detected in 'An Exception occurred'"; FAIL=$((FAIL + 1))
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Results: $PASS passed, $FAIL failed ==="
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -99,6 +99,12 @@ source "$(dirname "$0")/lib/model_slug.sh"
|
||||
# shellcheck disable=SC1091
|
||||
# shellcheck source=lib/aws_leak_check.sh
|
||||
source "$(dirname "$0")/lib/aws_leak_check.sh"
|
||||
# shellcheck disable=SC1091
|
||||
# shellcheck source=lib/completion_assert.sh
|
||||
# molecule-core#1995 (#1994 follow-on): real-completion + per-provider
|
||||
# liveness + byok-routing assertion helpers. Adds gates that FAIL on an
|
||||
# error-as-text payload (the trap the shape-only A2A checks missed).
|
||||
source "$(dirname "$0")/lib/completion_assert.sh"
|
||||
|
||||
CURL_COMMON=(-sS --fail-with-body --max-time 30)
|
||||
E2E_TMP_FILES=()
|
||||
@@ -867,6 +873,182 @@ fi
|
||||
|
||||
ok "A2A parent round-trip succeeded: \"${AGENT_TEXT:0:80}\""
|
||||
|
||||
# ─── 8b. Real-completion known-answer round-trip (CORE GATE, #1994) ────
|
||||
# The existing PONG check + generic error grep above already do a lot, but
|
||||
# this stanza is the canonical real-completion gate the #1994 follow-on
|
||||
# adds: a DETERMINISTIC known-answer prompt asserted via
|
||||
# a2a_assert_real_completion, which FAILS on an error-as-text payload
|
||||
# ({"kind":"text","text":"Agent error (Exception) ..."}). That payload
|
||||
# matches the historical shape-only check `"kind":"text"` and so passed CI
|
||||
# on a fully broken agent (drained-key / byok-misroute, 2026-05-2x). This
|
||||
# gate makes that case RED. Reuses the same cold-start retry-on-transient
|
||||
# (502/503/504) loop the PONG probe uses — retry-once-on-network, never on
|
||||
# agent-error. Single round-trip → the one place we spend a non-trivial
|
||||
# token budget (default backend MiniMax — cheap token plan).
|
||||
KA_PAYLOAD=$(python3 -c "
|
||||
import json, uuid
|
||||
print(json.dumps({
|
||||
'jsonrpc': '2.0',
|
||||
'method': 'message/send',
|
||||
'id': 'e2e-known-answer-1',
|
||||
'params': {
|
||||
'message': {
|
||||
'role': 'user',
|
||||
'messageId': f'e2e-{uuid.uuid4().hex[:8]}',
|
||||
'parts': [{'kind': 'text', 'text': 'Reply with exactly the word PINEAPPLE and nothing else.'}]
|
||||
}
|
||||
}
|
||||
}))
|
||||
")
|
||||
KA_TMP=$(mktemp -t known_answer_a2a.XXXXXX)
|
||||
KA_RESP=""
|
||||
for KA_ATTEMPT in $(seq 1 6); do
|
||||
: >"$KA_TMP"
|
||||
set +e
|
||||
KA_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
|
||||
--max-time 90 \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$KA_PAYLOAD" \
|
||||
-o "$KA_TMP" \
|
||||
-w '%{http_code}' \
|
||||
2>/dev/null)
|
||||
KA_RC=$?
|
||||
set -e
|
||||
KA_CODE=${KA_CODE:-000}
|
||||
KA_RESP=$(cat "$KA_TMP" 2>/dev/null || echo "")
|
||||
if [ "$KA_RC" = "0" ] && [ "$KA_CODE" -ge 200 ] && [ "$KA_CODE" -lt 300 ]; then
|
||||
break
|
||||
fi
|
||||
KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body)
|
||||
# Retry ONLY on transient transport errors — never on an agent-level
|
||||
# error (those must surface and fail the gate).
|
||||
if echo "$KA_CODE" | grep -Eq '^(502|503|504)$' && echo "$KA_SAFE_BODY" | grep -Eqi 'Service Unavailable|Bad Gateway|Gateway Timeout|workspace agent unreachable|connection refused|no healthy upstream|workspace agent busy|native_session'; then
|
||||
log " known-answer A2A transient $KA_CODE attempt $KA_ATTEMPT/6: $KA_SAFE_BODY"
|
||||
if [ "$KA_ATTEMPT" -lt 6 ]; then sleep 10; continue; fi
|
||||
fi
|
||||
break
|
||||
done
|
||||
rm -f "$KA_TMP"
|
||||
if [ "$KA_RC" != "0" ] || [ "$KA_CODE" -lt 200 ] || [ "$KA_CODE" -ge 300 ]; then
|
||||
KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body)
|
||||
fail "Known-answer A2A POST failed after $KA_ATTEMPT attempt(s) (curl_rc=$KA_RC, http=$KA_CODE): $KA_SAFE_BODY"
|
||||
fi
|
||||
KA_TEXT=$(echo "$KA_RESP" | python3 -c "
|
||||
import json, sys
|
||||
try:
|
||||
d = json.load(sys.stdin)
|
||||
parts = d.get('result', {}).get('parts', [])
|
||||
print(parts[0].get('text', '') if parts else '')
|
||||
except Exception:
|
||||
print('')
|
||||
" 2>/dev/null || echo "")
|
||||
# CORE GATE: contains PINEAPPLE (real round-trip) AND no error-as-text.
|
||||
a2a_assert_real_completion "$KA_TEXT" "PINEAPPLE" "A2A known-answer (parent, $RUNTIME/$MODEL_SLUG)"
|
||||
|
||||
# ─── 8c. byok-routing regression guard (#1994) ─────────────────────────
|
||||
# The parent was provisioned with the customer's OWN vendor key
|
||||
# (MINIMAX_API_KEY / ANTHROPIC_API_KEY in SECRETS_JSON) → it must resolve
|
||||
# BYOK, not platform_managed. #1994 was exactly the inverse: a byok
|
||||
# workspace baked platform_managed on (re-)provision → routed through the
|
||||
# platform proxy → drained the platform LLM key. We read the SAME derived
|
||||
# resolver the provision-time strip gate uses
|
||||
# (GET /admin/workspaces/:id/llm-billing-mode) and assert resolved_mode!=
|
||||
# platform_managed. A regression flips it RED.
|
||||
#
|
||||
# Only meaningful when the parent actually carries a byok credential; the
|
||||
# OpenAI/hermes path uses a different env shape, and the no-key path is
|
||||
# legitimately platform_managed (the CTO default). Gate on the same
|
||||
# E2E_*_API_KEY presence the SECRETS_JSON branch keyed off.
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ] || [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
|
||||
set +e
|
||||
BILLING_RESP=$(tenant_call GET "/admin/workspaces/$PARENT_ID/llm-billing-mode" 2>/dev/null)
|
||||
BILLING_RC=$?
|
||||
set -e
|
||||
if [ "$BILLING_RC" != "0" ] || [ -z "$BILLING_RESP" ]; then
|
||||
fail "byok-routing guard: GET /admin/workspaces/$PARENT_ID/llm-billing-mode failed (rc=$BILLING_RC). Body: ${BILLING_RESP:0:200}"
|
||||
fi
|
||||
assert_byok_not_platform_proxy "$BILLING_RESP" "byok-guard (parent, $RUNTIME/$MODEL_SLUG)"
|
||||
else
|
||||
log "8c. byok-routing guard skipped — parent carries no own-vendor key (OpenAI/no-key path is legitimately platform_managed)."
|
||||
fi
|
||||
|
||||
# ─── 8d. Per-offered-provider liveness matrix (SSOT-driven, #1994 class) ─
|
||||
# For each platform-servable model the providers.yaml SSOT
|
||||
# (runtimes.<runtime>.providers[platform].models) declares for this
|
||||
# runtime, send a minimal max_tokens-bounded "say ok" probe and assert a
|
||||
# NON-ERROR completion. Purpose: exercise each offered provider's AUTH +
|
||||
# ROUTING path so a drained key / wrong base-URL / byok-misroute fails the
|
||||
# gate (the #1994 class). Providers/models come from the SSOT — not a
|
||||
# hardcoded list — so the matrix tracks providers.yaml automatically.
|
||||
#
|
||||
# This lane provisions ONE parent workspace with ONE configured key, so we
|
||||
# can only truly drive the providers that key authenticates. Probing a
|
||||
# model whose provider key is absent in this lane is reported SKIP (rc=75),
|
||||
# not FAIL — keeping the gate deterministic + low-flake. The matrix still
|
||||
# proves the configured provider's full auth+routing path end-to-end, and
|
||||
# logs the offered set so over/under-offer drift is visible in the CI log.
|
||||
# provider_liveness_probe <model_id>
#
# probe_fn for provider_liveness_matrix. Exit status:
#   75  the model's vendor (prefix before the first "/") has no key in this
#       lane, or the vendor is not one of minimax/anthropic/openai -> skip
#   1   transport/HTTP failure, empty reply, or error-as-text reply
#   0   non-empty reply with no error marker
#
# NOTE(review): <model_id> is only used for the skip decision and in log
# lines. The A2A request itself goes to $PARENT_ID and carries no model id,
# so every non-skipped probe exercises the parent workspace as configured.
provider_liveness_probe() {
  local model_id="$1"

  # Map the SSOT platform model id (e.g. minimax/MiniMax-M2.7) to the
  # env var holding that vendor's key, to decide whether THIS lane has it.
  local vendor="${model_id%%/*}"
  local key_var=""
  case "$vendor" in
    minimax)   key_var="E2E_MINIMAX_API_KEY" ;;
    anthropic) key_var="E2E_ANTHROPIC_API_KEY" ;;
    openai)    key_var="E2E_OPENAI_API_KEY" ;;
  esac
  # Unknown vendor (kimi/moonshot etc. — no key wired in this lane) or the
  # vendor's key is unset/empty: report SKIP.
  if [ -z "$key_var" ] || [ -z "${!key_var:-}" ]; then
    return 75
  fi

  # Minimal, max_tokens-bounded JSON-RPC message/send request.
  local request_json
  request_json=$(python3 -c "
import json, uuid
message = {
    'role': 'user',
    'messageId': f'e2e-{uuid.uuid4().hex[:8]}',
    'parts': [{'kind': 'text', 'text': 'Reply with exactly: ok'}],
}
request = {
    'jsonrpc': '2.0',
    'method': 'message/send',
    'id': 'e2e-liveness-' + uuid.uuid4().hex[:6],
    'params': {'message': message, 'configuration': {'max_tokens': 4}},
}
print(json.dumps(request))
")

  local body_file http_code call_rc body
  body_file=$(mktemp -t liveness_a2a.XXXXXX)
  # errexit is suspended around the call so a failing request is reported
  # through the return code instead of aborting the script.
  set +e
  http_code=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
    --max-time 60 \
    -H "Content-Type: application/json" \
    -d "$request_json" \
    -o "$body_file" -w '%{http_code}' 2>/dev/null)
  call_rc=$?
  set -e
  body=$(cat "$body_file" 2>/dev/null || echo "")
  rm -f "$body_file"
  # No status captured at all is treated as 000.
  http_code="${http_code:-000}"

  if [ "$call_rc" != "0" ] || [ "$http_code" -lt 200 ] || [ "$http_code" -ge 300 ]; then
    log " probe $model_id: HTTP $http_code rc=$call_rc"
    return 1
  fi

  # Text of the first result part; empty on any parse problem.
  local reply
  reply=$(echo "$body" | python3 -c "
import json, sys
try:
    parts = json.load(sys.stdin).get('result', {}).get('parts', [])
    print(parts[0].get('text', '') if parts else '')
except Exception:
    print('')
" 2>/dev/null || echo "")

  if [ -n "$reply" ] && ! a2a_completion_error_marker "$reply" >/dev/null; then
    return 0
  fi
  log " probe $model_id: error-as-text or empty: ${reply:0:120}"
  return 1
}
|
||||
if ! provider_liveness_matrix "$RUNTIME" provider_liveness_probe; then
|
||||
fail "Per-provider liveness matrix: at least one offered provider failed its auth+routing probe (see matrix above). This is the #1994 class — a drained key / wrong base-URL / byok-misroute."
|
||||
fi
|
||||
ok "Per-provider liveness matrix passed (all probed offered providers completed without error)"
|
||||
|
||||
# ─── 9. HMA + peers + activity (full mode) ─────────────────────────────
|
||||
if [ "$MODE" = "full" ]; then
|
||||
log "9/11 Writing + reading HMA memory on parent..."
|
||||
|
||||
@@ -0,0 +1,271 @@
|
||||
// Command gen-providers is the codegen half of the provider-registry SSOT
|
||||
// machinery on the molecule-core side (internal#718 P2-A, CTO 2026-05-27
|
||||
// "Distribution = SDK via codegen + verify-CI"). It is the byte-for-byte mirror
|
||||
// of molecule-controlplane's cmd/gen-providers (the canonical generator). It
|
||||
// reads core's SYNCED COPY of the schema — internal/providers/providers.yaml
|
||||
// (via the providers loader, so it shares the SAME parse + validation as the
|
||||
// runtime) — and emits a checked-in Go artifact:
|
||||
//
|
||||
// internal/providers/gen/registry_gen.go
|
||||
//
|
||||
// The artifact is a deterministic projection of the merged registry: the
|
||||
// provider catalog + per-runtime native sets as Go literals, plus the schema
|
||||
// version and a content fingerprint. It is core's leaf of the multi-language SDK
|
||||
// layer the RFC calls for (Go(CP+core)/TS(canvas)/Python(adapters)).
|
||||
//
|
||||
// CONTRACT for P2-A (zero behavior change): the generated artifact is
|
||||
// checked-in + drift-gated ONLY. NO production code path imports
|
||||
// internal/providers/gen — the gen-import-boundary test pins that. P2-B wires
|
||||
// the billing/credential decision onto the LOADER (DeriveProvider/IsPlatform),
|
||||
// not the raw gen literals. The generator is the build-time half;
|
||||
// verify-providers-gen.yml is the CI half that regenerates and fails RED on any
|
||||
// diff (drift or hand-edit); sync-providers-yaml.yml gates the synced copy
|
||||
// against the controlplane canonical.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// go run ./cmd/gen-providers # write the artifact in place
|
||||
// go run ./cmd/gen-providers -check # exit non-zero if the on-disk
|
||||
// # artifact differs from a fresh gen
|
||||
// # (the CI drift gate)
|
||||
// go run ./cmd/gen-providers -o PATH # write to a specific path
|
||||
//
|
||||
//go:generate go run ../gen-providers -o ../../internal/providers/gen/registry_gen.go
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"flag"
|
||||
"fmt"
|
||||
"go/format"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"text/template"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
)
|
||||
|
||||
// defaultOutPath is the checked-in artifact location, relative to the repo
// root (the directory `go run ./cmd/gen-providers` is invoked from). It is the
// default value of the -o flag, so both write mode and -check mode target this
// file unless -o overrides it.
const defaultOutPath = "internal/providers/gen/registry_gen.go"
|
||||
|
||||
func main() {
|
||||
var (
|
||||
outPath string
|
||||
check bool
|
||||
)
|
||||
flag.StringVar(&outPath, "o", defaultOutPath, "output path for the generated artifact")
|
||||
flag.BoolVar(&check, "check", false, "verify the on-disk artifact matches a fresh generation; exit 1 on drift")
|
||||
flag.Parse()
|
||||
|
||||
generated, err := render()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "gen-providers: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if check {
|
||||
existing, err := os.ReadFile(outPath)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "gen-providers -check: cannot read %s: %v\n", outPath, err)
|
||||
fmt.Fprintln(os.Stderr, "Run `go generate ./...` (or `go run ./cmd/gen-providers`) and commit the result.")
|
||||
os.Exit(1)
|
||||
}
|
||||
if !bytes.Equal(existing, generated) {
|
||||
fmt.Fprintf(os.Stderr, "gen-providers -check: DRIFT — %s is out of sync with providers.yaml.\n", outPath)
|
||||
fmt.Fprintln(os.Stderr, "The generated artifact was hand-edited or providers.yaml changed without regen.")
|
||||
fmt.Fprintln(os.Stderr, "Fix: run `go generate ./...` (or `go run ./cmd/gen-providers`) and commit.")
|
||||
os.Exit(1)
|
||||
}
|
||||
fmt.Println("gen-providers -check: OK — artifact in sync with providers.yaml")
|
||||
return
|
||||
}
|
||||
|
||||
if err := os.WriteFile(outPath, generated, 0o644); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "gen-providers: write %s: %v\n", outPath, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
fmt.Printf("gen-providers: wrote %s\n", outPath)
|
||||
}
|
||||
|
||||
// render loads the manifest and produces the gofmt'd artifact bytes.
|
||||
func render() ([]byte, error) {
|
||||
m, err := providers.LoadManifest()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("load manifest: %w", err)
|
||||
}
|
||||
|
||||
// Deterministic ordering: providers in catalog order is already stable
|
||||
// (slice). Runtimes is a map — sort its keys so the artifact is
|
||||
// reproducible regardless of Go map iteration order.
|
||||
runtimeNames := make([]string, 0, len(m.Runtimes))
|
||||
for rt := range m.Runtimes {
|
||||
runtimeNames = append(runtimeNames, rt)
|
||||
}
|
||||
sort.Strings(runtimeNames)
|
||||
|
||||
type genProvider struct {
|
||||
Name string
|
||||
DisplayName string
|
||||
Protocol string
|
||||
AuthMode string
|
||||
AuthEnv []string
|
||||
ModelPrefixMatch string
|
||||
IsPlatform bool
|
||||
// UpstreamVendor is the proxy's upstream-vendor key for this entry
|
||||
// (internal#718 P1, CONVERGED) — empty for entries the proxy does not
|
||||
// route to an upstream. A plain scalar (no pointer), so both the rendered
|
||||
// literal and the fingerprint stay deterministic.
|
||||
UpstreamVendor string
|
||||
}
|
||||
type genRef struct {
|
||||
Name string
|
||||
Models []string
|
||||
}
|
||||
type genRuntime struct {
|
||||
Name string
|
||||
Providers []genRef
|
||||
}
|
||||
|
||||
data := struct {
|
||||
SchemaVersion int
|
||||
Fingerprint string
|
||||
Providers []genProvider
|
||||
Runtimes []genRuntime
|
||||
}{
|
||||
SchemaVersion: providers.SchemaVersion(),
|
||||
}
|
||||
|
||||
for _, p := range m.Providers {
|
||||
gp := genProvider{
|
||||
Name: p.Name,
|
||||
DisplayName: p.DisplayName,
|
||||
Protocol: string(p.Protocol),
|
||||
AuthMode: p.AuthMode,
|
||||
AuthEnv: p.AuthEnv,
|
||||
ModelPrefixMatch: p.ModelPrefixMatch,
|
||||
IsPlatform: p.IsPlatform(),
|
||||
UpstreamVendor: p.UpstreamVendor,
|
||||
}
|
||||
data.Providers = append(data.Providers, gp)
|
||||
}
|
||||
for _, rt := range runtimeNames {
|
||||
native := m.Runtimes[rt]
|
||||
gr := genRuntime{Name: rt}
|
||||
for _, ref := range native.Providers {
|
||||
gr.Providers = append(gr.Providers, genRef{Name: ref.Name, Models: ref.Models})
|
||||
}
|
||||
data.Runtimes = append(data.Runtimes, gr)
|
||||
}
|
||||
|
||||
// Fingerprint pins the artifact to the data it was generated from. It is
|
||||
// derived from the structured projection (schema version + providers +
|
||||
// runtimes), NOT the raw YAML bytes, so a comment-only YAML edit does not
|
||||
// churn the artifact while any data change does.
|
||||
data.Fingerprint = fingerprint(data.SchemaVersion, data.Providers, data.Runtimes)
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err := artifactTmpl.Execute(&buf, data); err != nil {
|
||||
return nil, fmt.Errorf("execute template: %w", err)
|
||||
}
|
||||
formatted, err := format.Source(buf.Bytes())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("gofmt generated source: %w\n----\n%s", err, buf.String())
|
||||
}
|
||||
return formatted, nil
|
||||
}
|
||||
|
||||
// fingerprint returns a short, stable content hash of the structured
// projection: the first 16 hex characters of the SHA-256 over the schema
// version line followed by the Go-syntax (%#v) rendering of provs and
// runtimes, one per line.
//
// The values hashed here must stay in sync with the data the template emits,
// so the hash and the generated literals never diverge.
func fingerprint(schema int, provs any, runtimes any) string {
	var payload bytes.Buffer
	fmt.Fprintf(&payload, "schema=%d\n%#v\n%#v\n", schema, provs, runtimes)
	digest := sha256.Sum256(payload.Bytes())
	return hex.EncodeToString(digest[:])[:16]
}
|
||||
|
||||
// quote renders s as a double-quoted Go string literal. It is registered as
// the template's "quote" helper.
func quote(s string) string {
	return string(strconv.AppendQuote(nil, s))
}
|
||||
|
||||
func quoteSlice(ss []string) string {
|
||||
var b bytes.Buffer
|
||||
b.WriteString("[]string{")
|
||||
for i, s := range ss {
|
||||
if i > 0 {
|
||||
b.WriteString(", ")
|
||||
}
|
||||
b.WriteString(strconv.Quote(s))
|
||||
}
|
||||
b.WriteString("}")
|
||||
return b.String()
|
||||
}
|
||||
|
||||
var artifactTmpl = template.Must(template.New("artifact").Funcs(template.FuncMap{
|
||||
"quote": quote,
|
||||
"quoteSlice": quoteSlice,
|
||||
}).Parse(`// Code generated by cmd/gen-providers; DO NOT EDIT.
|
||||
//
|
||||
// Source of truth: internal/providers/providers.yaml (schema_version {{.SchemaVersion}}).
|
||||
// Regenerate with: go generate ./... (or: go run ./cmd/gen-providers)
|
||||
// The verify-providers-gen CI workflow fails RED if this file drifts from
|
||||
// providers.yaml or is hand-edited. internal#718 P0 — checked-in + drift-
|
||||
// gated ONLY; no production path imports this package yet (that is P1+).
|
||||
|
||||
package gen
|
||||
|
||||
// SchemaVersion is the providers.yaml schema this artifact was generated
|
||||
// against. It is the semver'd contract version (the MAJOR component for the
|
||||
// public extension contract; see internal/providers/README.md).
|
||||
const SchemaVersion = {{.SchemaVersion}}
|
||||
|
||||
// Fingerprint is a stable content hash of the generated projection (schema
|
||||
// version + provider catalog + runtime native sets). It changes iff the
|
||||
// registry DATA changes (comment-only YAML edits do not churn it).
|
||||
const Fingerprint = {{quote .Fingerprint}}
|
||||
|
||||
// GenProvider is the generated projection of one provider catalog entry —
|
||||
// the subset a downstream consumer needs to derive + display a provider.
|
||||
type GenProvider struct {
|
||||
Name string
|
||||
DisplayName string
|
||||
Protocol string
|
||||
AuthMode string
|
||||
AuthEnv []string
|
||||
ModelPrefixMatch string
|
||||
// IsPlatform marks the closed, core-only platform-managed provider.
|
||||
IsPlatform bool
|
||||
// UpstreamVendor is the proxy's upstream-vendor key for this entry
|
||||
// (internal#718 P1, CONVERGED); empty for providers the proxy does not
|
||||
// route to an upstream vendor. ResolveUpstream maps a model id's namespace
|
||||
// token to the entry whose UpstreamVendor equals it.
|
||||
UpstreamVendor string
|
||||
}
|
||||
|
||||
// GenRuntimeRef is one native provider a runtime supports + its exact models.
|
||||
type GenRuntimeRef struct {
|
||||
Name string
|
||||
Models []string
|
||||
}
|
||||
|
||||
// Providers is the full provider catalog, in providers.yaml declaration order.
|
||||
var Providers = []GenProvider{
|
||||
{{- range .Providers}}
|
||||
{Name: {{quote .Name}}, DisplayName: {{quote .DisplayName}}, Protocol: {{quote .Protocol}}, AuthMode: {{quote .AuthMode}}, AuthEnv: {{quoteSlice .AuthEnv}}, ModelPrefixMatch: {{quote .ModelPrefixMatch}}, IsPlatform: {{.IsPlatform}}{{if .UpstreamVendor}}, UpstreamVendor: {{quote .UpstreamVendor}}{{end}}},
|
||||
{{- end}}
|
||||
}
|
||||
|
||||
// Runtimes maps each runtime to its native provider+model set, runtime names
|
||||
// sorted for a deterministic artifact.
|
||||
var Runtimes = map[string][]GenRuntimeRef{
|
||||
{{- range .Runtimes}}
|
||||
{{quote .Name}}: {
|
||||
{{- range .Providers}}
|
||||
{Name: {{quote .Name}}, Models: {{quoteSlice .Models}}},
|
||||
{{- end}}
|
||||
},
|
||||
{{- end}}
|
||||
}
|
||||
`))
|
||||
@@ -0,0 +1,121 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// repoRoot returns the first directory containing a go.mod file, starting at
// the test's working directory and climbing towards the filesystem root. Only
// six directories are probed (the working directory plus five ancestors); the
// test is failed if none of them holds go.mod or if the working directory
// cannot be determined.
func repoRoot(t *testing.T) string {
	t.Helper()
	candidate, err := os.Getwd()
	if err != nil {
		t.Fatalf("getwd: %v", err)
	}
	for remaining := 6; remaining > 0; remaining-- {
		_, statErr := os.Stat(filepath.Join(candidate, "go.mod"))
		if statErr == nil {
			return candidate
		}
		candidate = filepath.Dir(candidate)
	}
	t.Fatal("could not locate repo root (go.mod) from cmd/gen-providers")
	return ""
}
|
||||
|
||||
// TestArtifactInSync is the drift gate's Go-test counterpart: the checked-in
|
||||
// internal/providers/gen/registry_gen.go MUST byte-equal a fresh render. If a
|
||||
// future edit changes providers.yaml without regenerating, OR hand-edits the
|
||||
// artifact, this flips red — the same signal the verify-providers-gen CI
|
||||
// workflow emits, but caught locally by `go test ./...` too.
|
||||
func TestArtifactInSync(t *testing.T) {
|
||||
generated, err := render()
|
||||
if err != nil {
|
||||
t.Fatalf("render() error = %v", err)
|
||||
}
|
||||
artifactPath := filepath.Join(repoRoot(t), defaultOutPath)
|
||||
onDisk, err := os.ReadFile(artifactPath)
|
||||
if err != nil {
|
||||
t.Fatalf("read checked-in artifact %s: %v (run `go generate ./...` and commit)", artifactPath, err)
|
||||
}
|
||||
if !bytes.Equal(onDisk, generated) {
|
||||
t.Fatalf("DRIFT: %s is out of sync with providers.yaml.\n"+
|
||||
"Run `go generate ./...` (or `go run ./cmd/gen-providers`) and commit the result.", defaultOutPath)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDriftGateCatchesMutation is the load-bearing-gate proof (per the SOP
|
||||
// fail-direction discipline). The original P0 version was TAUTOLOGICAL
|
||||
// (internal#718 P1 review carry-over): it appended bytes to an in-memory copy
|
||||
// and asserted the copy differed from the original — true by construction,
|
||||
// touching neither the on-disk artifact nor the actual in-sync comparison the
|
||||
// gate runs. This version exercises the REAL gate: it writes a MUTATED artifact
|
||||
// to disk and re-runs the SAME comparison TestArtifactInSync / `-check` perform
|
||||
// (`render()` bytes vs the on-disk file), asserting it now reports drift — then
|
||||
// restores the original. So the test would fail if the gate were vacuous (e.g.
|
||||
// if the comparison ignored content), not merely if append changes bytes.
|
||||
func TestDriftGateCatchesMutation(t *testing.T) {
|
||||
generated, err := render()
|
||||
if err != nil {
|
||||
t.Fatalf("render() error = %v", err)
|
||||
}
|
||||
artifactPath := filepath.Join(repoRoot(t), defaultOutPath)
|
||||
original, err := os.ReadFile(artifactPath)
|
||||
if err != nil {
|
||||
t.Fatalf("read checked-in artifact %s: %v", artifactPath, err)
|
||||
}
|
||||
// Precondition: the tree is in sync (so the mutation is what flips the gate,
|
||||
// not pre-existing drift).
|
||||
if !bytes.Equal(original, generated) {
|
||||
t.Fatalf("precondition failed: %s already drifted from render() — run `go generate ./...`", defaultOutPath)
|
||||
}
|
||||
|
||||
// Restore the pristine artifact no matter how the test exits.
|
||||
t.Cleanup(func() {
|
||||
if err := os.WriteFile(artifactPath, original, 0o644); err != nil {
|
||||
t.Fatalf("CRITICAL: failed to restore %s after mutation: %v", artifactPath, err)
|
||||
}
|
||||
})
|
||||
|
||||
// Mutate the ON-DISK artifact (simulating a hand-edit / a providers.yaml
|
||||
// change that wasn't regenerated).
|
||||
mutated := append(append([]byte(nil), original...), []byte("\n// injected drift\n")...)
|
||||
if err := os.WriteFile(artifactPath, mutated, 0o644); err != nil {
|
||||
t.Fatalf("write mutated artifact: %v", err)
|
||||
}
|
||||
|
||||
// Re-run the EXACT in-sync comparison the gate uses: fresh render vs the
|
||||
// (now mutated) on-disk file. It MUST report drift.
|
||||
onDiskAfter, err := os.ReadFile(artifactPath)
|
||||
if err != nil {
|
||||
t.Fatalf("re-read mutated artifact: %v", err)
|
||||
}
|
||||
freshRender, err := render()
|
||||
if err != nil {
|
||||
t.Fatalf("render() after mutation error = %v", err)
|
||||
}
|
||||
if bytes.Equal(onDiskAfter, freshRender) {
|
||||
t.Fatal("drift gate did NOT detect a mutated on-disk artifact — gate is not load-bearing")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRenderDeterministic proves regeneration is idempotent: two renders of
|
||||
// the same manifest produce byte-identical output (sorted runtime keys, stable
|
||||
// catalog order). A non-deterministic generator would make the drift gate
|
||||
// flap on Go map iteration order.
|
||||
func TestRenderDeterministic(t *testing.T) {
|
||||
a, err := render()
|
||||
if err != nil {
|
||||
t.Fatalf("render() #1 error = %v", err)
|
||||
}
|
||||
b, err := render()
|
||||
if err != nil {
|
||||
t.Fatalf("render() #2 error = %v", err)
|
||||
}
|
||||
if !bytes.Equal(a, b) {
|
||||
t.Fatal("render() is non-deterministic — two runs differ; the drift gate would flap")
|
||||
}
|
||||
}
|
||||
@@ -36,6 +36,7 @@ import (
|
||||
"time"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/channels"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/codexauth"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
|
||||
@@ -334,6 +335,20 @@ func main() {
|
||||
pendinguploads.StartSweeper(c, pendinguploads.NewPostgres(db.DB), 0)
|
||||
})
|
||||
|
||||
// Codex shared-OAuth central refresher — the SINGLE owner of the rotating
|
||||
// refresh_token for the global codex (ChatGPT/Codex subscription) credential
|
||||
// (global_secrets key CODEX_AUTH_JSON). Multiple codex workspaces share ONE
|
||||
// ChatGPT-Pro OAuth token; OpenAI's refresh_token is single-use, so letting
|
||||
// each per-agent app-server refresh on its own 401 burned the seed within
|
||||
// seconds (a refresh storm). This goroutine is structurally single-flight
|
||||
// (one goroutine + a package mutex), refreshes only within a safety margin
|
||||
// of expiry, POSTs the refresh_token at most once per due cycle, and writes
|
||||
// the rotated blob back — workspaces now only GET the current token (see the
|
||||
// codex template's codex_auth_sync.sh). INERT when no CODEX_AUTH_JSON exists.
|
||||
go supervised.RunWithRecover(ctx, "codex-auth-refresher", func(c context.Context) {
|
||||
codexauth.StartCodexAuthRefresher(c, db.DB)
|
||||
})
|
||||
|
||||
// Provision-timeout sweep — flips workspaces that have been stuck in
|
||||
// status='provisioning' past the timeout window to 'failed' and emits
|
||||
// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
|
||||
|
||||
@@ -0,0 +1,463 @@
|
||||
// Package codexauth owns the SINGLE, platform-side refresh of the global
|
||||
// codex (ChatGPT/Codex subscription) OAuth credential stored in the
|
||||
// global_secrets table under key CODEX_AUTH_JSON.
|
||||
//
|
||||
// THE PROBLEM IT FIXES (agents-team prod, 2026-05-31)
|
||||
//
|
||||
// Multiple codex workspaces share ONE ChatGPT-Pro OAuth token (the global
|
||||
// secret CODEX_AUTH_JSON). OpenAI's refresh_token is SINGLE-USE: every refresh
|
||||
// rotates it and invalidates the prior one. When each per-agent codex
|
||||
// app-server refreshed independently on a 401, the siblings' in-flight tokens
|
||||
// were invalidated within seconds — a refresh storm that burned the seed and
|
||||
// wedged every codex agent.
|
||||
//
|
||||
// THE FIX (two halves; this is the core half)
|
||||
//
|
||||
// 1. The per-workspace codex app-server NO LONGER refreshes (the template's
|
||||
// OAuth POST is gated off by default — see the codex template's
|
||||
// codex_auth_sync.sh / CODEX_AUTH_REFRESH_OWNER gate). Workspaces only ever
|
||||
// GET the current token and write it to auth.json.
|
||||
// 2. ONE owner refreshes the rotating refresh_token: this background goroutine
|
||||
// in the platform. It is structurally single-flight (one goroutine + a
|
||||
// package mutex), refreshes ONLY when the access_token is within a safety
|
||||
// margin of expiry, POSTs the refresh_token at most ONCE per due cycle, and
|
||||
// writes the rotated blob back to global_secrets. On a permanent failure
|
||||
// (the seed was already burned by an out-of-band login) it logs ONCE and
|
||||
// backs off — it never hot-loops a dead refresh_token.
|
||||
//
|
||||
// Billing-mode resolution and the byok strip are UNTOUCHED by this package.
|
||||
package codexauth
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
|
||||
)
|
||||
|
||||
// Tunables and fixed identifiers for the shared codex OAuth refresher.
const (
	// CodexAuthSecretKey is the global_secrets key holding the shared codex
	// ChatGPT/Codex subscription OAuth blob (auth.json contents). refreshOnce
	// reads and writes exactly this key.
	CodexAuthSecretKey = "CODEX_AUTH_JSON"

	// oauthTokenURL is OpenAI's OAuth token endpoint. The ONLY endpoint this
	// package ever POSTs to, and only for a due refresh (see doRefresh).
	oauthTokenURL = "https://auth.openai.com/oauth/token"

	// codexOAuthClientID is the public Codex CLI OAuth client id (the same id
	// the codex CLI sends). Not a secret. Sent as client_id in the refresh
	// request body.
	codexOAuthClientID = "app_EMoamEEZ73f0CkXaXp7hrann"

	// refreshSafetyMargin is how far ahead of access_token expiry a refresh is
	// considered DUE. A token expiring within this window is refreshed now; one
	// expiring later is left untouched (skip-when-fresh). Generous so a slow
	// tick can never let the shared token lapse for the fleet. It is three
	// times defaultInterval, so several ticks fall inside the window.
	refreshSafetyMargin = 15 * time.Minute

	// defaultInterval is how often the loop wakes to check due-ness. The check
	// is cheap (decrypt + JWT exp parse) and only POSTs when actually due.
	defaultInterval = 5 * time.Minute

	// permanentFailureBackoff is how long the loop waits after a PERMANENT
	// refresh failure (invalid_grant / "refresh token already used"). The seed
	// is burned until a human re-seeds a fresh login; there is nothing to retry,
	// so we back off hard rather than hammer the dead token. run applies it
	// only to the cycle in which refreshOnce reports the permanent failure.
	permanentFailureBackoff = 1 * time.Hour
)
|
||||
|
||||
// SecretStore is the minimal global_secrets surface the refresher needs. The
// production implementation (postgresStore) is backed by *sql.DB; tests inject
// a fake. It is deliberately tiny — read one key, write one key — so the test
// double is trivial and the refresher never reaches for the package-global DB.
//
// refreshOnce calls Get once per cycle and calls Put only after a successful
// token refresh, to persist the rotated blob.
type SecretStore interface {
	// Get returns the decrypted secret value and true, or ("", false) when the
	// key is absent. A non-nil error is a real read failure (not absence);
	// refreshOnce treats it as "skip this cycle".
	Get(ctx context.Context, key string) (value string, found bool, err error)
	// Put encrypts and upserts value under key, bumping the row's updated_at
	// (the "last_refresh" timestamp). It is the rotated-blob write-back.
	Put(ctx context.Context, key, value string) error
}
|
||||
|
||||
// httpDoer is the http client seam (real *http.Client in prod, fake transport
// in tests). Tests NEVER hit the network. In production
// StartCodexAuthRefresher supplies an *http.Client with a 30-second timeout.
type httpDoer interface {
	// Do sends req and returns the response; it has the same signature as
	// (*http.Client).Do.
	Do(req *http.Request) (*http.Response, error)
}
|
||||
|
||||
// refresher is the single-owner refresh engine. The package-level mutex makes
// the refresh structurally single-flight: even if two refreshOnce calls raced
// (they cannot in prod — one goroutine drives it — but a test or a future
// caller might), only one POSTs at a time, and the access-token freshness
// re-check inside the lock means the second sees a freshly-rotated token and
// skips. One goroutine + this mutex = single-flight by construction.
type refresher struct {
	// store reads and writes the CODEX_AUTH_JSON secret.
	store SecretStore
	// client performs the OAuth refresh POST.
	client httpDoer
	// now is the clock used for the due-ness check (time.Now in production);
	// a field so tests can pin the time.
	now func() time.Time

	// failedSeed records that the current seed's refresh_token was rejected
	// as already-used/invalid, by remembering the exact blob that failed.
	// While it equals the stored blob, refreshOnce is INERT (it will not
	// re-POST the dead token). The latch clears when the stored value CHANGES
	// (a human re-seed), when the secret is deleted, or after a successful
	// refresh. This is the anti-storm latch — it lives on the struct, not in a
	// package global, so it belongs to this refresher value.
	failedSeed string // the auth-json blob that failed; "" = no known failure
}
|
||||
|
||||
// mu serializes refreshOnce across the process. Package-level so the
// single-flight guarantee holds regardless of how many refresher values exist
// (in prod there is exactly one). refreshOnce holds it for its entire body,
// including the HTTP POST and the write-back.
var mu sync.Mutex
|
||||
|
||||
// oauthTokens is the token trio inside auth.json (and the OAuth response).
// parseTokens fills it from the stored blob and doRefresh decodes the token
// endpoint's 200 response into it.
type oauthTokens struct {
	// AccessToken is the JWT whose expiry decides whether a refresh is due.
	AccessToken string `json:"access_token"`
	// RefreshToken is the value POSTed to the token endpoint; refreshOnce
	// skips the cycle when it is empty.
	RefreshToken string `json:"refresh_token"`
	// IDToken is optional (omitted from JSON output when empty).
	IDToken string `json:"id_token,omitempty"`
}
|
||||
|
||||
// StartCodexAuthRefresher runs the single background refresher loop.
//
// It BLOCKS: it builds the production refresher and then runs the tick loop
// in the calling goroutine until ctx is cancelled. Callers therefore start it
// on its own goroutine — wire it under `go supervised.RunWithRecover(...)` in
// main.go like the other Start* sweeps.
//
// db is handed to the postgres-backed SecretStore as-is; in prod it is the
// server's *sql.DB. The loop is INERT whenever CODEX_AUTH_JSON is absent: each
// cycle does one cheap read, logs that the refresher is inert, and keeps
// ticking — a deployment with no shared codex seed never POSTs anything.
func StartCodexAuthRefresher(ctx context.Context, db *sql.DB) {
	r := &refresher{
		store:  &postgresStore{db: db},
		client: &http.Client{Timeout: 30 * time.Second},
		now:    time.Now,
	}
	r.run(ctx, defaultInterval)
}
|
||||
|
||||
// run is the tick loop. It checks due-ness every interval and on a permanent
|
||||
// failure waits permanentFailureBackoff before the next check (never a tight
|
||||
// retry of a burned token).
|
||||
func (r *refresher) run(ctx context.Context, interval time.Duration) {
|
||||
// Check once promptly on boot, then on the interval.
|
||||
for {
|
||||
wait := interval
|
||||
if perm := r.refreshOnce(ctx); perm {
|
||||
// Permanent failure this cycle — the seed is burned. Back off hard;
|
||||
// a human must re-seed. We keep ticking (a re-seed CHANGES the blob,
|
||||
// which clears the latch) but slowly.
|
||||
wait = permanentFailureBackoff
|
||||
}
|
||||
|
||||
timer := time.NewTimer(wait)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
log.Printf("codexauth: context done; stopping refresher")
|
||||
return
|
||||
case <-timer.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// refreshOnce performs ONE due-check + at most one refresh POST. It returns
|
||||
// permanentFailure=true iff the refresh_token was permanently rejected this
|
||||
// cycle (the caller backs off). All other outcomes (inert/skip/rotated/transient
|
||||
// error) return false.
|
||||
//
|
||||
// It is single-flight: the package mutex is held for the whole read→decide→
|
||||
// POST→write-back so two callers cannot both POST the (single-use) refresh_token.
|
||||
func (r *refresher) refreshOnce(ctx context.Context) (permanentFailure bool) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
|
||||
blob, found, err := r.store.Get(ctx, CodexAuthSecretKey)
|
||||
if err != nil {
|
||||
log.Printf("codexauth: read CODEX_AUTH_JSON failed: %v (skipping this cycle)", err)
|
||||
return false
|
||||
}
|
||||
if !found || strings.TrimSpace(blob) == "" {
|
||||
// INERT: no shared codex seed in this deployment. Cheap no-op.
|
||||
log.Printf("codexauth: no CODEX_AUTH_JSON in global_secrets — refresher inert")
|
||||
// A previously-failed seed that has since been DELETED clears the latch.
|
||||
r.failedSeed = ""
|
||||
return false
|
||||
}
|
||||
|
||||
// Anti-storm latch: if THIS exact blob already failed permanently, do not
|
||||
// re-POST its dead refresh_token. A re-seed changes the blob and clears it.
|
||||
if r.failedSeed != "" && r.failedSeed == blob {
|
||||
return false
|
||||
}
|
||||
if r.failedSeed != "" && r.failedSeed != blob {
|
||||
// The seed changed out of band (human re-login) — give it a fresh chance.
|
||||
r.failedSeed = ""
|
||||
}
|
||||
|
||||
tokens, err := parseTokens(blob)
|
||||
if err != nil {
|
||||
log.Printf("codexauth: CODEX_AUTH_JSON is not parseable codex auth json: %v (skipping)", err)
|
||||
return false
|
||||
}
|
||||
if tokens.RefreshToken == "" {
|
||||
log.Printf("codexauth: CODEX_AUTH_JSON carries no refresh_token (skipping)")
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip-when-fresh: only refresh within the safety margin of expiry. A blob
|
||||
// with an unparseable/absent access_token exp is treated as DUE (better to
|
||||
// refresh a token we cannot date than let the fleet lapse).
|
||||
exp, haveExp := jwtExp(tokens.AccessToken)
|
||||
if haveExp {
|
||||
remaining := exp.Sub(r.now())
|
||||
if remaining > refreshSafetyMargin {
|
||||
// Fresh — nothing to do. No POST.
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// DUE: POST the refresh_token ONCE.
|
||||
newTokens, perm, err := r.doRefresh(ctx, tokens.RefreshToken)
|
||||
if err != nil {
|
||||
if perm {
|
||||
// Permanent: the seed is burned. Latch it so we don't re-POST, log
|
||||
// ONCE, and DO NOT write anything back.
|
||||
log.Printf("codexauth: PERMANENT refresh failure (refresh_token rejected): %v — "+
|
||||
"NOT writing back; the shared CODEX_AUTH_JSON seed is burned and must be re-seeded "+
|
||||
"via a fresh codex login. Backing off.", err)
|
||||
r.failedSeed = blob
|
||||
return true
|
||||
}
|
||||
// Transient (network/5xx): no write-back, retry next cycle (no backoff).
|
||||
log.Printf("codexauth: transient refresh error: %v (will retry next cycle)", err)
|
||||
return false
|
||||
}
|
||||
|
||||
// Success: merge the rotated trio into the blob (preserving every other
|
||||
// field) and write it back encrypted, bumping updated_at (last_refresh).
|
||||
rotated, err := mergeTokens(blob, newTokens)
|
||||
if err != nil {
|
||||
log.Printf("codexauth: failed to merge rotated tokens into auth json: %v (NOT writing back)", err)
|
||||
return false
|
||||
}
|
||||
if err := r.store.Put(ctx, CodexAuthSecretKey, rotated); err != nil {
|
||||
log.Printf("codexauth: write-back of rotated CODEX_AUTH_JSON failed: %v", err)
|
||||
return false
|
||||
}
|
||||
r.failedSeed = "" // success clears any stale latch
|
||||
log.Printf("codexauth: rotated shared CODEX_AUTH_JSON (single-owner refresh)")
|
||||
return false
|
||||
}
|
||||
|
||||
// doRefresh POSTs the refresh_token to OpenAI's OAuth endpoint exactly once and
|
||||
// returns the rotated trio. permanent=true marks an unrecoverable rejection
|
||||
// (HTTP 400 invalid_grant / "refresh token already used") so the caller latches
|
||||
// and backs off instead of retrying.
|
||||
func (r *refresher) doRefresh(ctx context.Context, refreshToken string) (tokens oauthTokens, permanent bool, err error) {
|
||||
body, _ := json.Marshal(map[string]string{
|
||||
"grant_type": "refresh_token",
|
||||
"client_id": codexOAuthClientID,
|
||||
"refresh_token": refreshToken,
|
||||
})
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, oauthTokenURL, strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
return oauthTokens{}, false, err
|
||||
}
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
req.Header.Set("Accept", "application/json")
|
||||
|
||||
resp, err := r.client.Do(req)
|
||||
if err != nil {
|
||||
return oauthTokens{}, false, err // transient: network
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
|
||||
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
var t oauthTokens
|
||||
if err := json.Unmarshal(respBody, &t); err != nil {
|
||||
return oauthTokens{}, false, fmt.Errorf("decode token response: %w", err)
|
||||
}
|
||||
if t.AccessToken == "" {
|
||||
return oauthTokens{}, false, fmt.Errorf("token response missing access_token")
|
||||
}
|
||||
return t, false, nil
|
||||
}
|
||||
|
||||
// Non-200. A 400 (and any body naming invalid_grant / already-used) is a
|
||||
// PERMANENT rejection of the refresh_token. 401/403 likewise mean the seed
|
||||
// is no good. Everything else (429/5xx/network-shaped) is transient.
|
||||
lowerBody := strings.ToLower(string(respBody))
|
||||
isInvalidGrant := strings.Contains(lowerBody, "invalid_grant") ||
|
||||
strings.Contains(lowerBody, "refresh token already used") ||
|
||||
strings.Contains(lowerBody, "already been used") ||
|
||||
strings.Contains(lowerBody, "token has been revoked")
|
||||
switch {
|
||||
case resp.StatusCode == http.StatusBadRequest && isInvalidGrant:
|
||||
return oauthTokens{}, true, fmt.Errorf("oauth %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
|
||||
case resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden:
|
||||
return oauthTokens{}, true, fmt.Errorf("oauth %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
|
||||
default:
|
||||
return oauthTokens{}, false, fmt.Errorf("oauth %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
|
||||
}
|
||||
}
|
||||
|
||||
// parseTokens extracts the OAuth trio from an auth.json blob, accepting both
|
||||
// the nested `{"tokens":{...}}` shape the codex CLI writes and a flat top-level
|
||||
// shape some seeds use.
|
||||
func parseTokens(blob string) (oauthTokens, error) {
|
||||
var top map[string]json.RawMessage
|
||||
if err := json.Unmarshal([]byte(blob), &top); err != nil {
|
||||
return oauthTokens{}, err
|
||||
}
|
||||
if nested, ok := top["tokens"]; ok {
|
||||
var t oauthTokens
|
||||
if err := json.Unmarshal(nested, &t); err != nil {
|
||||
return oauthTokens{}, fmt.Errorf("decode nested tokens: %w", err)
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
var t oauthTokens
|
||||
if err := json.Unmarshal([]byte(blob), &t); err != nil {
|
||||
return oauthTokens{}, err
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// mergeTokens writes the rotated trio back into the original blob in-place,
|
||||
// preserving the blob's shape (nested-vs-flat) and every other field. A field
|
||||
// in the OAuth response that is empty (e.g. id_token omitted) does NOT clobber
|
||||
// the existing value.
|
||||
func mergeTokens(blob string, rotated oauthTokens) (string, error) {
|
||||
var top map[string]json.RawMessage
|
||||
if err := json.Unmarshal([]byte(blob), &top); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
applyTo := func(m map[string]json.RawMessage) error {
|
||||
setStr := func(key, val string) error {
|
||||
if val == "" {
|
||||
return nil // don't clobber an existing value with an empty one
|
||||
}
|
||||
b, err := json.Marshal(val)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m[key] = b
|
||||
return nil
|
||||
}
|
||||
if err := setStr("access_token", rotated.AccessToken); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setStr("refresh_token", rotated.RefreshToken); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setStr("id_token", rotated.IDToken); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if nestedRaw, ok := top["tokens"]; ok {
|
||||
var nested map[string]json.RawMessage
|
||||
if err := json.Unmarshal(nestedRaw, &nested); err != nil {
|
||||
return "", fmt.Errorf("decode nested tokens for merge: %w", err)
|
||||
}
|
||||
if err := applyTo(nested); err != nil {
|
||||
return "", err
|
||||
}
|
||||
nb, err := json.Marshal(nested)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
top["tokens"] = nb
|
||||
} else {
|
||||
if err := applyTo(top); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
out, err := json.Marshal(top)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(out), nil
|
||||
}
|
||||
|
||||
// jwtExp decodes the `exp` claim (Unix seconds) from a JWT access token WITHOUT
|
||||
// verifying the signature (we only need the expiry to decide due-ness; the
|
||||
// token's validity is OpenAI's to enforce). Returns ok=false when the token is
|
||||
// not a parseable 3-part JWT or carries no numeric exp.
|
||||
func jwtExp(token string) (time.Time, bool) {
|
||||
parts := strings.Split(token, ".")
|
||||
if len(parts) != 3 {
|
||||
return time.Time{}, false
|
||||
}
|
||||
payload, err := base64.RawURLEncoding.DecodeString(parts[1])
|
||||
if err != nil {
|
||||
// Some encoders pad; tolerate standard base64url with padding too.
|
||||
payload, err = base64.URLEncoding.DecodeString(parts[1])
|
||||
if err != nil {
|
||||
return time.Time{}, false
|
||||
}
|
||||
}
|
||||
var claims struct {
|
||||
Exp json.Number `json:"exp"`
|
||||
}
|
||||
if err := json.Unmarshal(payload, &claims); err != nil {
|
||||
return time.Time{}, false
|
||||
}
|
||||
secs, err := claims.Exp.Int64()
|
||||
if err != nil || secs <= 0 {
|
||||
return time.Time{}, false
|
||||
}
|
||||
return time.Unix(secs, 0), true
|
||||
}
|
||||
|
||||
// postgresStore is the production SecretStore backed by global_secrets, using
|
||||
// the SAME crypto path the secrets handler uses (DecryptVersioned on read,
|
||||
// Encrypt + CurrentEncryptionVersion on write).
|
||||
type postgresStore struct {
|
||||
// db is the handle on which Get and Put run their global_secrets queries.
db *sql.DB
|
||||
}
|
||||
|
||||
func (s *postgresStore) Get(ctx context.Context, key string) (string, bool, error) {
|
||||
var enc []byte
|
||||
var ver int
|
||||
err := s.db.QueryRowContext(ctx,
|
||||
`SELECT encrypted_value, encryption_version FROM global_secrets WHERE key = $1`, key).
|
||||
Scan(&enc, &ver)
|
||||
if err == sql.ErrNoRows {
|
||||
return "", false, nil
|
||||
}
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
plain, err := crypto.DecryptVersioned(enc, ver)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
return string(plain), true, nil
|
||||
}
|
||||
|
||||
func (s *postgresStore) Put(ctx context.Context, key, value string) error {
|
||||
enc, err := crypto.Encrypt([]byte(value))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ver := crypto.CurrentEncryptionVersion()
|
||||
_, err = s.db.ExecContext(ctx, `
|
||||
INSERT INTO global_secrets (key, encrypted_value, encryption_version)
|
||||
VALUES ($1, $2, $3)
|
||||
ON CONFLICT (key) DO UPDATE
|
||||
SET encrypted_value = $2, encryption_version = $3, updated_at = now()
|
||||
`, key, enc, ver)
|
||||
return err
|
||||
}
|
||||
@@ -0,0 +1,425 @@
|
||||
package codexauth
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// --- test doubles -----------------------------------------------------------
|
||||
|
||||
// fakeStore is an in-memory SecretStore. nil entry = absent key.
|
||||
type fakeStore struct {
|
||||
// mu is held by Get, Put and get while they touch the fields below.
mu sync.Mutex
|
||||
// values is the in-memory key → secret map backing the store.
values map[string]string
|
||||
// getErr, when non-nil, is returned by every Get call.
getErr error
|
||||
// putErr, when non-nil, is returned by every Put call and nothing is stored.
putErr error
|
||||
puts int32 // count of successful Put calls
|
||||
}
|
||||
|
||||
// newFakeStore returns a fakeStore whose values map is initialized and empty.
func newFakeStore() *fakeStore { return &fakeStore{values: map[string]string{}} }
|
||||
|
||||
func (f *fakeStore) Get(_ context.Context, key string) (string, bool, error) {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
if f.getErr != nil {
|
||||
return "", false, f.getErr
|
||||
}
|
||||
v, ok := f.values[key]
|
||||
return v, ok, nil
|
||||
}
|
||||
|
||||
func (f *fakeStore) Put(_ context.Context, key, value string) error {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
if f.putErr != nil {
|
||||
return f.putErr
|
||||
}
|
||||
f.values[key] = value
|
||||
atomic.AddInt32(&f.puts, 1)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *fakeStore) get(key string) string {
|
||||
f.mu.Lock()
|
||||
defer f.mu.Unlock()
|
||||
return f.values[key]
|
||||
}
|
||||
|
||||
// fakeTransport records every request and returns a scripted response. It is
|
||||
// the network seam — tests NEVER make a real request.
|
||||
type fakeTransport struct {
|
||||
// mu is held by Do while it appends to urls, methods and bodies.
mu sync.Mutex
|
||||
// calls counts Do invocations; updated and read atomically.
calls int32
|
||||
// urls holds the URL of each request, in the order Do recorded them.
urls []string
|
||||
// methods holds the HTTP method of each request, parallel to urls.
methods []string
|
||||
// bodies holds each request body as a string ("" when the request had none).
bodies []string
|
||||
// status is the scripted response status; Do treats 0 as 200 OK.
status int
|
||||
// respBody is the scripted response body.
respBody string
|
||||
transport func(*http.Request) (*http.Response, error) // optional override
|
||||
}
|
||||
|
||||
func (t *fakeTransport) Do(req *http.Request) (*http.Response, error) {
|
||||
atomic.AddInt32(&t.calls, 1)
|
||||
t.mu.Lock()
|
||||
t.urls = append(t.urls, req.URL.String())
|
||||
t.methods = append(t.methods, req.Method)
|
||||
if req.Body != nil {
|
||||
b, _ := io.ReadAll(req.Body)
|
||||
t.bodies = append(t.bodies, string(b))
|
||||
} else {
|
||||
t.bodies = append(t.bodies, "")
|
||||
}
|
||||
t.mu.Unlock()
|
||||
|
||||
if t.transport != nil {
|
||||
return t.transport(req)
|
||||
}
|
||||
status := t.status
|
||||
if status == 0 {
|
||||
status = http.StatusOK
|
||||
}
|
||||
return &http.Response{
|
||||
StatusCode: status,
|
||||
Body: io.NopCloser(strings.NewReader(t.respBody)),
|
||||
Header: make(http.Header),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// callCount atomically reads how many times Do has been invoked.
func (t *fakeTransport) callCount() int { return int(atomic.LoadInt32(&t.calls)) }
|
||||
|
||||
// --- helpers ----------------------------------------------------------------
|
||||
|
||||
// makeJWT builds an unsigned-but-parseable JWT whose payload carries exp.
|
||||
func makeJWT(exp time.Time) string {
|
||||
header := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"none","typ":"JWT"}`))
|
||||
payload := base64.RawURLEncoding.EncodeToString([]byte(
|
||||
fmt.Sprintf(`{"exp":%d,"sub":"codex"}`, exp.Unix())))
|
||||
sig := base64.RawURLEncoding.EncodeToString([]byte("sig"))
|
||||
return header + "." + payload + "." + sig
|
||||
}
|
||||
|
||||
// authBlob builds a nested codex auth.json blob with the given tokens.
|
||||
func authBlob(access, refresh string) string {
|
||||
b, _ := json.Marshal(map[string]any{
|
||||
"tokens": map[string]any{
|
||||
"access_token": access,
|
||||
"refresh_token": refresh,
|
||||
"id_token": "id-original",
|
||||
},
|
||||
"OPENAI_API_KEY": nil,
|
||||
"last_refresh": "2026-01-01T00:00:00Z",
|
||||
})
|
||||
return string(b)
|
||||
}
|
||||
|
||||
func newTestRefresher(store SecretStore, client httpDoer, now time.Time) *refresher {
|
||||
return &refresher{
|
||||
store: store,
|
||||
client: client,
|
||||
now: func() time.Time { return now },
|
||||
}
|
||||
}
|
||||
|
||||
func okRefreshResponse(access, refresh string) string {
|
||||
b, _ := json.Marshal(oauthTokens{AccessToken: access, RefreshToken: refresh, IDToken: "id-new"})
|
||||
return string(b)
|
||||
}
|
||||
|
||||
// --- tests ------------------------------------------------------------------
|
||||
|
||||
// TestJWTExpParse covers the exp decode (valid, malformed, missing).
|
||||
func TestJWTExpParse(t *testing.T) {
|
||||
want := time.Now().Add(2 * time.Hour).Truncate(time.Second)
|
||||
got, ok := jwtExp(makeJWT(want))
|
||||
if !ok {
|
||||
t.Fatalf("jwtExp(valid) ok=false, want true")
|
||||
}
|
||||
if !got.Equal(want) {
|
||||
t.Errorf("jwtExp = %v, want %v", got, want)
|
||||
}
|
||||
|
||||
if _, ok := jwtExp("not-a-jwt"); ok {
|
||||
t.Errorf("jwtExp(non-jwt) ok=true, want false")
|
||||
}
|
||||
if _, ok := jwtExp("a.b.c"); ok {
|
||||
t.Errorf("jwtExp(garbage parts) ok=true, want false")
|
||||
}
|
||||
// 3 parts but payload has no exp.
|
||||
noExp := base64.RawURLEncoding.EncodeToString([]byte("{}"))
|
||||
if _, ok := jwtExp("h." + noExp + ".s"); ok {
|
||||
t.Errorf("jwtExp(no exp claim) ok=true, want false")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefreshOnce_SkipWhenFresh: a token well outside the safety margin is NOT
|
||||
// refreshed — no POST, no write-back.
|
||||
func TestRefreshOnce_SkipWhenFresh(t *testing.T) {
|
||||
now := time.Now()
|
||||
store := newFakeStore()
|
||||
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(2*time.Hour)), "rt-1")
|
||||
tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse("new-at", "rt-2")}
|
||||
r := newTestRefresher(store, tr, now)
|
||||
|
||||
if perm := r.refreshOnce(context.Background()); perm {
|
||||
t.Fatalf("fresh token: permanentFailure=true, want false")
|
||||
}
|
||||
if tr.callCount() != 0 {
|
||||
t.Errorf("fresh token: %d OAuth POSTs, want 0", tr.callCount())
|
||||
}
|
||||
if atomic.LoadInt32(&store.puts) != 0 {
|
||||
t.Errorf("fresh token: %d write-backs, want 0", store.puts)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefreshOnce_RotateThenReskip: a token inside the margin is refreshed once
|
||||
// (POST + write-back of the rotated blob); a subsequent call on the now-fresh
|
||||
// rotated token skips (no second POST). Proves rotate→write-back→re-skip.
|
||||
func TestRefreshOnce_RotateThenReskip(t *testing.T) {
|
||||
now := time.Now()
|
||||
store := newFakeStore()
|
||||
// Expires in 5m — inside the 15m safety margin → DUE.
|
||||
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(5*time.Minute)), "rt-1")
|
||||
// Rotated access token is fresh (2h out); rotated refresh is rt-2.
|
||||
tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")}
|
||||
r := newTestRefresher(store, tr, now)
|
||||
|
||||
if perm := r.refreshOnce(context.Background()); perm {
|
||||
t.Fatalf("due token: permanentFailure=true, want false")
|
||||
}
|
||||
if tr.callCount() != 1 {
|
||||
t.Fatalf("due token: %d OAuth POSTs, want exactly 1", tr.callCount())
|
||||
}
|
||||
if atomic.LoadInt32(&store.puts) != 1 {
|
||||
t.Fatalf("due token: %d write-backs, want exactly 1", store.puts)
|
||||
}
|
||||
|
||||
// The written blob must carry the rotated refresh_token and preserve the
|
||||
// non-token field.
|
||||
rotated := store.get(CodexAuthSecretKey)
|
||||
tokens, err := parseTokens(rotated)
|
||||
if err != nil {
|
||||
t.Fatalf("parse rotated blob: %v", err)
|
||||
}
|
||||
if tokens.RefreshToken != "rt-2" {
|
||||
t.Errorf("rotated refresh_token = %q, want rt-2", tokens.RefreshToken)
|
||||
}
|
||||
if !strings.Contains(rotated, "last_refresh") {
|
||||
t.Errorf("rotated blob dropped the preserved last_refresh field: %s", rotated)
|
||||
}
|
||||
|
||||
// Second call: the rotated access token is fresh → skip, no new POST.
|
||||
if perm := r.refreshOnce(context.Background()); perm {
|
||||
t.Fatalf("re-skip: permanentFailure=true, want false")
|
||||
}
|
||||
if tr.callCount() != 1 {
|
||||
t.Errorf("re-skip: %d total OAuth POSTs, want still 1", tr.callCount())
|
||||
}
|
||||
if atomic.LoadInt32(&store.puts) != 1 {
|
||||
t.Errorf("re-skip: %d total write-backs, want still 1", store.puts)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefreshOnce_NoSecretInert: absent CODEX_AUTH_JSON → inert (no POST, no
|
||||
// write-back, no error/permanent).
|
||||
func TestRefreshOnce_NoSecretInert(t *testing.T) {
|
||||
store := newFakeStore() // empty
|
||||
tr := &fakeTransport{}
|
||||
r := newTestRefresher(store, tr, time.Now())
|
||||
|
||||
if perm := r.refreshOnce(context.Background()); perm {
|
||||
t.Fatalf("no secret: permanentFailure=true, want false")
|
||||
}
|
||||
if tr.callCount() != 0 {
|
||||
t.Errorf("no secret: %d POSTs, want 0", tr.callCount())
|
||||
}
|
||||
if atomic.LoadInt32(&store.puts) != 0 {
|
||||
t.Errorf("no secret: %d write-backs, want 0", store.puts)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefreshOnce_PermanentFailNoWriteNoStorm: a 400 invalid_grant must (a) not
|
||||
// write back, (b) return permanentFailure=true, and (c) NOT re-POST on the next
|
||||
// cycle for the same (burned) seed — the anti-storm latch.
|
||||
func TestRefreshOnce_PermanentFailNoWriteNoStorm(t *testing.T) {
|
||||
now := time.Now()
|
||||
store := newFakeStore()
|
||||
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-burned")
|
||||
tr := &fakeTransport{
|
||||
status: http.StatusBadRequest,
|
||||
respBody: `{"error":"invalid_grant","error_description":"refresh token already used"}`,
|
||||
}
|
||||
r := newTestRefresher(store, tr, now)
|
||||
|
||||
perm := r.refreshOnce(context.Background())
|
||||
if !perm {
|
||||
t.Fatalf("invalid_grant: permanentFailure=false, want true")
|
||||
}
|
||||
if tr.callCount() != 1 {
|
||||
t.Fatalf("invalid_grant: %d POSTs, want exactly 1", tr.callCount())
|
||||
}
|
||||
if atomic.LoadInt32(&store.puts) != 0 {
|
||||
t.Fatalf("invalid_grant: %d write-backs, want 0 (must NOT persist a failed refresh)", store.puts)
|
||||
}
|
||||
|
||||
// Next cycle, SAME burned seed: must NOT re-POST (anti-storm latch).
|
||||
perm2 := r.refreshOnce(context.Background())
|
||||
if tr.callCount() != 1 {
|
||||
t.Errorf("anti-storm: re-POSTed a burned refresh_token (%d total POSTs, want still 1)", tr.callCount())
|
||||
}
|
||||
_ = perm2 // latched cycle returns false (already-known failure, nothing new)
|
||||
|
||||
// A RE-SEED (blob changes) clears the latch and allows a fresh attempt.
|
||||
store.mu.Lock()
|
||||
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-freshly-seeded")
|
||||
store.mu.Unlock()
|
||||
tr.status = http.StatusOK
|
||||
tr.respBody = okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-rotated")
|
||||
if perm := r.refreshOnce(context.Background()); perm {
|
||||
t.Fatalf("post-reseed: permanentFailure=true, want false")
|
||||
}
|
||||
if tr.callCount() != 2 {
|
||||
t.Errorf("post-reseed: %d total POSTs, want 2 (latch should clear on re-seed)", tr.callCount())
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefreshOnce_TransientNoWriteNoLatch: a 5xx is transient — no write-back,
|
||||
// returns false (no hard backoff latch), and a later cycle retries.
|
||||
func TestRefreshOnce_TransientNoWriteNoLatch(t *testing.T) {
|
||||
now := time.Now()
|
||||
store := newFakeStore()
|
||||
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-1")
|
||||
tr := &fakeTransport{status: http.StatusServiceUnavailable, respBody: "upstream down"}
|
||||
r := newTestRefresher(store, tr, now)
|
||||
|
||||
if perm := r.refreshOnce(context.Background()); perm {
|
||||
t.Fatalf("503: permanentFailure=true, want false (transient)")
|
||||
}
|
||||
if atomic.LoadInt32(&store.puts) != 0 {
|
||||
t.Errorf("503: %d write-backs, want 0", store.puts)
|
||||
}
|
||||
// Retry next cycle succeeds (no latch on transient).
|
||||
tr.status = http.StatusOK
|
||||
tr.respBody = okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")
|
||||
if perm := r.refreshOnce(context.Background()); perm {
|
||||
t.Fatalf("retry after 503: permanentFailure=true, want false")
|
||||
}
|
||||
if tr.callCount() != 2 {
|
||||
t.Errorf("transient retry: %d total POSTs, want 2", tr.callCount())
|
||||
}
|
||||
if atomic.LoadInt32(&store.puts) != 1 {
|
||||
t.Errorf("transient retry: %d write-backs, want 1", store.puts)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefreshOnce_SingleFlight: concurrent refreshOnce calls on a DUE token must
|
||||
// POST exactly once total — the package mutex serializes them and the second
|
||||
// sees the freshly-rotated (now-fresh) token and skips. Structural single-flight.
|
||||
func TestRefreshOnce_SingleFlight(t *testing.T) {
|
||||
now := time.Now()
|
||||
store := newFakeStore()
|
||||
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-1")
|
||||
// Every successful rotation yields a FRESH (2h) access token, so once one
|
||||
// caller rotates, the other sees fresh and skips.
|
||||
tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")}
|
||||
r := newTestRefresher(store, tr, now)
|
||||
|
||||
const n = 16
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(n)
|
||||
for i := 0; i < n; i++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
r.refreshOnce(context.Background())
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
if tr.callCount() != 1 {
|
||||
t.Errorf("single-flight: %d OAuth POSTs across %d concurrent calls, want exactly 1", tr.callCount(), n)
|
||||
}
|
||||
if atomic.LoadInt32(&store.puts) != 1 {
|
||||
t.Errorf("single-flight: %d write-backs, want exactly 1", store.puts)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefreshOnce_PostsExactlyOnceToOAuthEndpoint: when it DOES refresh, the
|
||||
// single POST goes to the OAuth token URL with the refresh_token grant body.
|
||||
func TestRefreshOnce_PostsExactlyOnceToOAuthEndpoint(t *testing.T) {
|
||||
now := time.Now()
|
||||
store := newFakeStore()
|
||||
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-secret")
|
||||
tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")}
|
||||
r := newTestRefresher(store, tr, now)
|
||||
|
||||
r.refreshOnce(context.Background())
|
||||
|
||||
if tr.callCount() != 1 {
|
||||
t.Fatalf("%d POSTs, want exactly 1", tr.callCount())
|
||||
}
|
||||
if tr.urls[0] != oauthTokenURL {
|
||||
t.Errorf("POST URL = %q, want %q", tr.urls[0], oauthTokenURL)
|
||||
}
|
||||
if tr.methods[0] != http.MethodPost {
|
||||
t.Errorf("method = %q, want POST", tr.methods[0])
|
||||
}
|
||||
var body map[string]string
|
||||
if err := json.Unmarshal([]byte(tr.bodies[0]), &body); err != nil {
|
||||
t.Fatalf("request body not json: %v (%s)", err, tr.bodies[0])
|
||||
}
|
||||
if body["grant_type"] != "refresh_token" {
|
||||
t.Errorf("grant_type = %q, want refresh_token", body["grant_type"])
|
||||
}
|
||||
if body["refresh_token"] != "rt-secret" {
|
||||
t.Errorf("refresh_token = %q, want rt-secret", body["refresh_token"])
|
||||
}
|
||||
if body["client_id"] != codexOAuthClientID {
|
||||
t.Errorf("client_id = %q, want %q", body["client_id"], codexOAuthClientID)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefreshOnce_ReadErrorSkips: a store read error is a transient skip (no
|
||||
// POST, no permanent latch).
|
||||
func TestRefreshOnce_ReadErrorSkips(t *testing.T) {
|
||||
store := newFakeStore()
|
||||
store.getErr = fmt.Errorf("db down")
|
||||
tr := &fakeTransport{}
|
||||
r := newTestRefresher(store, tr, time.Now())
|
||||
if perm := r.refreshOnce(context.Background()); perm {
|
||||
t.Errorf("read error: permanentFailure=true, want false")
|
||||
}
|
||||
if tr.callCount() != 0 {
|
||||
t.Errorf("read error: %d POSTs, want 0", tr.callCount())
|
||||
}
|
||||
}
|
||||
|
||||
// TestMergeTokens_PreservesOtherFields proves the rotated write-back keeps every
|
||||
// non-token field and does not clobber id_token with an empty rotated value.
|
||||
func TestMergeTokens_PreservesOtherFields(t *testing.T) {
|
||||
blob := authBlob("old-at", "old-rt")
|
||||
out, err := mergeTokens(blob, oauthTokens{AccessToken: "new-at", RefreshToken: "new-rt"}) // no id_token
|
||||
if err != nil {
|
||||
t.Fatalf("mergeTokens: %v", err)
|
||||
}
|
||||
tokens, err := parseTokens(out)
|
||||
if err != nil {
|
||||
t.Fatalf("parse merged: %v", err)
|
||||
}
|
||||
if tokens.AccessToken != "new-at" || tokens.RefreshToken != "new-rt" {
|
||||
t.Errorf("merged tokens = %+v, want new-at/new-rt", tokens)
|
||||
}
|
||||
if tokens.IDToken != "id-original" {
|
||||
t.Errorf("empty rotated id_token clobbered the original: got %q, want id-original", tokens.IDToken)
|
||||
}
|
||||
if !strings.Contains(out, "last_refresh") {
|
||||
t.Errorf("merge dropped preserved field: %s", out)
|
||||
}
|
||||
}
|
||||
@@ -334,28 +334,39 @@ func (h *WorkspaceHandler) ProxyA2A(c *gin.Context) {
|
||||
c.Data(status, "application/json", respBody)
|
||||
}
|
||||
|
||||
// checkWorkspaceBudget returns a proxyA2AError with 402 when the workspace
|
||||
// has a budget_limit set and monthly_spend has reached or exceeded it.
|
||||
// DB errors are logged and treated as fail-open — a budget check failure
|
||||
// must not block legitimate A2A traffic.
|
||||
// checkWorkspaceBudget returns a proxyA2AError with 402 when the workspace has
|
||||
// exceeded ANY of its configured per-period budget limits (hourly/daily/weekly/
|
||||
// monthly — see budget_periods.go). Per-period spend is the rolling-window sum
|
||||
// over the workspace_spend_events ledger. DB errors are logged and treated as
|
||||
// fail-open — a budget check failure must not block legitimate A2A traffic.
|
||||
func (h *WorkspaceHandler) checkWorkspaceBudget(ctx context.Context, workspaceID string) *proxyA2AError {
|
||||
var budgetLimit sql.NullInt64
|
||||
var monthlySpend int64
|
||||
err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`,
|
||||
var limitsRaw []byte
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT COALESCE(budget_limits, '{}'::jsonb) FROM workspaces WHERE id = $1`,
|
||||
workspaceID,
|
||||
).Scan(&budgetLimit, &monthlySpend)
|
||||
if err != nil {
|
||||
).Scan(&limitsRaw); err != nil {
|
||||
if err != sql.ErrNoRows {
|
||||
log.Printf("ProxyA2A: budget check failed for %s: %v", workspaceID, err)
|
||||
}
|
||||
return nil // fail-open
|
||||
}
|
||||
if budgetLimit.Valid && monthlySpend >= budgetLimit.Int64 {
|
||||
log.Printf("ProxyA2A: budget exceeded for %s (spend=%d limit=%d)", workspaceID, monthlySpend, budgetLimit.Int64)
|
||||
limits := parseBudgetLimits(limitsRaw)
|
||||
if len(limits) == 0 {
|
||||
return nil // no limits configured
|
||||
}
|
||||
spend, err := spendByPeriod(ctx, db.DB, workspaceID)
|
||||
if err != nil {
|
||||
log.Printf("ProxyA2A: budget spend query failed for %s: %v", workspaceID, err)
|
||||
return nil // fail-open
|
||||
}
|
||||
if over := exceededPeriods(limits, spend); len(over) > 0 {
|
||||
log.Printf("ProxyA2A: budget exceeded for %s (periods=%v limits=%v spend=%v)", workspaceID, over, limits, spend)
|
||||
return &proxyA2AError{
|
||||
Status: http.StatusPaymentRequired,
|
||||
Response: gin.H{"error": "workspace budget limit exceeded"},
|
||||
Status: http.StatusPaymentRequired,
|
||||
Response: gin.H{
|
||||
"error": "workspace budget limit exceeded",
|
||||
"exceeded_periods": over,
|
||||
},
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -375,6 +386,30 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
|
||||
Response: gin.H{"error": "access denied: workspaces cannot communicate per hierarchy rules"},
|
||||
}
|
||||
}
|
||||
|
||||
// #1953 cross-tenant isolation. CanCommunicate alone does NOT enforce
|
||||
// org boundaries: its "root-level siblings — both have no parent" rule
|
||||
// treats every tenant's org root as a sibling, so a caller that is an
|
||||
// org root could resolve and route a2a to another tenant's org root
|
||||
// (and resolveAgentURL accepts ANY workspace id with no org check).
|
||||
// Gate on the SAME parent_id-chain org scoping the OFFSEC-015 broadcast
|
||||
// fix uses: reject before resolveAgentURL when caller and target are in
|
||||
// different orgs. Fail-closed — a DB error denies cross-org routing.
|
||||
ok, err := sameOrg(ctx, db.DB, callerID, workspaceID)
|
||||
if err != nil {
|
||||
log.Printf("ProxyA2A: org-scope check failed %s → %s: %v — denying", callerID, workspaceID, err)
|
||||
return 0, nil, &proxyA2AError{
|
||||
Status: http.StatusForbidden,
|
||||
Response: gin.H{"error": "access denied: org isolation check failed"},
|
||||
}
|
||||
}
|
||||
if !ok {
|
||||
log.Printf("ProxyA2A: cross-org routing denied %s → %s (#1953)", callerID, workspaceID)
|
||||
return 0, nil, &proxyA2AError{
|
||||
Status: http.StatusForbidden,
|
||||
Response: gin.H{"error": "access denied: target workspace is in a different org"},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Budget enforcement: reject A2A calls when the workspace has exceeded its
|
||||
|
||||
@@ -16,9 +16,9 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
@@ -437,6 +437,10 @@ func TestProxyA2A_CallerIDPropagated(t *testing.T) {
|
||||
WithArgs("ws-target").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", "ws-parent"))
|
||||
|
||||
// #1953 cross-tenant guard: same-org check after CanCommunicate. Both
|
||||
// workspaces resolve to the same org root → routing allowed.
|
||||
mockSameOrg(mock, "ws-caller", "ws-target", true)
|
||||
|
||||
expectBudgetCheck(mock, "ws-target")
|
||||
|
||||
// Expect activity log with source_id set
|
||||
@@ -465,6 +469,24 @@ func TestProxyA2A_CallerIDPropagated(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// mockSameOrg sets up the two org-root recursive-CTE expectations that the
|
||||
// #1953 cross-tenant guard in proxyA2ARequest runs after CanCommunicate passes.
|
||||
// sameOrg=true returns the SAME root_id for both caller and target (same tenant);
|
||||
// sameOrg=false returns different root_ids (cross-tenant → routing must be denied).
|
||||
func mockSameOrg(mock sqlmock.Sqlmock, caller, target string, sameOrg bool) {
|
||||
callerRoot := "org-root-shared"
|
||||
targetRoot := "org-root-shared"
|
||||
if !sameOrg {
|
||||
targetRoot = "org-root-other-tenant"
|
||||
}
|
||||
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(callerRoot))
|
||||
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
|
||||
WithArgs(target).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(targetRoot))
|
||||
}
|
||||
|
||||
// mockCanCommunicate sets up sqlmock expectations for CanCommunicate(caller, target).
|
||||
// allowed=true sets up rows that satisfy the access policy (siblings under same parent).
|
||||
// allowed=false sets up rows that don't (different parents).
|
||||
@@ -659,6 +681,9 @@ func TestProxyA2A_CallerIDDerivedFromBearer(t *testing.T) {
|
||||
WithArgs("ws-target").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", "ws-parent"))
|
||||
|
||||
// 3b. #1953 cross-tenant guard — same org root → routing allowed.
|
||||
mockSameOrg(mock, "ws-caller", "ws-target", true)
|
||||
|
||||
expectBudgetCheck(mock, "ws-target")
|
||||
|
||||
// 4. activity_logs INSERT — verify source_id arg is the derived ws-caller
|
||||
@@ -2092,6 +2117,10 @@ func (f *fakeCPProv) Stop(_ context.Context, _ string) error {
|
||||
f.stopCalls++
|
||||
return nil
|
||||
}
|
||||
// StopAndPrune is the fake's stop-and-prune hook: it ignores its arguments,
// increments stopCalls (the same counter Stop increments) and returns nil.
func (f *fakeCPProv) StopAndPrune(_ context.Context, _ string) error {
|
||||
f.stopCalls++
|
||||
return nil
|
||||
}
|
||||
// GetConsoleOutput is a no-op stub: it ignores its arguments and always
// returns an empty string and a nil error.
func (f *fakeCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
@@ -18,8 +18,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/alicebob/miniredis/v2"
|
||||
)
|
||||
|
||||
@@ -209,10 +209,12 @@ func drainSetup(t *testing.T, workspaceID string) (sqlmock.Sqlmock, *WorkspaceHa
|
||||
// Named distinctly from handlers_test.go's expectBudgetCheck (which uses MatchPsql
|
||||
// escaped-regex and cannot be reused with QueryMatcherEqual tests).
|
||||
func expectQueueBudgetCheck(mock sqlmock.Sqlmock, workspaceID string) {
|
||||
// Multi-period (#49): exact-match the budget_limits read; "{}" → no limits →
|
||||
// checkWorkspaceBudget returns early (no spend query).
|
||||
mock.ExpectQuery(
|
||||
"SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1",
|
||||
"SELECT COALESCE(budget_limits, '{}'::jsonb) FROM workspaces WHERE id = $1",
|
||||
).WithArgs(workspaceID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte("{}")))
|
||||
}
|
||||
|
||||
// seedRedisURL puts the agent server URL into the Redis cache so resolveAgentURL
|
||||
|
||||
@@ -148,6 +148,125 @@ func (h *AdminSchedulesHealthHandler) Health(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, entries)
|
||||
}
|
||||
|
||||
// orphanScheduleEntry is one row in the Orphans response.
|
||||
type orphanScheduleEntry struct {
|
||||
// WorkspaceID is the schedule's workspace_id column (the workspace it is bound to).
WorkspaceID string `json:"workspace_id"`
|
||||
WorkspaceStatus string `json:"workspace_status"` // "removed" | "missing"
|
||||
// ScheduleID is the workspace_schedules row id.
ScheduleID string `json:"schedule_id"`
|
||||
// ScheduleName is the schedule's name column.
ScheduleName string `json:"schedule_name"`
|
||||
// Source is the schedule's source column; Orphans reads a NULL source as "".
Source string `json:"source"`
|
||||
// Enabled is the schedule's enabled column.
Enabled bool `json:"enabled"`
|
||||
// CronExpr is the schedule's cron_expr column.
CronExpr string `json:"cron_expr"`
|
||||
}
|
||||
|
||||
// Orphans handles GET /admin/schedules/orphans — the monitor surface for
|
||||
// internal#2006. Health (above) reports only LIVE workspaces' schedules, so a
|
||||
// schedule left on a removed/recreated workspace silently stops firing and
|
||||
// never appears there. This endpoint lists exactly those orphans (workspace
|
||||
// removed OR missing) so an operator/monitor can alert. Returns 200 + JSON
|
||||
// array (empty when none). Auth via adminAuth() in router.go.
|
||||
func (h *AdminSchedulesHealthHandler) Orphans(c *gin.Context) {
|
||||
ctx := c.Request.Context()
|
||||
rows, err := db.DB.QueryContext(ctx, `
|
||||
SELECT s.workspace_id,
|
||||
CASE WHEN w.id IS NULL THEN 'missing' ELSE 'removed' END AS ws_status,
|
||||
s.id, s.name, COALESCE(s.source, ''), s.enabled, s.cron_expr
|
||||
FROM workspace_schedules s
|
||||
LEFT JOIN workspaces w ON w.id = s.workspace_id
|
||||
WHERE w.id IS NULL OR w.status = 'removed'
|
||||
ORDER BY s.name ASC
|
||||
`)
|
||||
if err != nil {
|
||||
log.Printf("AdminSchedulesOrphans: query error: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to query orphans"})
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
out := make([]orphanScheduleEntry, 0)
|
||||
for rows.Next() {
|
||||
var e orphanScheduleEntry
|
||||
if err := rows.Scan(&e.WorkspaceID, &e.WorkspaceStatus, &e.ScheduleID, &e.ScheduleName, &e.Source, &e.Enabled, &e.CronExpr); err != nil {
|
||||
log.Printf("AdminSchedulesOrphans: scan error: %v", err)
|
||||
continue
|
||||
}
|
||||
out = append(out, e)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
log.Printf("AdminSchedulesOrphans: rows iteration error: %v", err)
|
||||
}
|
||||
c.JSON(http.StatusOK, out)
|
||||
}
|
||||
|
||||
// ReapOrphans handles POST /admin/schedules/reap-orphans — the orphan cleaner
|
||||
// (internal#2006). For every schedule bound to a removed/nonexistent workspace
|
||||
// it re-points runtime-created schedules onto the live successor agent (matched
|
||||
// by role+parent, falling back to name+parent) when one exists and doesn't
|
||||
// already carry a same-named schedule; schedules with no live successor are
|
||||
// disabled (enabled=false) so the scheduler stops firing into a dead workspace.
|
||||
// Idempotent: re-running with no orphans is a no-op. Returns a summary count.
|
||||
// Auth is enforced by the adminAuth() middleware registered in router.go.
|
||||
func (h *AdminSchedulesHealthHandler) ReapOrphans(c *gin.Context) {
|
||||
ctx := c.Request.Context()
|
||||
|
||||
// 1. Re-point runtime schedules onto a live successor (same role+parent,
|
||||
// else same name+parent). Skip names already present on the successor.
|
||||
repointed, err := db.DB.ExecContext(ctx, `
|
||||
WITH orphan AS (
|
||||
SELECT s.id, s.name, s.workspace_id, prev.role AS role, prev.parent_id AS parent_id
|
||||
FROM workspace_schedules s
|
||||
JOIN workspaces prev ON prev.id = s.workspace_id
|
||||
WHERE prev.status = 'removed' AND s.source = 'runtime'
|
||||
),
|
||||
successor AS (
|
||||
SELECT o.id AS schedule_id, o.name AS schedule_name,
|
||||
(
|
||||
SELECT w.id FROM workspaces w
|
||||
WHERE w.status != 'removed'
|
||||
AND w.parent_id IS NOT DISTINCT FROM o.parent_id
|
||||
AND ((o.role IS NOT NULL AND w.role = o.role))
|
||||
ORDER BY w.updated_at DESC NULLS LAST LIMIT 1
|
||||
) AS live_id
|
||||
FROM orphan o
|
||||
)
|
||||
UPDATE workspace_schedules s
|
||||
SET workspace_id = su.live_id, updated_at = now()
|
||||
FROM successor su
|
||||
WHERE s.id = su.schedule_id
|
||||
AND su.live_id IS NOT NULL
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM workspace_schedules t
|
||||
WHERE t.workspace_id = su.live_id AND t.name = su.schedule_name
|
||||
)
|
||||
`)
|
||||
if err != nil {
|
||||
log.Printf("ReapOrphans: re-point error: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "re-point failed"})
|
||||
return
|
||||
}
|
||||
repointedN, _ := repointed.RowsAffected()
|
||||
|
||||
// 2. Disable any remaining schedules still bound to a removed/missing
|
||||
// workspace (no live successor, or template schedules on a dead row).
|
||||
disabled, err := db.DB.ExecContext(ctx, `
|
||||
UPDATE workspace_schedules s
|
||||
SET enabled = false, updated_at = now()
|
||||
WHERE s.enabled = true
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM workspaces w
|
||||
WHERE w.id = s.workspace_id AND w.status != 'removed'
|
||||
)
|
||||
`)
|
||||
if err != nil {
|
||||
log.Printf("ReapOrphans: disable error: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "disable failed"})
|
||||
return
|
||||
}
|
||||
disabledN, _ := disabled.RowsAffected()
|
||||
|
||||
log.Printf("ReapOrphans: re-pointed %d, disabled %d orphaned schedule(s)", repointedN, disabledN)
|
||||
c.JSON(http.StatusOK, gin.H{"repointed": repointedN, "disabled": disabledN})
|
||||
}
|
||||
|
||||
// classifyScheduleStatus returns the health status string for a schedule.
|
||||
// - "never_run" — last_run_at is NULL (schedule has never fired)
|
||||
// - "stale" — now - last_run_at > staleThreshold (and threshold > 0)
|
||||
|
||||
@@ -444,3 +444,72 @@ func TestAdminSchedulesHealth_ResponseFields(t *testing.T) {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== Orphans + ReapOrphans (internal#2006) ====================
|
||||
|
||||
// TestAdminSchedulesOrphans verifies the monitor surface lists schedules bound
|
||||
// to a removed/missing workspace (the recreate-orphan failure mode).
|
||||
func TestAdminSchedulesOrphans(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
handler := NewAdminSchedulesHealthHandler()
|
||||
|
||||
mock.ExpectQuery(`LEFT JOIN workspaces`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"workspace_id", "ws_status", "id", "name", "source", "enabled", "cron_expr",
|
||||
}).AddRow("dead-ws", "removed", "sched-1", "minimax-autonomous-tick", "runtime", false, "*/5 * * * *"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/admin/schedules/orphans", nil)
|
||||
|
||||
handler.Orphans(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp []orphanScheduleEntry
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("parse response: %v", err)
|
||||
}
|
||||
if len(resp) != 1 {
|
||||
t.Fatalf("expected 1 orphan, got %d", len(resp))
|
||||
}
|
||||
if resp[0].ScheduleName != "minimax-autonomous-tick" || resp[0].WorkspaceStatus != "removed" || resp[0].Source != "runtime" {
|
||||
t.Errorf("unexpected orphan entry: %+v", resp[0])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReapOrphans verifies the cleaner re-points runtime schedules onto a live
|
||||
// successor then disables any remaining dead-bound schedules, returning counts.
|
||||
func TestReapOrphans(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
handler := NewAdminSchedulesHealthHandler()
|
||||
|
||||
mock.ExpectExec(`UPDATE workspace_schedules s\s+SET workspace_id`).
|
||||
WillReturnResult(sqlmock.NewResult(0, 2))
|
||||
mock.ExpectExec(`UPDATE workspace_schedules s\s+SET enabled = false`).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("POST", "/admin/schedules/reap-orphans", nil)
|
||||
|
||||
handler.ReapOrphans(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]int64
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("parse response: %v", err)
|
||||
}
|
||||
if resp["repointed"] != 2 || resp["disabled"] != 1 {
|
||||
t.Errorf("expected repointed=2 disabled=1, got %+v", resp)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"log"
|
||||
"net/http"
|
||||
|
||||
@@ -12,42 +14,79 @@ import (
|
||||
// BudgetHandler exposes per-workspace budget read/write endpoints.
|
||||
// Routes (all behind WorkspaceAuth middleware):
|
||||
//
|
||||
// GET /workspaces/:id/budget — current budget_limit, monthly_spend, budget_remaining
|
||||
// PATCH /workspaces/:id/budget — set or clear budget_limit
|
||||
// GET /workspaces/:id/budget — per-period limits, spend, remaining
|
||||
// PATCH /workspaces/:id/budget — set/clear per-period limits
|
||||
//
|
||||
// Multi-period (#49): the budget is now four independent rolling windows —
|
||||
// hourly/daily/weekly/monthly (budget_periods.go is the SSOT for the set). The
|
||||
// canonical config is workspaces.budget_limits (JSONB, USD cents per period);
|
||||
// per-period spend is the rolling-window sum over workspace_spend_events. The
|
||||
// legacy single monthly budget_limit / monthly_spend are still emitted (and
|
||||
// budget_limit kept in sync to the monthly period) for back-compat with
|
||||
// pre-deploy canvas/agent builds during the rollout window.
|
||||
type BudgetHandler struct{}
|
||||
|
||||
func NewBudgetHandler() *BudgetHandler { return &BudgetHandler{} }
|
||||
|
||||
// budgetResponse is the canonical JSON shape for both GET and PATCH responses.
|
||||
// periodBudget is the view of one budget period in the API response.
type periodBudget struct {
	// Limit is the configured ceiling; nil (JSON null) means no limit.
	Limit *int64 `json:"limit"`
	// Spend is the rolling-window spend for the period.
	Spend int64 `json:"spend"`
	// Remaining is Limit - Spend; nil when there is no limit. It is not
	// clamped, so a negative value shows how far over the period is.
	Remaining *int64 `json:"remaining"`
}
|
||||
|
||||
// budgetResponse is the canonical JSON shape for GET and PATCH.
|
||||
type budgetResponse struct {
|
||||
// BudgetLimit is the monthly spend ceiling in USD cents (null = no limit).
|
||||
// budget_limit=500 means $5.00/month.
|
||||
BudgetLimit *int64 `json:"budget_limit"`
|
||||
// MonthlySpend is the agent's self-reported accumulated LLM API spend
|
||||
// for the current month (USD cents). Incremented via heartbeat.
|
||||
MonthlySpend int64 `json:"monthly_spend"`
|
||||
// BudgetRemaining is null when BudgetLimit is null, otherwise
|
||||
// max(0, budget_limit - monthly_spend). Can be negative — we store the
|
||||
// actual value so callers can see how far over-budget a workspace is.
|
||||
// Periods is keyed by BudgetPeriod ("hourly"/"daily"/"weekly"/"monthly").
|
||||
Periods map[string]periodBudget `json:"periods"`
|
||||
|
||||
// --- back-compat (monthly), for pre-multi-period clients ---
|
||||
BudgetLimit *int64 `json:"budget_limit"`
|
||||
MonthlySpend int64 `json:"monthly_spend"`
|
||||
BudgetRemaining *int64 `json:"budget_remaining"`
|
||||
}
|
||||
|
||||
// buildBudgetResponse assembles the per-period view from the stored limits +
|
||||
// the ledger spend. Single place so GET and PATCH return identical shapes.
|
||||
func buildBudgetResponse(ctx context.Context, workspaceID string, limitsRaw []byte) (budgetResponse, error) {
|
||||
limits := parseBudgetLimits(limitsRaw)
|
||||
spend, err := spendByPeriod(ctx, db.DB, workspaceID)
|
||||
if err != nil {
|
||||
return budgetResponse{}, err
|
||||
}
|
||||
periods := make(map[string]periodBudget, len(budgetPeriods))
|
||||
for _, def := range budgetPeriods {
|
||||
pb := periodBudget{Spend: spend[def.Name]}
|
||||
if lim, ok := limits[def.Name]; ok {
|
||||
l := lim
|
||||
pb.Limit = &l
|
||||
r := lim - spend[def.Name]
|
||||
pb.Remaining = &r
|
||||
}
|
||||
periods[string(def.Name)] = pb
|
||||
}
|
||||
resp := budgetResponse{Periods: periods, MonthlySpend: spend[PeriodMonthly]}
|
||||
if m := periods[string(PeriodMonthly)]; m.Limit != nil {
|
||||
resp.BudgetLimit = m.Limit
|
||||
resp.BudgetRemaining = m.Remaining
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// GetBudget handles GET /workspaces/:id/budget.
|
||||
// Returns the workspace's current budget ceiling, accumulated spend, and
|
||||
// computed remaining headroom. Both budget_limit and budget_remaining are
|
||||
// null when no limit has been configured for the workspace.
|
||||
func (h *BudgetHandler) GetBudget(c *gin.Context) {
|
||||
workspaceID := c.Param("id")
|
||||
ctx := c.Request.Context()
|
||||
|
||||
var budgetLimit sql.NullInt64
|
||||
var monthlySpend int64
|
||||
var limitsRaw []byte
|
||||
err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT budget_limit, COALESCE(monthly_spend, 0)
|
||||
`SELECT COALESCE(budget_limits, '{}'::jsonb)
|
||||
FROM workspaces
|
||||
WHERE id = $1 AND status != 'removed'`,
|
||||
workspaceID,
|
||||
).Scan(&budgetLimit, &monthlySpend)
|
||||
).Scan(&limitsRaw)
|
||||
if err == sql.ErrNoRows {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
@@ -58,66 +97,80 @@ func (h *BudgetHandler) GetBudget(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
resp := budgetResponse{
|
||||
MonthlySpend: monthlySpend,
|
||||
resp, err := buildBudgetResponse(ctx, workspaceID, limitsRaw)
|
||||
if err != nil {
|
||||
log.Printf("GetBudget: spend query failed for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
|
||||
return
|
||||
}
|
||||
if budgetLimit.Valid {
|
||||
limit := budgetLimit.Int64
|
||||
resp.BudgetLimit = &limit
|
||||
remaining := limit - monthlySpend
|
||||
resp.BudgetRemaining = &remaining
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
// PatchBudget handles PATCH /workspaces/:id/budget.
|
||||
// Accepts {"budget_limit": <int64>} to set a new ceiling, or
|
||||
// {"budget_limit": null} to remove an existing ceiling.
|
||||
// Returns the updated budget state in the same shape as GetBudget.
|
||||
// PatchBudget handles PATCH /workspaces/:id/budget. Accepts EITHER the
|
||||
// multi-period shape
|
||||
//
|
||||
// {"budget_limits": {"hourly": 100, "daily": null, "weekly": 500, "monthly": 2000}}
|
||||
//
|
||||
// (a per-period value of null/absent clears that period; a positive int sets it)
|
||||
// OR the legacy single-monthly shape {"budget_limit": 2000} / {"budget_limit": null}.
|
||||
func (h *BudgetHandler) PatchBudget(c *gin.Context) {
|
||||
workspaceID := c.Param("id")
|
||||
ctx := c.Request.Context()
|
||||
|
||||
// We need to distinguish between "field absent" and "field = null",
|
||||
// so we unmarshal into a raw map first.
|
||||
var raw map[string]interface{}
|
||||
var raw map[string]json.RawMessage
|
||||
if err := c.ShouldBindJSON(&raw); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
|
||||
return
|
||||
}
|
||||
|
||||
budgetLimitRaw, ok := raw["budget_limit"]
|
||||
if !ok {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit field is required"})
|
||||
_, hasLimits := raw["budget_limits"]
|
||||
_, hasLegacy := raw["budget_limit"]
|
||||
if !hasLimits && !hasLegacy {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limits or budget_limit field is required"})
|
||||
return
|
||||
}
|
||||
|
||||
// Validate and convert the value. JSON numbers decode as float64.
|
||||
var budgetArg interface{} // nil → SQL NULL, int64 → new ceiling
|
||||
if budgetLimitRaw != nil {
|
||||
switch v := budgetLimitRaw.(type) {
|
||||
case float64:
|
||||
if v < 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
|
||||
limits := make(map[BudgetPeriod]int64, len(budgetPeriods))
|
||||
known := make(map[string]bool, len(budgetPeriods))
|
||||
for _, def := range budgetPeriods {
|
||||
known[string(def.Name)] = true
|
||||
}
|
||||
|
||||
if hasLimits {
|
||||
var m map[string]*int64
|
||||
if err := json.Unmarshal(raw["budget_limits"], &m); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limits must be an object of period→int|null"})
|
||||
return
|
||||
}
|
||||
for k, v := range m {
|
||||
if !known[k] {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "unknown budget period: " + k + " (allowed: hourly, daily, weekly, monthly)"})
|
||||
return
|
||||
}
|
||||
cv := int64(v)
|
||||
budgetArg = cv
|
||||
case int64:
|
||||
if v < 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
|
||||
if v == nil {
|
||||
continue // clear this period (null = no limit)
|
||||
}
|
||||
if *v < 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "budget limit for " + k + " must be >= 0 (USD cents)"})
|
||||
return
|
||||
}
|
||||
budgetArg = v
|
||||
default:
|
||||
limits[BudgetPeriod(k)] = *v // 0 is valid = block-all for this period
|
||||
}
|
||||
} else { // legacy single-monthly
|
||||
var v *int64
|
||||
if err := json.Unmarshal(raw["budget_limit"], &v); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be an integer (USD cents) or null"})
|
||||
return
|
||||
}
|
||||
if v != nil {
|
||||
if *v < 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
|
||||
return
|
||||
}
|
||||
limits[PeriodMonthly] = *v // 0 is valid = block-all (legacy semantics)
|
||||
}
|
||||
}
|
||||
// budgetArg == nil means "clear the ceiling"
|
||||
|
||||
// Existence check — return 404 for non-existent / removed workspaces.
|
||||
// Existence check — 404 for non-existent / removed workspaces.
|
||||
var exists bool
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1 AND status != 'removed')`,
|
||||
@@ -127,38 +180,28 @@ func (h *BudgetHandler) PatchBudget(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Persist: budget_limits is the SSOT; keep the legacy budget_limit column
|
||||
// synced to the monthly period so pre-deploy enforcement paths stay coherent
|
||||
// during the rollout window.
|
||||
var legacyMonthly interface{}
|
||||
if m, ok := limits[PeriodMonthly]; ok {
|
||||
legacyMonthly = m
|
||||
}
|
||||
encoded := encodeBudgetLimits(limits)
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET budget_limit = $2, updated_at = now() WHERE id = $1`,
|
||||
workspaceID, budgetArg,
|
||||
`UPDATE workspaces SET budget_limits = $2, budget_limit = $3, updated_at = now() WHERE id = $1`,
|
||||
workspaceID, encoded, legacyMonthly,
|
||||
); err != nil {
|
||||
log.Printf("PatchBudget: update failed for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
|
||||
return
|
||||
}
|
||||
|
||||
// Re-read the current state so the response reflects exactly what is in
|
||||
// the DB, including the monthly_spend the agent has already accumulated.
|
||||
var newLimit sql.NullInt64
|
||||
var monthlySpend int64
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`,
|
||||
workspaceID,
|
||||
).Scan(&newLimit, &monthlySpend); err != nil {
|
||||
resp, err := buildBudgetResponse(ctx, workspaceID, encoded)
|
||||
if err != nil {
|
||||
log.Printf("PatchBudget: re-read failed for %s: %v", workspaceID, err)
|
||||
// Still success — just omit the echo.
|
||||
c.JSON(http.StatusOK, gin.H{"status": "updated"})
|
||||
return
|
||||
}
|
||||
|
||||
resp := budgetResponse{
|
||||
MonthlySpend: monthlySpend,
|
||||
}
|
||||
if newLimit.Valid {
|
||||
limit := newLimit.Int64
|
||||
resp.BudgetLimit = &limit
|
||||
remaining := limit - monthlySpend
|
||||
resp.BudgetRemaining = &remaining
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,160 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
// budget_periods.go — SINGLE SOURCE OF TRUTH for the multi-period per-workspace
|
||||
// LLM budget (#49 follow-up). The supported periods, their rolling windows, the
|
||||
// per-period spend computation (from the workspace_spend_events ledger), and the
|
||||
// over-budget decision all live here so the config endpoint (GetBudget/PatchBudget),
|
||||
// the display, and enforcement (checkWorkspaceBudget) can never drift.
|
||||
//
|
||||
// Spend model: the heartbeat records each observed spend INCREMENT into
|
||||
// workspace_spend_events (recordSpendDelta). Per-period spend is a rolling-window
|
||||
// SUM over that ledger — so the SERVER owns windowing (the agent keeps reporting
|
||||
// its cumulative figure unchanged). Rolling (not calendar) windows: no fragile
|
||||
// month-boundary reset, and "monthly" = a 30-day trailing window.
|
||||
|
||||
// BudgetPeriod is one of the supported rolling budget windows.
|
||||
type BudgetPeriod string
|
||||
|
||||
const (
|
||||
PeriodHourly BudgetPeriod = "hourly"
|
||||
PeriodDaily BudgetPeriod = "daily"
|
||||
PeriodWeekly BudgetPeriod = "weekly"
|
||||
PeriodMonthly BudgetPeriod = "monthly"
|
||||
)
|
||||
|
||||
// budgetPeriodDef pairs a period with its rolling window.
|
||||
type budgetPeriodDef struct {
|
||||
Name BudgetPeriod
|
||||
Window time.Duration
|
||||
}
|
||||
|
||||
// budgetPeriods is the canonical ordered list. ADD A PERIOD = one line here;
|
||||
// every consumer iterates this slice, so nothing else needs to change.
|
||||
var budgetPeriods = []budgetPeriodDef{
|
||||
{PeriodHourly, time.Hour},
|
||||
{PeriodDaily, 24 * time.Hour},
|
||||
{PeriodWeekly, 7 * 24 * time.Hour},
|
||||
{PeriodMonthly, 30 * 24 * time.Hour}, // rolling 30-day window
|
||||
}
|
||||
|
||||
// spendLedgerRetention bounds the ledger: rows older than the largest window
|
||||
// (+ slack) are never read, so the recorder opportunistically prunes them.
|
||||
var spendLedgerRetention = 35 * 24 * time.Hour
|
||||
|
||||
// parseBudgetLimits decodes the workspaces.budget_limits JSONB into a map of
|
||||
// period → limit (USD cents). A limit of ZERO is valid and means "block all
|
||||
// spend for that period" (a $0 ceiling); absent / null / negative / unknown
|
||||
// keys mean "no limit for that period". Tolerant of a NULL/empty column.
|
||||
func parseBudgetLimits(raw []byte) map[BudgetPeriod]int64 {
|
||||
out := make(map[BudgetPeriod]int64, len(budgetPeriods))
|
||||
if len(raw) == 0 {
|
||||
return out
|
||||
}
|
||||
var m map[string]*int64
|
||||
if err := json.Unmarshal(raw, &m); err != nil {
|
||||
return out
|
||||
}
|
||||
for _, def := range budgetPeriods {
|
||||
if v, ok := m[string(def.Name)]; ok && v != nil && *v >= 0 {
|
||||
out[def.Name] = *v
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// encodeBudgetLimits renders a period→limit map back to the canonical JSONB
|
||||
// shape, keeping only KNOWN periods with a non-negative limit (0 = block-all is
|
||||
// preserved; a period absent from the map = no limit). Always returns valid JSON.
|
||||
func encodeBudgetLimits(limits map[BudgetPeriod]int64) []byte {
|
||||
m := make(map[string]int64, len(limits))
|
||||
for _, def := range budgetPeriods {
|
||||
if v, ok := limits[def.Name]; ok && v >= 0 {
|
||||
m[string(def.Name)] = v
|
||||
}
|
||||
}
|
||||
b, err := json.Marshal(m)
|
||||
if err != nil {
|
||||
return []byte("{}")
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// recordSpendDelta appends a positive spend increment to the ledger and
|
||||
// opportunistically prunes rows past the retention horizon for this workspace.
|
||||
// No-op for delta <= 0. Errors are returned for the caller to log (non-fatal).
|
||||
func recordSpendDelta(ctx context.Context, q *sql.DB, workspaceID string, deltaCents int64) error {
|
||||
if deltaCents <= 0 {
|
||||
return nil
|
||||
}
|
||||
if _, err := q.ExecContext(ctx,
|
||||
`INSERT INTO workspace_spend_events (workspace_id, delta_cents) VALUES ($1, $2)`,
|
||||
workspaceID, deltaCents,
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
// Opportunistic prune (cheap; index-backed). Best-effort — ignore error.
|
||||
_, _ = q.ExecContext(ctx,
|
||||
`DELETE FROM workspace_spend_events
|
||||
WHERE workspace_id = $1 AND occurred_at < now() - $2::interval`,
|
||||
workspaceID, pgInterval(spendLedgerRetention),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
|
||||
// spendByPeriod returns the rolling-window spend (USD cents) for every period,
|
||||
// computed in a SINGLE query over the ledger. The outer predicate bounds to the
|
||||
// largest window; per-period FILTERs sum each sub-window. A period with no ledger
|
||||
// rows reports 0. This is THE spend computation — used by both display + enforcement.
|
||||
func spendByPeriod(ctx context.Context, q *sql.DB, workspaceID string) (map[BudgetPeriod]int64, error) {
|
||||
out := make(map[BudgetPeriod]int64, len(budgetPeriods))
|
||||
for _, def := range budgetPeriods {
|
||||
out[def.Name] = 0
|
||||
}
|
||||
row := q.QueryRowContext(ctx, `
|
||||
SELECT
|
||||
COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '1 hour'), 0),
|
||||
COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '24 hours'), 0),
|
||||
COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '7 days'), 0),
|
||||
COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '30 days'), 0)
|
||||
FROM workspace_spend_events
|
||||
WHERE workspace_id = $1 AND occurred_at > now() - interval '30 days'
|
||||
`, workspaceID)
|
||||
var h, d, w, mo int64
|
||||
if err := row.Scan(&h, &d, &w, &mo); err != nil {
|
||||
return out, err
|
||||
}
|
||||
out[PeriodHourly], out[PeriodDaily], out[PeriodWeekly], out[PeriodMonthly] = h, d, w, mo
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// exceededPeriods is PURE: given the configured limits and observed spend, it
|
||||
// returns the periods whose spend has reached/exceeded their limit (in
|
||||
// budgetPeriods order). Only periods WITH a positive limit are considered.
|
||||
// Used by enforcement to decide whether to block.
|
||||
func exceededPeriods(limits map[BudgetPeriod]int64, spend map[BudgetPeriod]int64) []BudgetPeriod {
|
||||
var over []BudgetPeriod
|
||||
for _, def := range budgetPeriods {
|
||||
limit, ok := limits[def.Name]
|
||||
if !ok {
|
||||
continue // no limit configured for this period
|
||||
}
|
||||
// limit >= 0 is a real ceiling (0 = block-all). spend >= limit → over.
|
||||
if spend[def.Name] >= limit {
|
||||
over = append(over, def.Name)
|
||||
}
|
||||
}
|
||||
return over
|
||||
}
|
||||
|
||||
// pgInterval renders a Go duration as a Postgres-interval string ("N seconds").
|
||||
func pgInterval(d time.Duration) string {
|
||||
return strconv.FormatInt(int64(d.Seconds()), 10) + " seconds"
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Pure-logic tests for the multi-period budget SSOT (budget_periods.go). The
|
||||
// DB-touching helpers (spendByPeriod / recordSpendDelta) are exercised via the
|
||||
// handler sqlmock tests; here we pin the parsing + the over-budget decision,
|
||||
// which is where the per-period semantics actually live.
|
||||
|
||||
func TestParseBudgetLimits(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
raw string
|
||||
want map[BudgetPeriod]int64
|
||||
}{
|
||||
{"empty", "", map[BudgetPeriod]int64{}},
|
||||
{"empty-object", "{}", map[BudgetPeriod]int64{}},
|
||||
{"all-four", `{"hourly":100,"daily":200,"weekly":300,"monthly":400}`,
|
||||
map[BudgetPeriod]int64{PeriodHourly: 100, PeriodDaily: 200, PeriodWeekly: 300, PeriodMonthly: 400}},
|
||||
{"null-dropped-zero-kept", `{"hourly":null,"daily":0,"weekly":500}`,
|
||||
map[BudgetPeriod]int64{PeriodDaily: 0, PeriodWeekly: 500}}, // 0 = block-all, kept
|
||||
{"negative-dropped", `{"monthly":-5}`, map[BudgetPeriod]int64{}},
|
||||
{"unknown-key-ignored", `{"yearly":999,"daily":10}`, map[BudgetPeriod]int64{PeriodDaily: 10}},
|
||||
{"malformed-json", `{not json`, map[BudgetPeriod]int64{}},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := parseBudgetLimits([]byte(tc.raw))
|
||||
if !reflect.DeepEqual(got, tc.want) {
|
||||
t.Errorf("parseBudgetLimits(%q) = %v, want %v", tc.raw, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeBudgetLimits_RoundTrip(t *testing.T) {
|
||||
in := map[BudgetPeriod]int64{PeriodHourly: 100, PeriodMonthly: 400}
|
||||
enc := encodeBudgetLimits(in)
|
||||
got := parseBudgetLimits(enc)
|
||||
if !reflect.DeepEqual(got, in) {
|
||||
t.Errorf("round-trip: encode→parse = %v, want %v (enc=%s)", got, in, enc)
|
||||
}
|
||||
// unknown periods dropped; 0 (block-all) kept
|
||||
enc2 := encodeBudgetLimits(map[BudgetPeriod]int64{PeriodDaily: 0, "yearly": 9})
|
||||
if got := parseBudgetLimits(enc2); !reflect.DeepEqual(got, map[BudgetPeriod]int64{PeriodDaily: 0}) {
|
||||
t.Errorf("encode kept 0/dropped unknown: parse(%s) = %v, want {daily:0}", enc2, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExceededPeriods(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
limits map[BudgetPeriod]int64
|
||||
spend map[BudgetPeriod]int64
|
||||
want []BudgetPeriod
|
||||
}{
|
||||
{"no-limits", map[BudgetPeriod]int64{}, map[BudgetPeriod]int64{PeriodHourly: 999}, nil},
|
||||
{"zero-limit-blocks-all", map[BudgetPeriod]int64{PeriodHourly: 0}, map[BudgetPeriod]int64{PeriodHourly: 0}, []BudgetPeriod{PeriodHourly}},
|
||||
{"under-all", map[BudgetPeriod]int64{PeriodDaily: 100}, map[BudgetPeriod]int64{PeriodDaily: 50}, nil},
|
||||
{"at-limit-is-exceeded", map[BudgetPeriod]int64{PeriodDaily: 100}, map[BudgetPeriod]int64{PeriodDaily: 100}, []BudgetPeriod{PeriodDaily}},
|
||||
{"over-limit", map[BudgetPeriod]int64{PeriodHourly: 10}, map[BudgetPeriod]int64{PeriodHourly: 11}, []BudgetPeriod{PeriodHourly}},
|
||||
{"only-hourly-over", map[BudgetPeriod]int64{PeriodHourly: 10, PeriodMonthly: 1000},
|
||||
map[BudgetPeriod]int64{PeriodHourly: 50, PeriodMonthly: 200}, []BudgetPeriod{PeriodHourly}},
|
||||
{"multiple-over-in-order", map[BudgetPeriod]int64{PeriodHourly: 10, PeriodWeekly: 100},
|
||||
map[BudgetPeriod]int64{PeriodHourly: 99, PeriodWeekly: 100}, []BudgetPeriod{PeriodHourly, PeriodWeekly}},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := exceededPeriods(tc.limits, tc.spend)
|
||||
if !reflect.DeepEqual(got, tc.want) {
|
||||
t.Errorf("exceededPeriods(%v,%v) = %v, want %v", tc.limits, tc.spend, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetPeriods_AllReachable guards the SSOT list: every declared period has
|
||||
// a positive window and a unique name (a typo'd duplicate would silently break
|
||||
// per-period accounting).
|
||||
func TestBudgetPeriods_Wellformed(t *testing.T) {
|
||||
seen := map[BudgetPeriod]bool{}
|
||||
for _, d := range budgetPeriods {
|
||||
if d.Window <= 0 {
|
||||
t.Errorf("period %s has non-positive window %v", d.Name, d.Window)
|
||||
}
|
||||
if seen[d.Name] {
|
||||
t.Errorf("duplicate period name %s", d.Name)
|
||||
}
|
||||
seen[d.Name] = true
|
||||
}
|
||||
for _, p := range []BudgetPeriod{PeriodHourly, PeriodDaily, PeriodWeekly, PeriodMonthly} {
|
||||
if !seen[p] {
|
||||
t.Errorf("period %s missing from budgetPeriods SSOT list", p)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -12,15 +12,25 @@ import (
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// Multi-period budget (#49): GET/PATCH now read workspaces.budget_limits (jsonb)
|
||||
// and compute per-period spend from the workspace_spend_events ledger
|
||||
// (spendByPeriod — matched here by the "FROM workspace_spend_events" fragment).
|
||||
// The legacy budget_limit/monthly_spend response fields are still emitted
|
||||
// (monthly period) for rollout back-compat, and the legacy {"budget_limit":N}
|
||||
// PATCH shape still works.
|
||||
|
||||
// spendRows builds the 4-column row spendByPeriod scans (hourly,daily,weekly,monthly).
|
||||
func spendRows(h, d, w, m int64) *sqlmock.Rows {
|
||||
return sqlmock.NewRows([]string{"h", "d", "w", "mo"}).AddRow(h, d, w, m)
|
||||
}
|
||||
|
||||
// ==================== GET /workspaces/:id/budget ====================
|
||||
|
||||
// TestBudgetGet_NotFound verifies that GET /budget returns 404 for an unknown
|
||||
// workspace ID (ErrNoRows from the budget query).
|
||||
func TestBudgetGet_NotFound(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
|
||||
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
|
||||
WithArgs("ws-not-there").
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
@@ -29,8 +39,7 @@ func TestBudgetGet_NotFound(t *testing.T) {
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-not-there"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-not-there/budget", nil)
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.GetBudget(c)
|
||||
NewBudgetHandler().GetBudget(c)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Errorf("expected 404, got %d: %s", w.Code, w.Body.String())
|
||||
@@ -40,12 +49,11 @@ func TestBudgetGet_NotFound(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetGet_DBError verifies that a non-ErrNoRows DB error returns 500.
|
||||
func TestBudgetGet_DBError(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
|
||||
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
|
||||
WithArgs("ws-db-err").
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
|
||||
@@ -54,8 +62,7 @@ func TestBudgetGet_DBError(t *testing.T) {
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-db-err"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-db-err/budget", nil)
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.GetBudget(c)
|
||||
NewBudgetHandler().GetBudget(c)
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Errorf("expected 500, got %d: %s", w.Code, w.Body.String())
|
||||
@@ -65,24 +72,23 @@ func TestBudgetGet_DBError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetGet_NoLimit verifies that budget_limit and budget_remaining are
|
||||
// null when the workspace has no budget ceiling configured.
|
||||
func TestBudgetGet_NoLimit(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
|
||||
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
|
||||
WithArgs("ws-free").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
|
||||
AddRow(nil, int64(42)))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{}`)))
|
||||
mock.ExpectQuery(`FROM workspace_spend_events`).
|
||||
WithArgs("ws-free").
|
||||
WillReturnRows(spendRows(0, 0, 0, 42))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-free"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-free/budget", nil)
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.GetBudget(c)
|
||||
NewBudgetHandler().GetBudget(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
@@ -105,24 +111,23 @@ func TestBudgetGet_NoLimit(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetGet_WithLimit verifies that budget_limit, monthly_spend, and
|
||||
// budget_remaining are all returned correctly when a ceiling is set.
|
||||
func TestBudgetGet_WithLimit(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
|
||||
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
|
||||
WithArgs("ws-capped").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
|
||||
AddRow(int64(500), int64(123)))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":500}`)))
|
||||
mock.ExpectQuery(`FROM workspace_spend_events`).
|
||||
WithArgs("ws-capped").
|
||||
WillReturnRows(spendRows(0, 0, 0, 123))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-capped"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-capped/budget", nil)
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.GetBudget(c)
|
||||
NewBudgetHandler().GetBudget(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
@@ -137,7 +142,6 @@ func TestBudgetGet_WithLimit(t *testing.T) {
|
||||
if resp["monthly_spend"] != float64(123) {
|
||||
t.Errorf("expected monthly_spend=123, got %v", resp["monthly_spend"])
|
||||
}
|
||||
// budget_remaining = 500 - 123 = 377
|
||||
if resp["budget_remaining"] != float64(377) {
|
||||
t.Errorf("expected budget_remaining=377, got %v", resp["budget_remaining"])
|
||||
}
|
||||
@@ -146,24 +150,23 @@ func TestBudgetGet_WithLimit(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetGet_OverBudget verifies that budget_remaining can be negative
|
||||
// when monthly_spend has already exceeded budget_limit.
|
||||
func TestBudgetGet_OverBudget(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
|
||||
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
|
||||
WithArgs("ws-over").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
|
||||
AddRow(int64(100), int64(150)))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":100}`)))
|
||||
mock.ExpectQuery(`FROM workspace_spend_events`).
|
||||
WithArgs("ws-over").
|
||||
WillReturnRows(spendRows(0, 0, 0, 150))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-over"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-over/budget", nil)
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.GetBudget(c)
|
||||
NewBudgetHandler().GetBudget(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
@@ -172,7 +175,6 @@ func TestBudgetGet_OverBudget(t *testing.T) {
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("parse response: %v", err)
|
||||
}
|
||||
// budget_remaining = 100 - 150 = -50 (negative, but we store actual value)
|
||||
if resp["budget_remaining"] != float64(-50) {
|
||||
t.Errorf("expected budget_remaining=-50, got %v", resp["budget_remaining"])
|
||||
}
|
||||
@@ -181,10 +183,59 @@ func TestBudgetGet_OverBudget(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetGet_MultiPeriod pins the new per-period shape: each period reports
|
||||
// its own limit/spend/remaining, and an over-budget sub-period is visible.
|
||||
func TestBudgetGet_MultiPeriod(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
|
||||
WithArgs("ws-mp").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).
|
||||
AddRow([]byte(`{"hourly":100,"daily":1000}`)))
|
||||
mock.ExpectQuery(`FROM workspace_spend_events`).
|
||||
WithArgs("ws-mp").
|
||||
WillReturnRows(spendRows(120, 300, 300, 300)) // hourly over (120>=100)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-mp"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-mp/budget", nil)
|
||||
|
||||
NewBudgetHandler().GetBudget(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp struct {
|
||||
Periods map[string]struct {
|
||||
Limit *int64 `json:"limit"`
|
||||
Spend int64 `json:"spend"`
|
||||
Remaining *int64 `json:"remaining"`
|
||||
} `json:"periods"`
|
||||
}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("parse response: %v", err)
|
||||
}
|
||||
if resp.Periods["hourly"].Limit == nil || *resp.Periods["hourly"].Limit != 100 {
|
||||
t.Errorf("hourly.limit: want 100, got %v", resp.Periods["hourly"].Limit)
|
||||
}
|
||||
if resp.Periods["hourly"].Spend != 120 {
|
||||
t.Errorf("hourly.spend: want 120, got %d", resp.Periods["hourly"].Spend)
|
||||
}
|
||||
if r := resp.Periods["hourly"].Remaining; r == nil || *r != -20 {
|
||||
t.Errorf("hourly.remaining: want -20, got %v", r)
|
||||
}
|
||||
if resp.Periods["weekly"].Limit != nil {
|
||||
t.Errorf("weekly.limit: want null (unset), got %v", resp.Periods["weekly"].Limit)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations not met: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== PATCH /workspaces/:id/budget ====================
|
||||
|
||||
// TestBudgetPatch_MissingField verifies that PATCH /budget with no budget_limit
|
||||
// field in the body returns 400.
|
||||
func TestBudgetPatch_MissingField(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
@@ -196,15 +247,13 @@ func TestBudgetPatch_MissingField(t *testing.T) {
|
||||
bytes.NewBufferString(`{"other_field":123}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.PatchBudget(c)
|
||||
NewBudgetHandler().PatchBudget(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetPatch_InvalidBody verifies that a malformed JSON body returns 400.
|
||||
func TestBudgetPatch_InvalidBody(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
@@ -216,15 +265,13 @@ func TestBudgetPatch_InvalidBody(t *testing.T) {
|
||||
bytes.NewBufferString(`not json`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.PatchBudget(c)
|
||||
NewBudgetHandler().PatchBudget(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetPatch_NegativeValue verifies that a negative budget_limit is rejected.
|
||||
func TestBudgetPatch_NegativeValue(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
@@ -236,15 +283,13 @@ func TestBudgetPatch_NegativeValue(t *testing.T) {
|
||||
bytes.NewBufferString(`{"budget_limit":-1}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.PatchBudget(c)
|
||||
NewBudgetHandler().PatchBudget(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400 for negative budget_limit, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetPatch_InvalidType verifies that a non-numeric budget_limit returns 400.
|
||||
func TestBudgetPatch_InvalidType(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
@@ -256,16 +301,32 @@ func TestBudgetPatch_InvalidType(t *testing.T) {
|
||||
bytes.NewBufferString(`{"budget_limit":"not-a-number"}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.PatchBudget(c)
|
||||
NewBudgetHandler().PatchBudget(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400 for string budget_limit, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetPatch_WorkspaceNotFound verifies that PATCH /budget returns 404
|
||||
// when the workspace doesn't exist.
|
||||
// TestBudgetPatch_UnknownPeriod rejects an unsupported period key.
|
||||
func TestBudgetPatch_UnknownPeriod(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-badperiod"}}
|
||||
c.Request = httptest.NewRequest("PATCH", "/workspaces/ws-badperiod/budget",
|
||||
bytes.NewBufferString(`{"budget_limits":{"yearly":100}}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
NewBudgetHandler().PatchBudget(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400 for unknown period, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
@@ -281,8 +342,7 @@ func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
|
||||
bytes.NewBufferString(`{"budget_limit":500}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.PatchBudget(c)
|
||||
NewBudgetHandler().PatchBudget(c)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Errorf("expected 404, got %d: %s", w.Code, w.Body.String())
|
||||
@@ -292,25 +352,20 @@ func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetPatch_SetLimit verifies that PATCH /budget with a positive value
|
||||
// updates the DB and returns the new budget state.
|
||||
// TestBudgetPatch_SetLimit (legacy monthly shape) updates + returns new state.
|
||||
func TestBudgetPatch_SetLimit(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
// Existence probe
|
||||
mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
|
||||
WithArgs("ws-set-limit").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
|
||||
// UPDATE
|
||||
mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
|
||||
WithArgs("ws-set-limit", int64(500)).
|
||||
mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
|
||||
WithArgs("ws-set-limit", sqlmock.AnyArg(), int64(500)).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
// Re-read for response
|
||||
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
|
||||
mock.ExpectQuery(`FROM workspace_spend_events`).
|
||||
WithArgs("ws-set-limit").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
|
||||
AddRow(int64(500), int64(200)))
|
||||
WillReturnRows(spendRows(0, 0, 0, 200))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -319,8 +374,7 @@ func TestBudgetPatch_SetLimit(t *testing.T) {
|
||||
bytes.NewBufferString(`{"budget_limit":500}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.PatchBudget(c)
|
||||
NewBudgetHandler().PatchBudget(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
@@ -335,7 +389,6 @@ func TestBudgetPatch_SetLimit(t *testing.T) {
|
||||
if resp["monthly_spend"] != float64(200) {
|
||||
t.Errorf("expected monthly_spend=200, got %v", resp["monthly_spend"])
|
||||
}
|
||||
// budget_remaining = 500 - 200 = 300
|
||||
if resp["budget_remaining"] != float64(300) {
|
||||
t.Errorf("expected budget_remaining=300, got %v", resp["budget_remaining"])
|
||||
}
|
||||
@@ -344,8 +397,59 @@ func TestBudgetPatch_SetLimit(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetPatch_ClearLimit verifies that PATCH /budget with budget_limit=null
|
||||
// clears the ceiling, making budget_limit and budget_remaining null in the response.
|
||||
// TestBudgetPatch_SetMultiPeriod sets several periods at once and verifies the
|
||||
// per-period response.
|
||||
func TestBudgetPatch_SetMultiPeriod(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
|
||||
WithArgs("ws-mp-set").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
|
||||
// no monthly in payload → legacy budget_limit column set to NULL
|
||||
mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
|
||||
WithArgs("ws-mp-set", sqlmock.AnyArg(), nil).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectQuery(`FROM workspace_spend_events`).
|
||||
WithArgs("ws-mp-set").
|
||||
WillReturnRows(spendRows(10, 20, 30, 40))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-mp-set"}}
|
||||
c.Request = httptest.NewRequest("PATCH", "/workspaces/ws-mp-set/budget",
|
||||
bytes.NewBufferString(`{"budget_limits":{"hourly":100,"daily":200,"monthly":null}}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
NewBudgetHandler().PatchBudget(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp struct {
|
||||
Periods map[string]struct {
|
||||
Limit *int64 `json:"limit"`
|
||||
Spend int64 `json:"spend"`
|
||||
} `json:"periods"`
|
||||
BudgetLimit *int64 `json:"budget_limit"`
|
||||
}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("parse response: %v", err)
|
||||
}
|
||||
if resp.Periods["hourly"].Limit == nil || *resp.Periods["hourly"].Limit != 100 {
|
||||
t.Errorf("hourly.limit want 100, got %v", resp.Periods["hourly"].Limit)
|
||||
}
|
||||
if resp.Periods["daily"].Limit == nil || *resp.Periods["daily"].Limit != 200 {
|
||||
t.Errorf("daily.limit want 200, got %v", resp.Periods["daily"].Limit)
|
||||
}
|
||||
if resp.BudgetLimit != nil {
|
||||
t.Errorf("monthly cleared → budget_limit should be null, got %v", *resp.BudgetLimit)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations not met: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBudgetPatch_ClearLimit(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
@@ -353,15 +457,12 @@ func TestBudgetPatch_ClearLimit(t *testing.T) {
|
||||
mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
|
||||
WithArgs("ws-clear-limit").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
|
||||
// UPDATE with NULL
|
||||
mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
|
||||
WithArgs("ws-clear-limit", nil).
|
||||
mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
|
||||
WithArgs("ws-clear-limit", sqlmock.AnyArg(), nil).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
// Re-read — budget_limit is now NULL
|
||||
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
|
||||
mock.ExpectQuery(`FROM workspace_spend_events`).
|
||||
WithArgs("ws-clear-limit").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
|
||||
AddRow(nil, int64(50)))
|
||||
WillReturnRows(spendRows(0, 0, 0, 50))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -370,8 +471,7 @@ func TestBudgetPatch_ClearLimit(t *testing.T) {
|
||||
bytes.NewBufferString(`{"budget_limit":null}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.PatchBudget(c)
|
||||
NewBudgetHandler().PatchBudget(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
@@ -391,8 +491,6 @@ func TestBudgetPatch_ClearLimit(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetPatch_UpdateDBError verifies that a DB error during the UPDATE
|
||||
// returns 500.
|
||||
func TestBudgetPatch_UpdateDBError(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
@@ -400,8 +498,8 @@ func TestBudgetPatch_UpdateDBError(t *testing.T) {
|
||||
mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
|
||||
WithArgs("ws-patch-dberr").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
|
||||
mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
|
||||
WithArgs("ws-patch-dberr", int64(500)).
|
||||
mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
|
||||
WithArgs("ws-patch-dberr", sqlmock.AnyArg(), int64(500)).
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
@@ -411,8 +509,7 @@ func TestBudgetPatch_UpdateDBError(t *testing.T) {
|
||||
bytes.NewBufferString(`{"budget_limit":500}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.PatchBudget(c)
|
||||
NewBudgetHandler().PatchBudget(c)
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Errorf("expected 500 on UPDATE error, got %d: %s", w.Code, w.Body.String())
|
||||
@@ -422,8 +519,8 @@ func TestBudgetPatch_UpdateDBError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBudgetPatch_ZeroLimit verifies that budget_limit=0 is accepted (it means
|
||||
// every A2A call is blocked — useful to pause a workspace's LLM spend entirely).
|
||||
// TestBudgetPatch_ZeroLimit verifies budget_limit=0 is accepted + stored (0 =
|
||||
// block-all: every period call is blocked — pauses the workspace's spend).
|
||||
func TestBudgetPatch_ZeroLimit(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
@@ -431,13 +528,12 @@ func TestBudgetPatch_ZeroLimit(t *testing.T) {
|
||||
mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
|
||||
WithArgs("ws-zero-limit").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
|
||||
mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
|
||||
WithArgs("ws-zero-limit", int64(0)).
|
||||
mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
|
||||
WithArgs("ws-zero-limit", sqlmock.AnyArg(), int64(0)).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
|
||||
mock.ExpectQuery(`FROM workspace_spend_events`).
|
||||
WithArgs("ws-zero-limit").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
|
||||
AddRow(int64(0), int64(0)))
|
||||
WillReturnRows(spendRows(0, 0, 0, 0))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -446,11 +542,17 @@ func TestBudgetPatch_ZeroLimit(t *testing.T) {
|
||||
bytes.NewBufferString(`{"budget_limit":0}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
h := NewBudgetHandler()
|
||||
h.PatchBudget(c)
|
||||
NewBudgetHandler().PatchBudget(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200 for zero budget_limit, got %d: %s", w.Code, w.Body.String())
|
||||
t.Fatalf("expected 200 for zero budget_limit, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("parse response: %v", err)
|
||||
}
|
||||
if resp["budget_limit"] != float64(0) {
|
||||
t.Errorf("expected budget_limit=0 (block-all), got %v", resp["budget_limit"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations not met: %v", err)
|
||||
|
||||
@@ -0,0 +1,427 @@
|
||||
package handlers
|
||||
|
||||
// cross_tenant_isolation_test.go — #1953 regression tests.
|
||||
//
|
||||
// Three workspace-server paths historically derived an "org-root sibling set"
|
||||
// as `WHERE parent_id IS NULL`, which matches EVERY tenant's org root (the
|
||||
// workspaces table has no org_id column) → cross-tenant data exposure:
|
||||
//
|
||||
// 1. GET /registry/:id/peers (discovery.Peers)
|
||||
// 2. MCP toolListPeers (mcp_tools.toolListPeers)
|
||||
// 3. a2a routing (a2a_proxy.proxyA2ARequest → resolveAgentURL)
|
||||
//
|
||||
// These tests assert that a workspace in a DIFFERENT org is never returned as a
|
||||
// peer and that a2a refuses to resolve/route to a workspace outside the caller's
|
||||
// org, while same-org peers/targets still work. They reuse the SAME parent_id-
|
||||
// chain org scoping the OFFSEC-015 broadcast fix introduced (org_scope.go).
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// dbHandleForTest returns the global sqlmock-backed *sql.DB that setupTestDB
|
||||
// installs, for tests that need to hand a *sql.DB to a component (e.g.
|
||||
// MCPHandler.database, sameOrg) rather than relying on the package-global.
|
||||
func dbHandleForTest() *sql.DB { return db.DB }
|
||||
|
||||
// peerColsForIsolation lists, in order, the columns of queryPeerMaps' SELECT;
// mocked peer rows in this file are built against it.
var peerColsForIsolation = []string{
	"id",
	"name",
	"role",
	"tier",
	"status",
	"agent_card",
	"url",
	"parent_id",
	"active_tasks",
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Path 1: GET /registry/:id/peers — discovery.Peers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// TestPeers_CrossTenant_OrgRootNotLeaked is the core #1953 regression for the
|
||||
// discovery path. The caller is an org root (parent_id IS NULL). Pre-fix the
|
||||
// handler ran `SELECT ... WHERE w.parent_id IS NULL AND w.id != $1`, returning
|
||||
// every OTHER tenant's org root as a "sibling" peer. Post-fix an org-root caller
|
||||
// issues NO sibling query — its only peers are its own children. If the handler
|
||||
// regressed and issued the cross-tenant sibling query, sqlmock would report an
|
||||
// unexpected query (the expectation below is intentionally NOT registered) and
|
||||
// the test fails.
|
||||
func TestPeers_CrossTenant_OrgRootNotLeaked(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
// Behavioural leak test: register the OLD leaky `parent_id IS NULL` sibling
|
||||
// query so that IF the handler still issues it, it returns another tenant's
|
||||
// org root (org-b-root). The fix removes that query for an org-root caller,
|
||||
// so org-b-root must never appear in the output. Unordered matching makes
|
||||
// the leaky-sibling expectation optional — the fix simply never consumes it.
|
||||
mock.MatchExpectationsInOrder(false)
|
||||
|
||||
caller := "org-a-root" // parent_id IS NULL — an org root for tenant A
|
||||
|
||||
// parent_id lookup → NULL (caller is an org root)
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
|
||||
|
||||
// LEAKY sibling query (pre-fix). Returns a DIFFERENT tenant's org root.
|
||||
// The fix must NOT issue this query; if it does, org-b-root leaks into the
|
||||
// peer list and the output assertion below fails.
|
||||
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id IS NULL AND w.id != \\$1").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows(peerColsForIsolation).
|
||||
AddRow("org-b-root", "Org B Root", "lead", 0, "online", []byte("null"), "http://b-root", nil, 0))
|
||||
|
||||
// Children query — caller's own org-A children only. Return one child.
|
||||
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2").
|
||||
WithArgs(caller, caller).
|
||||
WillReturnRows(sqlmock.NewRows(peerColsForIsolation).
|
||||
AddRow("org-a-child", "Org A Child", "worker", 1, "online", []byte("null"), "http://a-child", caller, 0))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: caller}}
|
||||
c.Request = httptest.NewRequest("GET", "/registry/"+caller+"/peers", nil)
|
||||
|
||||
handler.Peers(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var peers []map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &peers); err != nil {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
|
||||
// The other-tenant org root must NEVER appear; only the same-org child.
|
||||
for _, p := range peers {
|
||||
if id, _ := p["id"].(string); id == "org-b-root" {
|
||||
t.Fatalf("cross-tenant leak (#1953): org-b-root appeared in org-a-root's peer list: %v", peers)
|
||||
}
|
||||
}
|
||||
if len(peers) != 1 {
|
||||
t.Fatalf("expected exactly 1 peer (same-org child), got %d: %v", len(peers), peers)
|
||||
}
|
||||
// NOTE: ExpectationsWereMet is intentionally NOT asserted — the leaky
|
||||
// sibling expectation is deliberately left unconsumed by the fixed path.
|
||||
}
|
||||
|
||||
// TestPeers_SameOrg_SiblingsStillWork is the positive companion: a non-root
|
||||
// child caller still sees its same-org siblings, children, and parent. This
|
||||
// guards against the fix over-scoping and breaking legitimate intra-org
|
||||
// discovery.
|
||||
func TestPeers_SameOrg_SiblingsStillWork(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
caller := "org-a-child-1"
|
||||
parent := "org-a-root"
|
||||
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(parent))
|
||||
|
||||
// Siblings — scoped to the shared parent (one tenant).
|
||||
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2").
|
||||
WithArgs(parent, caller).
|
||||
WillReturnRows(sqlmock.NewRows(peerColsForIsolation).
|
||||
AddRow("org-a-child-2", "Org A Sibling", "worker", 1, "online", []byte("null"), "http://a-sib", parent, 0))
|
||||
|
||||
// Children — none.
|
||||
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
|
||||
WithArgs(caller, caller).
|
||||
WillReturnRows(sqlmock.NewRows(peerColsForIsolation))
|
||||
|
||||
// Parent.
|
||||
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.id = \\$1 AND w.id != \\$2 AND w.status").
|
||||
WithArgs(parent, caller).
|
||||
WillReturnRows(sqlmock.NewRows(peerColsForIsolation).
|
||||
AddRow(parent, "Org A Root", "lead", 0, "online", []byte("null"), "http://a-root", nil, 0))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: caller}}
|
||||
c.Request = httptest.NewRequest("GET", "/registry/"+caller+"/peers", nil)
|
||||
|
||||
handler.Peers(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var peers []map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &peers); err != nil {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
// Sibling + parent = 2 same-org peers.
|
||||
if len(peers) != 2 {
|
||||
t.Fatalf("expected 2 same-org peers (sibling + parent), got %d: %v", len(peers), peers)
|
||||
}
|
||||
names := map[string]bool{}
|
||||
for _, p := range peers {
|
||||
names[fmt.Sprint(p["name"])] = true
|
||||
}
|
||||
if !names["Org A Sibling"] || !names["Org A Root"] {
|
||||
t.Errorf("expected same-org sibling + parent in peer list, got %v", names)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Path 2: MCP toolListPeers — mcp_tools.toolListPeers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// mcpPeerCols lists, in order, the columns of toolListPeers' SELECT; mocked
// MCP peer rows in this file are built against it.
var mcpPeerCols = []string{
	"id",
	"name",
	"role",
	"status",
	"tier",
}
|
||||
|
||||
// TestToolListPeers_CrossTenant_OrgRootNotLeaked is the #1953 regression for
|
||||
// the MCP path. Same shape as the discovery test: an org-root caller must NOT
|
||||
// enumerate other tenants' org roots. The cross-tenant `parent_id IS NULL`
|
||||
// sibling query is intentionally not registered, so if it runs sqlmock fails.
|
||||
func TestToolListPeers_CrossTenant_OrgRootNotLeaked(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mock.MatchExpectationsInOrder(false)
|
||||
h := &MCPHandler{database: dbHandleForTest()}
|
||||
|
||||
caller := "org-a-root"
|
||||
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
|
||||
|
||||
// LEAKY sibling query (pre-fix). Returns another tenant's org root. The fix
|
||||
// must NOT issue this for an org-root caller; if it does, org-b-root leaks
|
||||
// into the output and the assertion below fails. Left optional via
|
||||
// unordered matching, so the fixed path simply never consumes it.
|
||||
mock.ExpectQuery("WHERE w.parent_id IS NULL AND w.id != \\$1").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows(mcpPeerCols).
|
||||
AddRow("org-b-root", "Org B Root", "lead", "online", 0))
|
||||
|
||||
// Children — caller's own org-A children only.
|
||||
mock.ExpectQuery("WHERE w.parent_id = \\$1 AND w.status").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows(mcpPeerCols).
|
||||
AddRow("org-a-child", "Org A Child", "worker", "online", 1))
|
||||
|
||||
out, err := h.toolListPeers(context.Background(), caller)
|
||||
if err != nil {
|
||||
t.Fatalf("toolListPeers returned error: %v", err)
|
||||
}
|
||||
if strings.Contains(out, "org-b-root") || strings.Contains(out, "Org B Root") {
|
||||
t.Fatalf("cross-tenant leak (#1953): another tenant's org root appeared in toolListPeers output:\n%s", out)
|
||||
}
|
||||
if !strings.Contains(out, "org-a-child") {
|
||||
t.Errorf("same-org child missing from toolListPeers output:\n%s", out)
|
||||
}
|
||||
// ExpectationsWereMet intentionally NOT asserted — leaky sibling expectation
|
||||
// is deliberately left unconsumed by the fixed path.
|
||||
}
|
||||
|
||||
// TestToolListPeers_SameOrg_SiblingsStillWork — positive companion for the MCP
|
||||
// path: a non-root child still enumerates its same-org siblings + children + parent.
|
||||
func TestToolListPeers_SameOrg_SiblingsStillWork(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
h := &MCPHandler{database: dbHandleForTest()}
|
||||
|
||||
caller := "org-a-child-1"
|
||||
parent := "org-a-root"
|
||||
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(parent))
|
||||
|
||||
// Siblings — scoped to shared parent.
|
||||
mock.ExpectQuery("WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
|
||||
WithArgs(parent, caller).
|
||||
WillReturnRows(sqlmock.NewRows(mcpPeerCols).
|
||||
AddRow("org-a-child-2", "Org A Sibling", "worker", "online", 1))
|
||||
|
||||
// Children — none.
|
||||
mock.ExpectQuery("WHERE w.parent_id = \\$1 AND w.status").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows(mcpPeerCols))
|
||||
|
||||
// Parent.
|
||||
mock.ExpectQuery("WHERE w.id = \\$1 AND w.status").
|
||||
WithArgs(parent).
|
||||
WillReturnRows(sqlmock.NewRows(mcpPeerCols).
|
||||
AddRow(parent, "Org A Root", "lead", "online", 0))
|
||||
|
||||
out, err := h.toolListPeers(context.Background(), caller)
|
||||
if err != nil {
|
||||
t.Fatalf("toolListPeers returned error: %v", err)
|
||||
}
|
||||
if !strings.Contains(out, "Org A Sibling") || !strings.Contains(out, "Org A Root") {
|
||||
t.Errorf("expected same-org sibling + parent in toolListPeers output:\n%s", out)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Path 3: a2a routing — a2a_proxy.proxyA2ARequest / resolveAgentURL
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// TestProxyA2A_CrossTenant_RoutingDenied is the #1953 regression for a2a
|
||||
// routing. Caller and target are both org roots (parent_id IS NULL) belonging
|
||||
// to DIFFERENT tenants. Pre-fix, CanCommunicate's "root-level siblings" rule
|
||||
// waved this through and resolveAgentURL routed to the foreign tenant. Post-fix
|
||||
// the org-scope guard resolves each to a different org root and returns 403
|
||||
// BEFORE resolveAgentURL/dispatch.
|
||||
func TestProxyA2A_CrossTenant_RoutingDenied(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mr := setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
caller := "org-a-root"
|
||||
target := "org-b-root" // different tenant
|
||||
|
||||
// A URL exists for the target; the guard must deny BEFORE it is used.
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", target), "http://localhost:1")
|
||||
|
||||
// CanCommunicate: both root-level (parent_id NULL) → its weak "root-level
|
||||
// siblings" rule ALLOWS this. The org guard must catch it afterward.
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(caller, nil))
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
|
||||
WithArgs(target).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(target, nil))
|
||||
|
||||
// #1953 org-scope guard: caller resolves to org-a-root, target to org-b-root
|
||||
// → different orgs → 403. (Each org root resolves to itself.)
|
||||
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(caller))
|
||||
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
|
||||
WithArgs(target).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(target))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: target}}
|
||||
body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"cross-tenant"}]}}}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/"+target+"/a2a", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
c.Request.Header.Set("X-Workspace-ID", caller)
|
||||
|
||||
handler.ProxyA2A(c)
|
||||
|
||||
if w.Code != http.StatusForbidden {
|
||||
t.Fatalf("expected 403 for cross-tenant a2a routing, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("body not JSON: %v", err)
|
||||
}
|
||||
if msg, _ := resp["error"].(string); !strings.Contains(msg, "different org") {
|
||||
t.Errorf("expected cross-org denial message, got %v", resp["error"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveAgentURL_CrossTenant_RejectedViaSameOrg is a direct unit test of
|
||||
// the sameOrg primitive that gates resolveAgentURL: a target in a different org
|
||||
// must be reported as NOT same-org, so the a2a guard rejects it before
|
||||
// resolveAgentURL is ever called.
|
||||
func TestResolveAgentURL_CrossTenant_RejectedViaSameOrg(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
caller := "org-a-root"
|
||||
target := "org-b-root"
|
||||
|
||||
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(caller))
|
||||
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
|
||||
WithArgs(target).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(target))
|
||||
|
||||
ok, err := sameOrg(context.Background(), dbHandleForTest(), caller, target)
|
||||
if err != nil {
|
||||
t.Fatalf("sameOrg returned unexpected error: %v", err)
|
||||
}
|
||||
if ok {
|
||||
t.Errorf("expected cross-tenant workspaces to be reported as DIFFERENT orgs, got sameOrg=true")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestProxyA2A_SameOrg_RoutingAllowed — positive companion for a2a: two
|
||||
// same-org siblings route successfully (mirrors TestProxyA2A_CallerIDPropagated
|
||||
// but named to document the #1953 same-org allow path).
|
||||
func TestProxyA2A_SameOrg_RoutingAllowed(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mr := setupTestRedis(t)
|
||||
allowLoopbackForTest(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
waitForHandlerAsyncBeforeDBCleanup(t, handler)
|
||||
|
||||
caller := "org-a-child-1"
|
||||
target := "org-a-child-2"
|
||||
parent := "org-a-root"
|
||||
|
||||
agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprint(w, `{"jsonrpc":"2.0","id":"1","result":{}}`)
|
||||
}))
|
||||
defer agentServer.Close()
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", target), agentServer.URL)
|
||||
|
||||
// CanCommunicate — siblings under shared parent.
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(caller, parent))
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
|
||||
WithArgs(target).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(target, parent))
|
||||
|
||||
// #1953 org guard — both resolve to the same org root → allowed.
|
||||
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(parent))
|
||||
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
|
||||
WithArgs(target).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(parent))
|
||||
|
||||
expectBudgetCheck(mock, target)
|
||||
mock.ExpectExec("INSERT INTO activity_logs").WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: target}}
|
||||
body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"same-org"}]}}}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/"+target+"/a2a", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
c.Request.Header.Set("X-Workspace-ID", caller)
|
||||
|
||||
handler.ProxyA2A(c)
|
||||
time.Sleep(50 * time.Millisecond) // allow the async logA2ASuccess INSERT to flush
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 for same-org a2a routing, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -140,7 +140,14 @@ func buildHTTPResponse(statusCode int, body string) []byte {
|
||||
}
|
||||
|
||||
// setupIntegrationFixtures inserts the rows executeDelegation requires:
|
||||
// - workspaces: source and target (siblings, parent_id=NULL so CanCommunicate=true)
|
||||
// - workspaces: source (org root) + target as its CHILD, so both live in the
|
||||
// SAME org. CanCommunicate=true (parent↔child) AND the #1953 sameOrg() guard
|
||||
// in proxyA2ARequest passes (both resolve to the same org root). A real
|
||||
// delegation happens INSIDE one org. (Previously both were parent_id=NULL —
|
||||
// two DISTINCT org roots — which only "communicated" via CanCommunicate's
|
||||
// root-sibling rule; #1953 added a sameOrg() guard that now denies routing
|
||||
// between two org roots as cross-tenant, so the success-path tests below
|
||||
// must use a same-org source/target pair.)
|
||||
// - activity_logs: the 'delegate' row that updateDelegationStatus UPDATE will find
|
||||
// - delegations: the ledger row that recordLedgerStatus will UPDATE
|
||||
//
|
||||
@@ -148,13 +155,14 @@ func buildHTTPResponse(statusCode int, body string) []byte {
|
||||
func setupIntegrationFixtures(t *testing.T, conn *sql.DB) func() {
|
||||
t.Helper()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
sourceID := integrationTestSourceID // org root (parent_id NULL); target hangs off it
|
||||
for _, ws := range []struct {
|
||||
id string
|
||||
name string
|
||||
parentID *string
|
||||
}{
|
||||
{integrationTestSourceID, "test-source", nil},
|
||||
{integrationTestTargetID, "test-target", nil},
|
||||
{integrationTestTargetID, "test-target", &sourceID}, // child of source → same org
|
||||
} {
|
||||
if _, err := conn.ExecContext(ctx,
|
||||
`INSERT INTO workspaces (id, name, parent_id) VALUES ($1::uuid, $2, $3) ON CONFLICT (id) DO NOTHING`,
|
||||
@@ -510,6 +518,94 @@ func TestIntegration_ExecuteDelegation_RedisDown_FallsBackToDB(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestIntegration_SameOrg_RealCTE_ResolvesAncestorChain is the regression gate
|
||||
// for the org_scope.go recursive-CTE bug (#1953 follow-up). The sqlmock unit
|
||||
// tests feed sameOrg() a pre-computed root_id row, so they CANNOT catch a wrong
|
||||
// CTE — they assume it already returns the right value. Only a real Postgres
|
||||
// run exercises orgRootSubtreeCTE itself.
|
||||
//
|
||||
// The bug: the CTE carried `id AS root_id` from the recursive SEED, so a
|
||||
// non-root workspace resolved to ITSELF instead of its topmost ancestor. That
|
||||
// made sameOrg() return false for two genuinely same-org workspaces and 403 a
|
||||
// legitimate same-org a2a route (over-block). This test seeds a real
|
||||
// root → child → grandchild chain plus a separate org root, and asserts:
|
||||
// - every node in the chain resolves to the SAME org root (root, child, grandchild)
|
||||
// - two workspaces in the same chain are sameOrg (incl. grandchild ↔ root)
|
||||
// - a workspace in a DIFFERENT chain is NOT sameOrg (cross-tenant stays closed)
|
||||
func TestIntegration_SameOrg_RealCTE_ResolvesAncestorChain(t *testing.T) {
|
||||
conn := integrationDB(t)
|
||||
|
||||
const (
|
||||
rootA = "11111111-1111-1111-1111-111111111111"
|
||||
childA = "22222222-2222-2222-2222-222222222222"
|
||||
grandchildA = "33333333-3333-3333-3333-333333333333"
|
||||
rootB = "44444444-4444-4444-4444-444444444444"
|
||||
)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
t.Cleanup(func() {
|
||||
c2, cancel2 := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel2()
|
||||
// Delete leaf-first to respect the parent_id self-FK.
|
||||
for _, id := range []string{grandchildA, childA, rootA, rootB} {
|
||||
conn.ExecContext(c2, `DELETE FROM workspaces WHERE id = $1`, id)
|
||||
}
|
||||
})
|
||||
|
||||
// Insert parent-before-child to satisfy the self-referential FK.
|
||||
seed := []struct {
|
||||
id, name string
|
||||
parent *string
|
||||
}{
|
||||
{rootA, "org-a-root", nil},
|
||||
{childA, "org-a-child", strPtr(rootA)},
|
||||
{grandchildA, "org-a-grandchild", strPtr(childA)},
|
||||
{rootB, "org-b-root", nil},
|
||||
}
|
||||
for _, s := range seed {
|
||||
if _, err := conn.ExecContext(ctx,
|
||||
`INSERT INTO workspaces (id, name, parent_id) VALUES ($1::uuid, $2, $3) ON CONFLICT (id) DO NOTHING`,
|
||||
s.id, s.name, s.parent); err != nil {
|
||||
t.Fatalf("seed %s: %v", s.name, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Every node in chain A must resolve to rootA via the REAL CTE.
|
||||
for _, id := range []string{rootA, childA, grandchildA} {
|
||||
got, err := orgRootID(ctx, conn, id)
|
||||
if err != nil {
|
||||
t.Fatalf("orgRootID(%s): %v", id, err)
|
||||
}
|
||||
if got != rootA {
|
||||
t.Errorf("orgRootID(%s) = %q, want rootA %q (CTE must walk to topmost ancestor)", id, got, rootA)
|
||||
}
|
||||
}
|
||||
|
||||
// Same-org positives — including the grandchild↔root pair that the buggy
|
||||
// CTE got wrong.
|
||||
for _, pair := range [][2]string{{childA, grandchildA}, {rootA, grandchildA}, {rootA, childA}} {
|
||||
ok, err := sameOrg(ctx, conn, pair[0], pair[1])
|
||||
if err != nil {
|
||||
t.Fatalf("sameOrg(%s,%s): %v", pair[0], pair[1], err)
|
||||
}
|
||||
if !ok {
|
||||
t.Errorf("sameOrg(%s,%s) = false, want true (same org chain)", pair[0], pair[1])
|
||||
}
|
||||
}
|
||||
|
||||
// Cross-org negative — isolation must stay closed.
|
||||
for _, pair := range [][2]string{{rootA, rootB}, {grandchildA, rootB}, {childA, rootB}} {
|
||||
ok, err := sameOrg(ctx, conn, pair[0], pair[1])
|
||||
if err != nil {
|
||||
t.Fatalf("sameOrg(%s,%s): %v", pair[0], pair[1], err)
|
||||
}
|
||||
if ok {
|
||||
t.Errorf("sameOrg(%s,%s) = true, want false (different orgs — cross-tenant must stay denied)", pair[0], pair[1])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// extractHostPort parses "http://127.0.0.1:PORT/" and returns "127.0.0.1:PORT".
|
||||
func extractHostPort(rawURL string) string {
|
||||
// Simple parse: strip "http://" prefix and trailing slash.
|
||||
|
||||
@@ -1059,13 +1059,25 @@ func expectExecuteDelegationBase(mock sqlmock.Sqlmock) {
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// CanCommunicate: getWorkspaceRef(source) + getWorkspaceRef(target).
|
||||
// Both are root-level workspaces (parent_id=NULL) → root-level siblings → allowed.
|
||||
// Source and target are siblings under one shared parent (one tenant) →
|
||||
// CanCommunicate allowed. (#1953: they must NOT both be parent_id=NULL —
|
||||
// two distinct org roots are now treated as DIFFERENT orgs and routing
|
||||
// between them is denied. A real delegation happens inside one org.)
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
|
||||
WithArgs(testDeliverySourceID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliverySourceID, nil))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliverySourceID, "ws-org-root-159"))
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
|
||||
WithArgs(testDeliveryTargetID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliveryTargetID, nil))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliveryTargetID, "ws-org-root-159"))
|
||||
|
||||
// #1953 cross-tenant guard: same-org check after CanCommunicate. Both
|
||||
// resolve to the same org root → routing allowed.
|
||||
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
|
||||
WithArgs(testDeliverySourceID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow("ws-org-root-159"))
|
||||
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
|
||||
WithArgs(testDeliveryTargetID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow("ws-org-root-159"))
|
||||
|
||||
// resolveAgentURL: test callers always set the URL in Redis (mr.Set ws:{id}:url),
|
||||
// so resolveAgentURL gets a cache hit and never falls back to DB.
|
||||
|
||||
@@ -1,464 +0,0 @@
|
||||
package handlers
|
||||
|
||||
// derive_provider_drift_test.go — behavior-based AST/text drift gate.
|
||||
//
|
||||
// Why this exists: PR #2535 introduced a Go port of derive-provider.sh
|
||||
// (see deriveProviderFromModelSlug in workspace_provision.go) so the
|
||||
// workspace-server can persist LLM_PROVIDER into workspace_secrets at
|
||||
// provision time. That created two sources of truth:
|
||||
//
|
||||
// 1. molecule-ai-workspace-template-hermes/scripts/derive-provider.sh —
|
||||
// runs inside the container at boot, has the final say on which
|
||||
// provider hermes targets (writes ~/.hermes/config.yaml's
|
||||
// model.provider field). The shell script lives in a separate
|
||||
// OSS repo, so we vendor a snapshot at testdata/derive-provider.sh
|
||||
// to keep this gate hermetic.
|
||||
// 2. workspace-server/internal/handlers/workspace_provision.go's
|
||||
// deriveProviderFromModelSlug — runs at provision time on the
|
||||
// platform side so LLM_PROVIDER lands in workspace_secrets and
|
||||
// survives Save+Restart.
|
||||
//
|
||||
// If a future PR adds a new provider prefix to one but not the other,
|
||||
// the workspace-server's persisted LLM_PROVIDER silently disagrees
|
||||
// with what the container's derive-provider.sh produces. The container
|
||||
// wins (it writes the actual config.yaml), so the workspace-server's
|
||||
// persisted value becomes stale and misleading without anything
|
||||
// flipping red in CI.
|
||||
//
|
||||
// This gate pins the invariant that the *prefix set* the two functions
|
||||
// know about is identical, modulo a small hardcoded acceptedDivergences
|
||||
// map for the two intentional differences documented in
|
||||
// deriveProviderFromModelSlug's doc comment (nousresearch/* and
|
||||
// openai/* both fall back to "openrouter" at provision time because
|
||||
// the runtime env that picks "nous" / "custom" isn't available yet).
|
||||
//
|
||||
// Pattern: the "behavior-based AST gate" from PR #2367 / memory
|
||||
// feedback_behavior_based_ast_gates — pin invariants by what a
|
||||
// function maps, not by what it's named. Walks the actual Go AST of
|
||||
// deriveProviderFromModelSlug's switch statement so a rename or a
|
||||
// duplicate function in another file can't sneak past the gate.
|
||||
//
|
||||
// Task: #242. Companion to the table-driven mapping test in
|
||||
// workspace_provision_shared_test.go (TestDeriveProviderFromModelSlug)
|
||||
// which pins the *values*; this test pins the *coverage* of the
|
||||
// prefix set itself.
|
||||
//
|
||||
// Hermetic: reads two files (vendored shell script + Go source) from
|
||||
// paths relative to the test package directory and parses them
|
||||
// in-process. No network, no docker, no DB. The vendored shell script
|
||||
// at testdata/derive-provider.sh is a snapshot of the upstream OSS
|
||||
// template repo's script — refresh it via the cp command in that file's
|
||||
// header when upstream changes.
|
||||
|
||||
import (
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/token"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// acceptedDivergences lists the model-slug prefixes for which the Go port is
// intentionally allowed to disagree with derive-provider.sh. The key is the
// prefix; the value is the provider the Go function is expected to return
// (the shell may pick a different one at runtime when the relevant env keys
// are present). The rationale is documented on deriveProviderFromModelSlug in
// workspace_provision.go.
//
// When a NEW divergence shows up the drift test fails, and the engineer has
// to either (a) bring the Go function back in line with the shell, or
// (b) add the prefix here together with a comment explaining why diverging
// is intentional and safe at provision time.
var acceptedDivergences = map[string]string{
	// Shell: "custom" when OPENAI_API_KEY is set, "openrouter" when
	//        OPENROUTER_API_KEY is set, and "openrouter" again as the
	//        no-key fallback.
	// Go:    always "openrouter", because runtime keys are not loaded at
	//        provision time; derive-provider.sh upgrades to "custom" at
	//        boot when OPENAI_API_KEY is present.
	"openai": "openrouter",
	// Shell: "nous" when HERMES_API_KEY/NOUS_API_KEY is set, otherwise
	//        "openrouter".
	// Go:    always "openrouter", for the same reason as openai/*;
	//        derive-provider.sh upgrades to "nous" at boot when the keys
	//        are present.
	"nousresearch": "openrouter",
}
|
||||
|
||||
// TestDeriveProviderDrift_ShellAndGoStayInSync is the drift gate.
|
||||
// It extracts the prefix→provider mapping from both sources and
|
||||
// asserts:
|
||||
//
|
||||
// 1. Every prefix the shell knows about, the Go function also handles
|
||||
// (returning either the same provider OR the value pinned in
|
||||
// acceptedDivergences for that prefix).
|
||||
// 2. Every prefix the Go function handles (extracted from its switch
|
||||
// statement via go/ast), the shell case statement also lists.
|
||||
func TestDeriveProviderDrift_ShellAndGoStayInSync(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
shellMap := loadShellPrefixMap(t)
|
||||
goMap := loadGoPrefixMap(t)
|
||||
|
||||
if len(shellMap) == 0 {
|
||||
t.Fatalf("parsed zero prefixes from derive-provider.sh — regex likely broke; rebuild parser before trusting this gate")
|
||||
}
|
||||
if len(goMap) == 0 {
|
||||
t.Fatalf("parsed zero prefixes from deriveProviderFromModelSlug — AST walk likely broke; rebuild parser before trusting this gate")
|
||||
}
|
||||
|
||||
// Direction 1: every shell prefix must be in the Go map (with the
|
||||
// same provider value, or with the documented divergence).
|
||||
for prefix, shellProvider := range shellMap {
|
||||
goProvider, ok := goMap[prefix]
|
||||
if !ok {
|
||||
t.Errorf(
|
||||
"DRIFT: derive-provider.sh has prefix %q -> %q but deriveProviderFromModelSlug doesn't handle it.\n"+
|
||||
"Fix: either add a case for %q to deriveProviderFromModelSlug in "+
|
||||
"workspace-server/internal/handlers/workspace_provision.go (returning %q to match the shell), "+
|
||||
"OR if this prefix is intentionally provision-time-divergent, add it to acceptedDivergences{} "+
|
||||
"in this test with a comment explaining why.",
|
||||
prefix, shellProvider, prefix, shellProvider,
|
||||
)
|
||||
continue
|
||||
}
|
||||
if goProvider == shellProvider {
|
||||
continue
|
||||
}
|
||||
// Mismatch — only acceptable if it's on the explicit divergence list
|
||||
// AND the Go side returns exactly the documented value.
|
||||
expected, divergenceAllowed := acceptedDivergences[prefix]
|
||||
if !divergenceAllowed {
|
||||
t.Errorf(
|
||||
"DRIFT: prefix %q maps to %q in derive-provider.sh but %q in deriveProviderFromModelSlug.\n"+
|
||||
"Fix: align the Go function with the shell (preferred — they should agree), "+
|
||||
"OR if the divergence is intentional and safe at provision time, "+
|
||||
"add %q: %q to acceptedDivergences{} in this test with a comment explaining why.",
|
||||
prefix, shellProvider, goProvider, prefix, goProvider,
|
||||
)
|
||||
continue
|
||||
}
|
||||
if goProvider != expected {
|
||||
t.Errorf(
|
||||
"DRIFT: prefix %q is on the acceptedDivergences list with expected Go value %q but "+
|
||||
"deriveProviderFromModelSlug now returns %q.\n"+
|
||||
"Fix: update acceptedDivergences[%q] in this test to %q (and update its comment), "+
|
||||
"OR revert the Go function to return %q.",
|
||||
prefix, expected, goProvider, prefix, goProvider, expected,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Direction 2: every Go prefix must be in the shell map. Drift in
|
||||
// this direction is rarer (someone added a Go case without touching
|
||||
// the shell) but produces the same broken state — provision-time
|
||||
// LLM_PROVIDER disagrees with what the container actually uses.
|
||||
for prefix, goProvider := range goMap {
|
||||
if _, ok := shellMap[prefix]; ok {
|
||||
continue
|
||||
}
|
||||
t.Errorf(
|
||||
"DRIFT: deriveProviderFromModelSlug handles prefix %q -> %q but derive-provider.sh doesn't list it.\n"+
|
||||
"Fix: add a `%s/*) PROVIDER=%q ;;` case to "+
|
||||
"workspace-configs-templates/hermes/scripts/derive-provider.sh — the Go provision-time hint "+
|
||||
"is meaningless if the container's runtime script doesn't recognize the same prefix.",
|
||||
prefix, goProvider, prefix, goProvider,
|
||||
)
|
||||
}
|
||||
|
||||
// Belt-and-braces: every entry in acceptedDivergences must actually
|
||||
// appear in BOTH maps. A stale divergence entry (prefix removed from
|
||||
// either source) silently weakens the gate.
|
||||
for prefix := range acceptedDivergences {
|
||||
if _, ok := shellMap[prefix]; !ok {
|
||||
t.Errorf(
|
||||
"acceptedDivergences contains prefix %q but derive-provider.sh no longer lists it. "+
|
||||
"Remove the entry from acceptedDivergences{} in this test.",
|
||||
prefix,
|
||||
)
|
||||
}
|
||||
if _, ok := goMap[prefix]; !ok {
|
||||
t.Errorf(
|
||||
"acceptedDivergences contains prefix %q but deriveProviderFromModelSlug no longer lists it. "+
|
||||
"Remove the entry from acceptedDivergences{} in this test.",
|
||||
prefix,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// vendoredShellPath is the testdata snapshot of upstream
|
||||
// derive-provider.sh. The path is relative to the test package
|
||||
// directory (which is what `go test` sets as cwd). See the file's
|
||||
// header for the refresh procedure when upstream changes.
|
||||
const vendoredShellPath = "testdata/derive-provider.sh"
|
||||
|
||||
// goSourcePath is the file containing deriveProviderFromModelSlug.
|
||||
// Relative to the test package directory.
|
||||
const goSourcePath = "workspace_provision.go"
|
||||
|
||||
// loadShellPrefixMap parses derive-provider.sh and returns a
|
||||
// map[prefix]provider for every case clause. Aliases inside a single
|
||||
// `pat1/*|pat2/*)` clause expand to one map entry per alias, both
|
||||
// pointing at the same provider.
|
||||
//
|
||||
// Stops at the first `*)` (the catch-all) and ignores it — the
|
||||
// catch-all maps to PROVIDER="auto" which has no Go counterpart by
|
||||
// design (deriveProviderFromModelSlug returns "" for unknowns and
|
||||
// lets the shell's *=auto branch decide at runtime).
|
||||
//
|
||||
// Ambiguity: case clauses whose body branches on env vars (openai/*,
|
||||
// nousresearch/*) are still extracted as the FIRST PROVIDER= literal
|
||||
// inside the body. The shell's full conditional logic is documented
|
||||
// via the acceptedDivergences map in this file rather than re-encoded
|
||||
// in the parser, because re-encoding sh `if` semantics in regex is a
|
||||
// fool's errand — the divergences are stable and small enough to
|
||||
// hardcode.
|
||||
func loadShellPrefixMap(t *testing.T) map[string]string {
|
||||
t.Helper()
|
||||
raw, err := os.ReadFile(vendoredShellPath)
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v (refresh from upstream — see file header)", vendoredShellPath, err)
|
||||
}
|
||||
|
||||
// Locate the case statement body so we don't accidentally match
|
||||
// PROVIDER= assignments above the case (the HERMES_INFERENCE_PROVIDER
|
||||
// override + the empty-model fallback both write PROVIDER= before
|
||||
// the case). Upstream renamed the case variable to ${_HERMES_MODEL}
|
||||
// in v0.12.0 (the resolved value of HERMES_INFERENCE_MODEL with a
|
||||
// HERMES_DEFAULT_MODEL legacy fallback); accept either spelling so
|
||||
// this test survives a future rename.
|
||||
caseStart := regexp.MustCompile(`(?m)^case\s+"\$\{(_?HERMES(?:_DEFAULT|_INFERENCE)?_MODEL)\}"\s+in\s*$`)
|
||||
startLoc := caseStart.FindIndex(raw)
|
||||
if startLoc == nil {
|
||||
t.Fatalf("could not locate `case \"${...HERMES...MODEL}\" in` in %s — shell file shape changed; rebuild parser", vendoredShellPath)
|
||||
}
|
||||
caseEnd := regexp.MustCompile(`(?m)^esac\s*$`)
|
||||
endLoc := caseEnd.FindIndex(raw[startLoc[1]:])
|
||||
if endLoc == nil {
|
||||
t.Fatalf("could not locate `esac` after the case statement in %s — shell file shape changed", vendoredShellPath)
|
||||
}
|
||||
body := string(raw[startLoc[1] : startLoc[1]+endLoc[0]])
|
||||
|
||||
out := map[string]string{}
|
||||
|
||||
// Pattern A: single-line clauses like
|
||||
// minimax-cn/*) PROVIDER="minimax-cn" ;;
|
||||
// alibaba/*|dashscope/*|qwen/*) PROVIDER="alibaba" ;;
|
||||
// Capture group 1 is the patterns (e.g. `minimax-cn/*` or
|
||||
// `alibaba/*|dashscope/*|qwen/*`); group 2 is the provider literal.
|
||||
singleLine := regexp.MustCompile(`(?m)^\s*([a-zA-Z0-9_./*|\-]+)\)\s*PROVIDER="([^"]+)"\s*;;`)
|
||||
|
||||
// Pattern B: multi-line clauses like
|
||||
// openai/*)
|
||||
// if [ -n "${OPENAI_API_KEY:-}" ]; then
|
||||
// PROVIDER="custom"
|
||||
// ...
|
||||
// We capture the patterns and the FIRST PROVIDER= that follows
|
||||
// (before the next `;;`). The acceptedDivergences map handles the
|
||||
// fact that the runtime branching can pick a different value.
|
||||
multiLine := regexp.MustCompile(`(?ms)^\s*([a-zA-Z0-9_./*|\-]+)\)\s*\n(.*?);;`)
|
||||
|
||||
addEntry := func(patterns, provider string) {
|
||||
// Skip the `*)` catch-all — it has no Go counterpart by design.
|
||||
if strings.TrimSpace(patterns) == "*" {
|
||||
return
|
||||
}
|
||||
for _, alt := range strings.Split(patterns, "|") {
|
||||
alt = strings.TrimSpace(alt)
|
||||
// Each alternative is `<prefix>/*` — strip the trailing `/*`.
|
||||
alt = strings.TrimSuffix(alt, "/*")
|
||||
if alt == "" {
|
||||
continue
|
||||
}
|
||||
// First write wins — a single-line match outranks a multi-line
|
||||
// fallback for the same patterns block (defensive; the regexes
|
||||
// shouldn't overlap on the same line in practice).
|
||||
if _, exists := out[alt]; !exists {
|
||||
out[alt] = provider
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Run single-line first so it claims its lines before the multi-line
|
||||
// pass sees them.
|
||||
consumed := map[int]bool{}
|
||||
for _, m := range singleLine.FindAllStringSubmatchIndex(body, -1) {
|
||||
addEntry(body[m[2]:m[3]], body[m[4]:m[5]])
|
||||
// Mark every line touched so multi-line pass can skip it.
|
||||
for i := m[0]; i < m[1]; i++ {
|
||||
consumed[i] = true
|
||||
}
|
||||
}
|
||||
|
||||
for _, m := range multiLine.FindAllStringSubmatchIndex(body, -1) {
|
||||
// Skip if the start of this match overlaps a single-line clause.
|
||||
if consumed[m[0]] {
|
||||
continue
|
||||
}
|
||||
patterns := body[m[2]:m[3]]
|
||||
clauseBody := body[m[4]:m[5]]
|
||||
// Extract the FIRST PROVIDER="..." from the clause body.
|
||||
firstProvider := regexp.MustCompile(`PROVIDER="([^"]+)"`).FindStringSubmatch(clauseBody)
|
||||
if firstProvider == nil {
|
||||
t.Errorf("multi-line case clause for %q has no PROVIDER= literal — shell file shape changed; rebuild parser", patterns)
|
||||
continue
|
||||
}
|
||||
addEntry(patterns, firstProvider[1])
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// loadGoPrefixMap parses workspace_provision.go and walks the AST to
|
||||
// extract the prefix→provider mapping from deriveProviderFromModelSlug's
|
||||
// switch statement.
|
||||
//
|
||||
// Each case clause's string-literal labels become map keys, all
|
||||
// pointing at the provider returned by that case body's `return "..."`
|
||||
// statement. A clause like `case "alibaba", "dashscope", "qwen":
|
||||
// return "alibaba"` produces three map entries.
|
||||
//
|
||||
// Skips the default clause (returns ""). Skips any case clause whose
|
||||
// body's first statement isn't a single `return STRING_LITERAL` — those
|
||||
// would need their own divergence handling and don't currently exist
|
||||
// in the function.
|
||||
func loadGoPrefixMap(t *testing.T) map[string]string {
|
||||
t.Helper()
|
||||
|
||||
fset := token.NewFileSet()
|
||||
file, err := parser.ParseFile(fset, goSourcePath, nil, parser.ParseComments)
|
||||
if err != nil {
|
||||
t.Fatalf("parse %s: %v", goSourcePath, err)
|
||||
}
|
||||
|
||||
var fn *ast.FuncDecl
|
||||
for _, decl := range file.Decls {
|
||||
f, ok := decl.(*ast.FuncDecl)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if f.Name.Name == "deriveProviderFromModelSlug" {
|
||||
fn = f
|
||||
break
|
||||
}
|
||||
}
|
||||
if fn == nil {
|
||||
t.Fatalf("could not find deriveProviderFromModelSlug in %s — function renamed/removed; this gate's invariant has been violated", goSourcePath)
|
||||
}
|
||||
|
||||
// Walk the function body for the SwitchStmt.
|
||||
var sw *ast.SwitchStmt
|
||||
ast.Inspect(fn.Body, func(n ast.Node) bool {
|
||||
if s, ok := n.(*ast.SwitchStmt); ok {
|
||||
sw = s
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
if sw == nil {
|
||||
t.Fatalf("no switch statement found in deriveProviderFromModelSlug — function shape changed; rebuild parser")
|
||||
}
|
||||
|
||||
out := map[string]string{}
|
||||
for _, stmt := range sw.Body.List {
|
||||
clause, ok := stmt.(*ast.CaseClause)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
// Default clause has no list — skip.
|
||||
if len(clause.List) == 0 {
|
||||
continue
|
||||
}
|
||||
// Find the first return statement in the clause body.
|
||||
var ret *ast.ReturnStmt
|
||||
for _, bodyStmt := range clause.Body {
|
||||
if r, ok := bodyStmt.(*ast.ReturnStmt); ok {
|
||||
ret = r
|
||||
break
|
||||
}
|
||||
}
|
||||
if ret == nil || len(ret.Results) != 1 {
|
||||
t.Errorf("case clause at %s has no single-value return — function shape changed; gate may be incomplete",
|
||||
fset.Position(clause.Pos()))
|
||||
continue
|
||||
}
|
||||
lit, ok := ret.Results[0].(*ast.BasicLit)
|
||||
if !ok || lit.Kind != token.STRING {
|
||||
t.Errorf("case clause at %s returns a non-literal — gate cannot extract provider value",
|
||||
fset.Position(clause.Pos()))
|
||||
continue
|
||||
}
|
||||
provider, err := strconv.Unquote(lit.Value)
|
||||
if err != nil {
|
||||
t.Errorf("case clause at %s has unparseable string literal %q: %v",
|
||||
fset.Position(clause.Pos()), lit.Value, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, expr := range clause.List {
|
||||
lbl, ok := expr.(*ast.BasicLit)
|
||||
if !ok || lbl.Kind != token.STRING {
|
||||
t.Errorf("case clause at %s has a non-string-literal label — gate cannot extract prefix",
|
||||
fset.Position(clause.Pos()))
|
||||
continue
|
||||
}
|
||||
prefix, err := strconv.Unquote(lbl.Value)
|
||||
if err != nil {
|
||||
t.Errorf("case clause at %s has unparseable label literal %q: %v",
|
||||
fset.Position(clause.Pos()), lbl.Value, err)
|
||||
continue
|
||||
}
|
||||
out[prefix] = provider
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// TestDeriveProviderDrift_ShellParserIsSane is a guard test: the shell
|
||||
// parser is regex-based, so we sanity-check that it actually finds the
|
||||
// well-known prefixes documented in derive-provider.sh's header
|
||||
// comment. If this test passes but the main drift test reports
|
||||
// missing prefixes, the bug is almost certainly in the regex (not in
|
||||
// the production code).
|
||||
func TestDeriveProviderDrift_ShellParserIsSane(t *testing.T) {
|
||||
t.Parallel()
|
||||
shellMap := loadShellPrefixMap(t)
|
||||
|
||||
// Anchor prefixes — these have lived in derive-provider.sh since it
|
||||
// was first introduced. If the parser can't find them, it's broken.
|
||||
mustHave := map[string]string{
|
||||
"anthropic": "anthropic",
|
||||
"minimax": "minimax",
|
||||
"minimax-cn": "minimax-cn",
|
||||
"openrouter": "openrouter",
|
||||
"custom": "custom",
|
||||
"alibaba": "alibaba", // in an alias group with dashscope/qwen
|
||||
"dashscope": "alibaba", // ditto
|
||||
"qwen": "alibaba", // ditto
|
||||
"openai": "custom", // multi-line; first PROVIDER= is "custom"
|
||||
"nousresearch": "nous", // multi-line; first PROVIDER= is "nous"
|
||||
}
|
||||
|
||||
missing := []string{}
|
||||
wrong := []string{}
|
||||
for prefix, want := range mustHave {
|
||||
got, ok := shellMap[prefix]
|
||||
if !ok {
|
||||
missing = append(missing, prefix)
|
||||
continue
|
||||
}
|
||||
if got != want {
|
||||
wrong = append(wrong, prefix+" got="+got+" want="+want)
|
||||
}
|
||||
}
|
||||
sort.Strings(missing)
|
||||
sort.Strings(wrong)
|
||||
if len(missing) > 0 {
|
||||
t.Errorf("shell parser failed to extract anchor prefixes: %v", missing)
|
||||
}
|
||||
if len(wrong) > 0 {
|
||||
t.Errorf("shell parser extracted wrong values for anchor prefixes: %v", wrong)
|
||||
}
|
||||
}
|
||||
@@ -237,7 +237,17 @@ func (h *DiscoveryHandler) Peers(c *gin.Context) {
|
||||
|
||||
var peers []map[string]interface{}
|
||||
|
||||
// Siblings
|
||||
// Siblings — workspaces sharing the caller's parent.
|
||||
//
|
||||
// #1953 cross-tenant isolation: the OLD code's else-branch handled the
|
||||
// org-root caller (parent_id IS NULL) by returning EVERY workspace with
|
||||
// parent_id IS NULL — i.e. every other tenant's org root, since the
|
||||
// workspaces table has no org_id column. That leaked peer identities/URLs
|
||||
// across tenants. An org root has no siblings inside its own org (each
|
||||
// tenant is a distinct org root), so the org-root caller now gets an empty
|
||||
// sibling set; its real peers are its children, returned below. Only the
|
||||
// parent_id-bound branch enumerates siblings, and that is already scoped to
|
||||
// one parent (one tenant).
|
||||
if parentID.Valid {
|
||||
siblings, _ := queryPeerMaps(`
|
||||
SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status,
|
||||
@@ -246,14 +256,6 @@ func (h *DiscoveryHandler) Peers(c *gin.Context) {
|
||||
FROM workspaces w WHERE w.parent_id = $1 AND w.id != $2 AND w.status != 'removed'`,
|
||||
parentID.String, workspaceID)
|
||||
peers = append(peers, siblings...)
|
||||
} else {
|
||||
siblings, _ := queryPeerMaps(`
|
||||
SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status,
|
||||
COALESCE(w.agent_card, 'null'::jsonb), COALESCE(w.url, ''),
|
||||
w.parent_id, w.active_tasks
|
||||
FROM workspaces w WHERE w.parent_id IS NULL AND w.id != $1 AND w.status != 'removed'`,
|
||||
workspaceID)
|
||||
peers = append(peers, siblings...)
|
||||
}
|
||||
|
||||
// Children — exclude self defensively. A child row whose parent_id
|
||||
|
||||
@@ -223,10 +223,10 @@ func TestPeers_RootWorkspace_NoPeers(t *testing.T) {
|
||||
|
||||
peerCols := []string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}
|
||||
|
||||
// Siblings (other root-level workspaces) — none
|
||||
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id IS NULL AND w.id != \\$1").
|
||||
WithArgs("ws-root-alone").
|
||||
WillReturnRows(sqlmock.NewRows(peerCols))
|
||||
// #1953: an org-root caller (parent_id IS NULL) now issues NO sibling
|
||||
// query at all. The old `WHERE w.parent_id IS NULL` sibling read returned
|
||||
// EVERY tenant's org root (cross-tenant leak); an org root has no siblings
|
||||
// inside its own org, so the handler skips the sibling read entirely.
|
||||
|
||||
// Children — none. #383 added explicit `w.id != $2` self-filter.
|
||||
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2").
|
||||
|
||||
@@ -255,22 +255,20 @@ func TestExtended_SecretsListEmpty(t *testing.T) {
|
||||
// ---------- TestSecretsSet (Extended) ----------
|
||||
|
||||
func TestExtended_SecretsSet(t *testing.T) {
|
||||
// internal#691: the per-workspace strip gate now defaults to platform_managed
|
||||
// on empty MOLECULE_LLM_BILLING_MODE (closed default). This test's intent is
|
||||
// the happy path of persisting a vendor key, so put the org into byok which
|
||||
// matches the pre-#691 implicit behavior of an unset env.
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", "byok")
|
||||
// internal#718 P2-B: the per-workspace strip gate keys off the DERIVED mode
|
||||
// (org rung retired). This test's intent is the happy path of persisting a
|
||||
// vendor key on a byok workspace; the realistic way a workspace is byok for
|
||||
// a direct vendor-key write is an explicit operator override (the escape
|
||||
// hatch the reject error itself points to: PUT /admin/.../llm-billing-mode).
|
||||
// The override short-circuits the resolver to byok in a single read, so the
|
||||
// bypass-list check is skipped and the write proceeds.
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", "platform_managed") // org env ignored now
|
||||
mock := setupTestDB(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
// internal#691: secrets.Set now consults ResolveLLMBillingMode before the
|
||||
// strip gate. Mock returns no row → resolver falls through to the org
|
||||
// default (byok, set via t.Setenv above) → bypass-list check is skipped
|
||||
// and the write proceeds. This pattern is the test-side mirror of the
|
||||
// real-prod fall-through behavior for a fresh workspace with no override.
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs("22222222-2222-2222-2222-222222222222").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
|
||||
|
||||
// Expect INSERT (encrypted value is dynamic, use AnyArg)
|
||||
mock.ExpectExec("INSERT INTO workspace_secrets").
|
||||
@@ -453,6 +451,14 @@ func TestExtended_DiscoverMissingHeader(t *testing.T) {
|
||||
|
||||
// ---------- TestPeers (Extended) ----------
|
||||
|
||||
// TestExtended_Peers verifies a root-level (org-root) workspace's peer view.
|
||||
//
|
||||
// #1953: previously a root-level caller issued `WHERE w.parent_id IS NULL`
|
||||
// for siblings, which returned EVERY other tenant's org root as a "peer"
|
||||
// (cross-tenant leak, since the workspaces table has no org_id column). After
|
||||
// the fix an org root has no cross-tenant siblings; its only peers are its own
|
||||
// children. This test asserts the child is returned and that NO sibling query
|
||||
// is issued (no `parent_id IS NULL` read).
|
||||
func TestExtended_Peers(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
@@ -463,17 +469,14 @@ func TestExtended_Peers(t *testing.T) {
|
||||
WithArgs("ws-peer").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
|
||||
|
||||
// Expect root-level siblings query (parent IS NULL, excluding self)
|
||||
mock.ExpectQuery("SELECT w.id, w.name").
|
||||
WithArgs("ws-peer").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}).
|
||||
AddRow("ws-sibling", "Sibling Agent", "worker", 1, "online", []byte("null"), "http://localhost:9001", nil, 0))
|
||||
// NO root-level sibling query is issued for an org-root caller anymore.
|
||||
|
||||
// Expect children query (workspaces with parent_id = ws-peer, excluding self)
|
||||
// Query now binds (parent_id, self_id) for the self-filter guard added in #383.
|
||||
// Children query (workspaces with parent_id = ws-peer, excluding self).
|
||||
// Query binds (parent_id, self_id) for the self-filter guard added in #383.
|
||||
mock.ExpectQuery("SELECT w.id, w.name").
|
||||
WithArgs("ws-peer", "ws-peer").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}).
|
||||
AddRow("ws-child", "Child Agent", "worker", 1, "online", []byte("null"), "http://localhost:9001", "ws-peer", 0))
|
||||
|
||||
// No parent query since workspace is root-level
|
||||
|
||||
@@ -493,10 +496,10 @@ func TestExtended_Peers(t *testing.T) {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
if len(resp) != 1 {
|
||||
t.Fatalf("expected 1 peer, got %d", len(resp))
|
||||
t.Fatalf("expected 1 peer (the child), got %d", len(resp))
|
||||
}
|
||||
if resp[0]["name"] != "Sibling Agent" {
|
||||
t.Errorf("expected peer name 'Sibling Agent', got %v", resp[0]["name"])
|
||||
if resp[0]["name"] != "Child Agent" {
|
||||
t.Errorf("expected peer name 'Child Agent', got %v", resp[0]["name"])
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
|
||||
@@ -12,12 +12,12 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/ws"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/wsauth"
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/alicebob/miniredis/v2"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/redis/go-redis/v9"
|
||||
@@ -158,9 +158,11 @@ func allowLoopbackForTest(t *testing.T) {
|
||||
// handler in the 2026-04-18 restructure but the tests never caught up,
|
||||
// leaving Platform (Go) CI red for weeks.
|
||||
func expectBudgetCheck(mock sqlmock.Sqlmock, workspaceID string) {
|
||||
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id = \$1`).
|
||||
// Multi-period (#49): checkWorkspaceBudget reads budget_limits jsonb. An
|
||||
// empty map → no limits → returns early (no spend query), enforcement skipped.
|
||||
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
|
||||
WithArgs(workspaceID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte("{}")))
|
||||
}
|
||||
|
||||
// ---------- TestRegisterHandler ----------
|
||||
|
||||
@@ -43,10 +43,36 @@ import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"sync"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
)
|
||||
|
||||
// providerRegistryManifest is the parsed provider registry, loaded once. The registry
|
||||
// is embedded (go:embed, no network) and immutable for the process lifetime, so
|
||||
// a single Load is safe to memoize. A load failure is cached too (providerRegistryErr):
|
||||
// it can only happen on a malformed embedded YAML, which is a build-time defect
|
||||
// the verify-providers-gen + sync gates already catch, so failing closed
|
||||
// (treat as "cannot derive" → platform default) is correct and we don't retry.
|
||||
var (
|
||||
providerRegistryOnce sync.Once
|
||||
providerRegistryManifest *providers.Manifest
|
||||
providerRegistryErr error
|
||||
)
|
||||
|
||||
func providerRegistry() (*providers.Manifest, error) {
|
||||
providerRegistryOnce.Do(func() {
|
||||
providerRegistryManifest, providerRegistryErr = providers.LoadManifest()
|
||||
if providerRegistryErr != nil {
|
||||
log.Printf("llm_billing_mode: FATAL — provider registry failed to load: %v (billing will default-closed to platform_managed)", providerRegistryErr)
|
||||
}
|
||||
})
|
||||
return providerRegistryManifest, providerRegistryErr
|
||||
}
|
||||
|
||||
// Constants mirror molecule-controlplane/internal/credits/llm_billing.go.
|
||||
// Kept as string literals (not imports) because workspace-server has no
|
||||
// build-time dependency on the CP module; the values are stable wire
|
||||
@@ -67,6 +93,19 @@ const (
|
||||
BillingModeSourceWorkspaceOverride BillingModeSource = "workspace_override"
|
||||
BillingModeSourceOrgDefault BillingModeSource = "org_default"
|
||||
BillingModeSourceConstantFallback BillingModeSource = "constant_fallback"
|
||||
// BillingModeSourceDerivedProvider means the mode was DERIVED from the
|
||||
// workspace's (runtime, model) via the provider registry — the SSOT
|
||||
// (internal#718 P2-B). IsPlatform(derived) → platform_managed, else byok.
|
||||
// This is the highest-precedence source after an explicit operator override
|
||||
// and SUPERSEDES the prior stored-LLM_PROVIDER read (#1966).
|
||||
BillingModeSourceDerivedProvider BillingModeSource = "derived_provider"
|
||||
// BillingModeSourceDerivedDefault means the registry could not derive a
|
||||
// provider for the (runtime, model) — no model, unknown runtime,
|
||||
// unregistered/ambiguous model — so the mode defaulted closed to
|
||||
// platform_managed (CTO-confirmed "unset → platform default"). Distinct from
|
||||
// derived_provider so operators can see "we defaulted" vs "we derived
|
||||
// platform".
|
||||
BillingModeSourceDerivedDefault BillingModeSource = "derived_default"
|
||||
)
|
||||
|
||||
// BillingModeResolution is the structured answer the admin GET route returns
|
||||
@@ -74,11 +113,18 @@ const (
|
||||
// shape, so the resolver test asserts both the mode AND the source per case
|
||||
// (catches a bug where the right mode is returned via the wrong layer).
|
||||
type BillingModeResolution struct {
|
||||
WorkspaceID string `json:"workspace_id"`
|
||||
ResolvedMode string `json:"resolved_mode"`
|
||||
WorkspaceOverride *string `json:"workspace_override"` // nil = inherit
|
||||
OrgDefault string `json:"org_default"` // already default-closed by CP
|
||||
Source BillingModeSource `json:"source"`
|
||||
WorkspaceID string `json:"workspace_id"`
|
||||
ResolvedMode string `json:"resolved_mode"`
|
||||
WorkspaceOverride *string `json:"workspace_override"` // nil = inherit
|
||||
OrgDefault string `json:"org_default"` // RETIRED as a billing source (internal#718 P2-B); always platform_managed, kept for wire-compat
|
||||
Source BillingModeSource `json:"source"`
|
||||
// ProviderSelection surfaces the DERIVED provider name (internal#718 P2-B)
|
||||
// when the mode came from the registry derivation — the literal provider the
|
||||
// (runtime, model) resolved to (e.g. "platform", "kimi-coding", "openai"), or
|
||||
// the raw model id when derivation failed. nil when an explicit operator
|
||||
// override or the empty-id default decided. Lets the admin route answer "why
|
||||
// is this workspace byok?" with the derived provider, not a stored value.
|
||||
ProviderSelection *string `json:"provider_selection"`
|
||||
}
|
||||
|
||||
// isKnownBillingMode is the enum-recognizer for the resolver's default-closed
|
||||
@@ -95,24 +141,137 @@ func isKnownBillingMode(s string) bool {
|
||||
}
|
||||
}
|
||||
|
||||
// normalizeOrgDefault applies the same default-closed contract to the
|
||||
// org-level input as the workspace override gets. The org_default arrives
|
||||
// from tenant_config which already COALESCEs NULL → platform_managed at the
|
||||
// CP SQL layer, but we DO NOT trust that contract here — if CP regresses or
|
||||
// the tenant_config env wasn't populated (race on boot), we still default-
|
||||
// close. Same principle: never honor a garbled value.
|
||||
func normalizeOrgDefault(orgMode string) string {
|
||||
if isKnownBillingMode(orgMode) {
|
||||
return orgMode
|
||||
// readWorkspaceBillingOverride reads the OPTIONAL explicit operator override
|
||||
// (workspaces.llm_billing_mode). Returns:
|
||||
//
|
||||
// (mode, true, nil) — a recognized override is set → operator pinned the mode
|
||||
// ("", false, nil) — NULL / garbled / row-missing → no explicit override
|
||||
// ("", false, err) — DB error → caller defaults closed + propagates
|
||||
//
|
||||
// internal#718 P2-B retires the org rung; this column is the ONLY stored
|
||||
// billing signal that survives, and ONLY as an explicit override on top of the
|
||||
// derived provider (CTO 2026-05-27).
|
||||
func readWorkspaceBillingOverride(ctx context.Context, workspaceID string) (string, bool, error) {
|
||||
var wsOverride sql.NullString
|
||||
err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT llm_billing_mode FROM workspaces WHERE id = $1`,
|
||||
workspaceID,
|
||||
).Scan(&wsOverride)
|
||||
switch {
|
||||
case errors.Is(err, sql.ErrNoRows):
|
||||
return "", false, nil
|
||||
case err != nil:
|
||||
return "", false, fmt.Errorf("resolve workspace llm_billing_mode override for %s: %w", workspaceID, err)
|
||||
}
|
||||
return LLMBillingModePlatformManaged
|
||||
if wsOverride.Valid && isKnownBillingMode(wsOverride.String) {
|
||||
return wsOverride.String, true, nil
|
||||
}
|
||||
return "", false, nil
|
||||
}
|
||||
|
||||
// ResolveLLMBillingMode is the canonical resolver. Every code path that
|
||||
// previously gated on `os.Getenv("MOLECULE_LLM_BILLING_MODE") == "platform_managed"`
|
||||
// must call this instead and gate on the returned mode. The architectural
|
||||
// test (resolver_ast_test.go) asserts there is no remaining call site of
|
||||
// the old shape outside the resolver-input wiring.
|
||||
// ResolveLLMBillingModeDerived is the SSOT billing-mode resolver (internal#718
|
||||
// P2-B). It DERIVES the provider from (runtime, model) via the provider
|
||||
// registry and decides platform-vs-byok from IsPlatform(derived) — it does NOT
|
||||
// read a stored LLM_PROVIDER (superseding #1966's stored-read approach) and
|
||||
// does NOT read the org rung (retired, CTO 2026-05-27).
|
||||
//
|
||||
// Precedence (highest first):
|
||||
//
|
||||
// 1. EXPLICIT operator override (workspaces.llm_billing_mode, a recognized
|
||||
// value). The only stored billing signal that survives — an escape hatch,
|
||||
// not the primary signal.
|
||||
// 2. DERIVE: providers.DeriveProvider(runtime, model, availableAuthEnv).
|
||||
// - resolves to the closed `platform` provider → platform_managed
|
||||
// - resolves to any other (BYOK/third-party) provider → byok ← THE FIX
|
||||
// 3. DEFAULT-CLOSED: derive fails (no model, unknown runtime, unregistered or
|
||||
// ambiguous model) → platform_managed (CTO "unset → platform default"). A
|
||||
// derive failure NEVER silently flips a workspace to byok (which would
|
||||
// strip the platform creds it may legitimately need).
|
||||
//
|
||||
// availableAuthEnv is the set of auth-env-var NAMES present for the workspace
|
||||
// (never secret values) — the same disambiguation input DeriveProvider uses to
|
||||
// split anthropic-oauth from anthropic-api. May be nil.
|
||||
//
|
||||
// A returned error never prevents a decision: ResolvedMode is always a valid
|
||||
// enum value (default-closed). The error is informational (log + surface).
|
||||
func ResolveLLMBillingModeDerived(ctx context.Context, workspaceID, runtime, model string, availableAuthEnv []string) (BillingModeResolution, error) {
|
||||
res := BillingModeResolution{
|
||||
WorkspaceID: workspaceID,
|
||||
// OrgDefault is retired as a billing source (internal#718 P2-B). Kept on
|
||||
// the struct for wire-compat (admin route / CP mirror) but always the
|
||||
// closed constant — never consulted in the decision.
|
||||
OrgDefault: LLMBillingModePlatformManaged,
|
||||
}
|
||||
|
||||
// Pre-provision context (no workspace row yet): no override to read, default
|
||||
// closed. (DeriveProvider could still run from the passed runtime/model, but
|
||||
// the no-id path historically does no DB work and the strip gate only runs
|
||||
// post-create, so keep it a pure default to preserve that contract.)
|
||||
if workspaceID == "" {
|
||||
res.ResolvedMode = LLMBillingModePlatformManaged
|
||||
res.Source = BillingModeSourceDerivedDefault
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Precedence 1: explicit operator override.
|
||||
if mode, ok, err := readWorkspaceBillingOverride(ctx, workspaceID); err != nil {
|
||||
// DB error — default closed AND propagate (never flip on a transient error).
|
||||
res.ResolvedMode = LLMBillingModePlatformManaged
|
||||
res.Source = BillingModeSourceConstantFallback
|
||||
return res, err
|
||||
} else if ok {
|
||||
m := mode
|
||||
res.WorkspaceOverride = &m
|
||||
res.ResolvedMode = mode
|
||||
res.Source = BillingModeSourceWorkspaceOverride
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Precedence 2: DERIVE the provider from (runtime, model).
|
||||
manifest, mErr := providerRegistry()
|
||||
if mErr != nil || manifest == nil {
|
||||
// Registry unavailable (malformed embedded YAML — a build-time defect the
|
||||
// gates catch). Default closed.
|
||||
res.ResolvedMode = LLMBillingModePlatformManaged
|
||||
res.Source = BillingModeSourceDerivedDefault
|
||||
return res, mErr
|
||||
}
|
||||
provider, dErr := manifest.DeriveProvider(runtime, model, availableAuthEnv)
|
||||
if dErr != nil {
|
||||
// No model / unknown runtime / unregistered / ambiguous → default closed.
|
||||
// NOT an error to the caller: an unregistered model is a legitimate
|
||||
// "we can't say it's BYOK, so bill the platform default" outcome, and the
|
||||
// only-registered gate at the create/config API is where an unregistered
|
||||
// model is rejected loudly. Here we just fail closed for safety.
|
||||
res.ResolvedMode = LLMBillingModePlatformManaged
|
||||
res.Source = BillingModeSourceDerivedDefault
|
||||
sel := model
|
||||
if sel != "" {
|
||||
res.ProviderSelection = &sel
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
derivedName := provider.Name
|
||||
res.ProviderSelection = &derivedName
|
||||
res.Source = BillingModeSourceDerivedProvider
|
||||
if provider.IsPlatform() {
|
||||
res.ResolvedMode = LLMBillingModePlatformManaged
|
||||
} else {
|
||||
// A specific (non-platform) vendor was derived → bring-your-own-key.
|
||||
res.ResolvedMode = LLMBillingModeBYOK
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// ResolveLLMBillingMode is the legacy-signature resolver retained for callers
|
||||
// that do not have (runtime, model) in hand (the admin GET/PUT route and the
|
||||
// secrets remote-pull path). It reads the workspace's stored runtime + model +
|
||||
// available auth env from the DB and delegates to the DERIVED resolver
|
||||
// (internal#718 P2-B) — the orgMode parameter is RETIRED (the org rung is no
|
||||
// longer a billing source) and is ignored; it stays in the signature only to
|
||||
// avoid churning the two callers in this PR. The architectural test asserts no
|
||||
// remaining code path gates on os.Getenv("MOLECULE_LLM_BILLING_MODE") for the
|
||||
// strip decision (that env is no longer read into the decision at all).
|
||||
//
|
||||
// Returning an error does NOT prevent the caller from making a decision —
|
||||
// the returned mode is always a valid enum value (default-closed to
|
||||
@@ -120,75 +279,160 @@ func normalizeOrgDefault(orgMode string) string {
|
||||
// branch. The error is informational: log it, surface it to operators, but
|
||||
// the strip-gate decision is already safe.
|
||||
func ResolveLLMBillingMode(ctx context.Context, workspaceID, orgMode string) (BillingModeResolution, error) {
|
||||
res := BillingModeResolution{
|
||||
WorkspaceID: workspaceID,
|
||||
OrgDefault: normalizeOrgDefault(orgMode),
|
||||
}
|
||||
_ = orgMode // org rung retired (internal#718 P2-B); parameter ignored.
|
||||
|
||||
if workspaceID == "" {
|
||||
// No workspace ID = pre-provision context (templating, validation).
|
||||
// Resolve against the org default only, no DB read.
|
||||
res.ResolvedMode = res.OrgDefault
|
||||
res.Source = BillingModeSourceOrgDefault
|
||||
if !isKnownBillingMode(orgMode) {
|
||||
// Org default was garbled/NULL and we clamped to platform_managed.
|
||||
// Mark the source as constant_fallback so the operator can see
|
||||
// the clamp happened, not that the org "really" said platform_managed.
|
||||
res.Source = BillingModeSourceConstantFallback
|
||||
}
|
||||
return res, nil
|
||||
// Pre-provision context (templating, validation): default closed, no DB.
|
||||
return ResolveLLMBillingModeDerived(ctx, "", "", "", nil)
|
||||
}
|
||||
|
||||
var wsOverride sql.NullString
|
||||
err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT llm_billing_mode FROM workspaces WHERE id = $1`,
|
||||
// Precedence 1: explicit operator override. Read it FIRST so an overridden
|
||||
// workspace short-circuits without the extra runtime/secrets reads (and so
|
||||
// the query order is override → runtime → secrets, matching the derived
|
||||
// resolver's own override-first precedence).
|
||||
if mode, ok, err := readWorkspaceBillingOverride(ctx, workspaceID); err != nil {
|
||||
return BillingModeResolution{
|
||||
WorkspaceID: workspaceID,
|
||||
OrgDefault: LLMBillingModePlatformManaged,
|
||||
ResolvedMode: LLMBillingModePlatformManaged,
|
||||
Source: BillingModeSourceConstantFallback,
|
||||
}, err
|
||||
} else if ok {
|
||||
m := mode
|
||||
return BillingModeResolution{
|
||||
WorkspaceID: workspaceID,
|
||||
OrgDefault: LLMBillingModePlatformManaged,
|
||||
ResolvedMode: mode,
|
||||
WorkspaceOverride: &m,
|
||||
Source: BillingModeSourceWorkspaceOverride,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Precedence 2: DERIVE. Read the stored (runtime, model, available-auth-env)
|
||||
// so the derived resolver can DeriveProvider for callers that don't carry
|
||||
// them (admin route, secrets remote-pull). A read miss/error degrades
|
||||
// gracefully: pass the empty/partial inputs through — DeriveProvider then
|
||||
// errors and the derived resolver defaults closed to platform_managed.
|
||||
//
|
||||
// ResolveLLMBillingModeDerived re-reads the override (NULL again here) before
|
||||
// deriving; that one extra cheap read keeps the derived resolver a complete,
|
||||
// independently-callable SSOT rather than splitting its precedence across two
|
||||
// functions.
|
||||
runtime, model, authEnv := readWorkspaceDeriveInputs(ctx, workspaceID)
|
||||
return ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, model, authEnv)
|
||||
}
|
||||
|
||||
// readWorkspaceDeriveInputs loads the workspace's stored runtime + selected
|
||||
// model + the auth-env-var NAMES present in its secrets — the inputs
|
||||
// DeriveProvider needs. Best-effort: any read error returns whatever was
|
||||
// gathered (the derived resolver fails closed on incomplete inputs). The model
|
||||
// is the MODEL workspace_secret (the canvas-picked id, written by setModelSecret
|
||||
// / Create); runtime is the workspaces.runtime column (defaults claude-code).
|
||||
// availableAuthEnv is the subset of secret KEYS that are recognized provider
|
||||
// auth-env names (never values), so DeriveProvider's auth-env tie-break can fire
|
||||
// the same way it does on the provision path.
|
||||
func readWorkspaceDeriveInputs(ctx context.Context, workspaceID string) (runtime, model string, availableAuthEnv []string) {
|
||||
var rt sql.NullString
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT runtime FROM workspaces WHERE id = $1`, workspaceID,
|
||||
).Scan(&rt); err != nil {
|
||||
if !errors.Is(err, sql.ErrNoRows) {
|
||||
log.Printf("llm_billing_mode: read runtime for %s: %v (deriving with empty runtime)", workspaceID, err)
|
||||
}
|
||||
}
|
||||
runtime = rt.String
|
||||
if runtime == "" {
|
||||
// Mirror the DB column default so an unset runtime still derives.
|
||||
runtime = "claude-code"
|
||||
}
|
||||
|
||||
// Gather model + auth-env-name keys from workspace_secrets in one pass.
|
||||
authSet := authEnvNameSet()
|
||||
rows, err := db.DB.QueryContext(ctx,
|
||||
`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1`,
|
||||
workspaceID,
|
||||
).Scan(&wsOverride)
|
||||
|
||||
switch {
|
||||
case errors.Is(err, sql.ErrNoRows):
|
||||
// Workspace row missing — concurrent delete, or pre-create call. Don't
|
||||
// silently flip; fall through to org default. Source stays org_default
|
||||
// so operators can see the row-missing case is being handled as a
|
||||
// fallback, not a workspace-explicit decision.
|
||||
res.ResolvedMode = res.OrgDefault
|
||||
res.Source = BillingModeSourceOrgDefault
|
||||
if !isKnownBillingMode(orgMode) {
|
||||
res.Source = BillingModeSourceConstantFallback
|
||||
)
|
||||
if err != nil {
|
||||
log.Printf("llm_billing_mode: read secrets for %s: %v (deriving with no model/auth-env)", workspaceID, err)
|
||||
return runtime, model, availableAuthEnv
|
||||
}
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var k string
|
||||
var v []byte
|
||||
var ver int
|
||||
if rows.Scan(&k, &v, &ver) != nil {
|
||||
continue
|
||||
}
|
||||
if k == "MODEL" {
|
||||
if dec, derr := crypto.DecryptVersioned(v, ver); derr == nil {
|
||||
model = string(dec)
|
||||
}
|
||||
continue
|
||||
}
|
||||
// Only the KEY matters for auth-env disambiguation (the value is the
|
||||
// secret; we never decrypt it for this purpose). Record recognized
|
||||
// provider auth-env names.
|
||||
if _, ok := authSet[k]; ok {
|
||||
availableAuthEnv = append(availableAuthEnv, k)
|
||||
}
|
||||
return res, nil
|
||||
case err != nil:
|
||||
// DB error — default-closed to platform_managed AND propagate the
|
||||
// error so operators get a structured log line. The caller is
|
||||
// expected to log and continue with the safe default.
|
||||
res.ResolvedMode = LLMBillingModePlatformManaged
|
||||
res.Source = BillingModeSourceConstantFallback
|
||||
return res, fmt.Errorf("resolve workspace llm_billing_mode for %s: %w", workspaceID, err)
|
||||
}
|
||||
return runtime, model, availableAuthEnv
|
||||
}
|
||||
|
||||
if wsOverride.Valid && isKnownBillingMode(wsOverride.String) {
|
||||
mode := wsOverride.String
|
||||
res.WorkspaceOverride = &mode
|
||||
res.ResolvedMode = mode
|
||||
res.Source = BillingModeSourceWorkspaceOverride
|
||||
return res, nil
|
||||
}
|
||||
// authEnvNameSet is the union of every provider's auth_env names in the
|
||||
// registry — the recognized set readWorkspaceDeriveInputs filters secret keys
|
||||
// against. Loaded once from the registry so it stays in sync with the SSOT (no
|
||||
// hardcoded auth-env vocabulary). Registry-load failure yields an empty set
|
||||
// (derive then runs without the auth-env tie-break, which only matters for the
|
||||
// oauth-vs-api overlap; safe — it errors to default-closed rather than guessing).
|
||||
var (
|
||||
authEnvNameSetOnce sync.Once
|
||||
authEnvNameSetVal map[string]struct{}
|
||||
)
|
||||
|
||||
// Override row present but the value is NULL or garbled. Fall through.
|
||||
// If the value was non-NULL but garbled (CHECK constraint should prevent
|
||||
// this, but defense in depth — a future migration could relax the check
|
||||
// or another path could write the column directly), surface the raw
|
||||
// override value so operators can spot the corrupt row.
|
||||
if wsOverride.Valid {
|
||||
raw := wsOverride.String
|
||||
res.WorkspaceOverride = &raw
|
||||
func authEnvNameSet() map[string]struct{} {
|
||||
authEnvNameSetOnce.Do(func() {
|
||||
authEnvNameSetVal = map[string]struct{}{}
|
||||
m, err := providerRegistry()
|
||||
if err != nil || m == nil {
|
||||
return
|
||||
}
|
||||
for _, p := range m.Providers {
|
||||
for _, e := range p.AuthEnv {
|
||||
authEnvNameSetVal[e] = struct{}{}
|
||||
}
|
||||
}
|
||||
})
|
||||
return authEnvNameSetVal
|
||||
}
|
||||
|
||||
// availableAuthEnvNames returns the recognized provider auth-env-var NAMES
|
||||
// present (non-empty) in envVars — the DeriveProvider auth-env tie-break input.
|
||||
// Never returns secret VALUES, only the env-var names. Used by the provision
|
||||
// path (applyPlatformManagedLLMEnv), which already has the workspace env in
|
||||
// hand, so it derives without a secrets DB round-trip.
|
||||
func availableAuthEnvNames(envVars map[string]string) []string {
|
||||
authSet := authEnvNameSet()
|
||||
var out []string
|
||||
for k, v := range envVars {
|
||||
if v == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := authSet[k]; ok {
|
||||
out = append(out, k)
|
||||
}
|
||||
}
|
||||
res.ResolvedMode = res.OrgDefault
|
||||
res.Source = BillingModeSourceOrgDefault
|
||||
if !isKnownBillingMode(orgMode) {
|
||||
res.Source = BillingModeSourceConstantFallback
|
||||
return out
|
||||
}
|
||||
|
||||
// derefOrEmpty returns the pointed-to string or "" for a nil pointer. Used in
|
||||
// log lines that surface an optional *string field.
|
||||
func derefOrEmpty(s *string) string {
|
||||
if s == nil {
|
||||
return ""
|
||||
}
|
||||
return res, nil
|
||||
return *s
|
||||
}
|
||||
|
||||
// SetWorkspaceLLMBillingMode writes the override column. Pass mode=="" to
|
||||
|
||||
@@ -0,0 +1,232 @@
|
||||
package handlers
|
||||
|
||||
// llm_billing_mode_derived_test.go — tests for the DERIVED billing-mode
|
||||
// resolver (internal#718 P2-B). The platform-vs-byok decision now DERIVES the
|
||||
// provider from (runtime, model) via the provider registry and keys off
|
||||
// IsPlatform(derived) — it does NOT read a stored LLM_PROVIDER (supersedes
|
||||
// #1966's stored-read approach) and does NOT read the org rung (retired,
|
||||
// CTO 2026-05-27). `workspaces.llm_billing_mode` survives ONLY as an optional
|
||||
// explicit operator override (first precedence).
|
||||
//
|
||||
// This file pins the explicit BEHAVIOR DELTA the RFC's P2 calls out:
|
||||
// - platform-derived (or unset → platform default) → platform_managed (UNCHANGED)
|
||||
// - non-platform-derived → byok (THE FIX — the Reno leak class)
|
||||
// - explicit override → wins over derive
|
||||
// - derive error / unregistered → platform_managed (default-closed)
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
)
|
||||
|
||||
// expectOverrideQuery sets up the workspaces.llm_billing_mode override read
|
||||
// (first precedence). value=="" means NULL (no override).
|
||||
func expectOverrideQuery(m sqlmock.Sqlmock, wsID, value string) {
|
||||
rows := sqlmock.NewRows([]string{"llm_billing_mode"})
|
||||
if value == "" {
|
||||
rows.AddRow(nil)
|
||||
} else {
|
||||
rows.AddRow(value)
|
||||
}
|
||||
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(rows)
|
||||
}
|
||||
|
||||
func TestResolveLLMBillingModeDerived_BehaviorDelta(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
const wsID = "33333333-3333-3333-3333-333333333333"
|
||||
|
||||
type tc struct {
|
||||
name string
|
||||
runtime string
|
||||
model string
|
||||
authEnv []string
|
||||
override string // "" = NULL override (no explicit operator override)
|
||||
wantMode string
|
||||
wantSource BillingModeSource
|
||||
wantErr bool
|
||||
}
|
||||
|
||||
cases := []tc{
|
||||
{
|
||||
// PLATFORM-DERIVED → platform_managed (UNCHANGED). claude-code +
|
||||
// a platform-namespaced model id derives to the closed `platform`
|
||||
// provider → IsPlatform → platform_managed.
|
||||
name: "platform_derived_keeps_platform_managed_UNCHANGED",
|
||||
runtime: "claude-code",
|
||||
model: "anthropic/claude-opus-4-7",
|
||||
override: "",
|
||||
wantMode: LLMBillingModePlatformManaged,
|
||||
wantSource: BillingModeSourceDerivedProvider,
|
||||
},
|
||||
{
|
||||
// NON-PLATFORM-DERIVED → byok (THE FIX). claude-code + the
|
||||
// kimi-coding-native model derives to the non-platform kimi-coding
|
||||
// provider → IsPlatform=false → byok. This is the Reno billing-leak
|
||||
// class: pre-P2 it resolved platform_managed and ran on platform creds.
|
||||
name: "non_platform_derived_resolves_byok_THE_FIX",
|
||||
runtime: "claude-code",
|
||||
model: "kimi-for-coding",
|
||||
override: "",
|
||||
wantMode: LLMBillingModeBYOK,
|
||||
wantSource: BillingModeSourceDerivedProvider,
|
||||
},
|
||||
{
|
||||
// NON-PLATFORM vendor on codex: gpt-5.5 derives to `openai` (BYOK).
|
||||
name: "non_platform_openai_codex_byok",
|
||||
runtime: "codex",
|
||||
model: "gpt-5.5",
|
||||
override: "",
|
||||
wantMode: LLMBillingModeBYOK,
|
||||
wantSource: BillingModeSourceDerivedProvider,
|
||||
},
|
||||
{
|
||||
// PLATFORM-DERIVED on codex: openai/gpt-5.4 is platform-namespaced.
|
||||
name: "platform_derived_codex_platform_managed",
|
||||
runtime: "codex",
|
||||
model: "openai/gpt-5.4",
|
||||
override: "",
|
||||
wantMode: LLMBillingModePlatformManaged,
|
||||
wantSource: BillingModeSourceDerivedProvider,
|
||||
},
|
||||
{
|
||||
// UNSET model → platform default (CTO-confirmed "unset → platform
|
||||
// default"). No model means nothing to derive; default-closed.
|
||||
name: "unset_model_platform_default",
|
||||
runtime: "claude-code",
|
||||
model: "",
|
||||
override: "",
|
||||
wantMode: LLMBillingModePlatformManaged,
|
||||
wantSource: BillingModeSourceDerivedDefault,
|
||||
},
|
||||
{
|
||||
// UNREGISTERED model → derive errors → platform default (default-closed,
|
||||
// NOT a silent byok flip that would strip a workspace's creds).
|
||||
name: "unregistered_model_derive_error_platform_default",
|
||||
runtime: "claude-code",
|
||||
model: "totally-made-up-model-xyz",
|
||||
override: "",
|
||||
wantMode: LLMBillingModePlatformManaged,
|
||||
wantSource: BillingModeSourceDerivedDefault,
|
||||
},
|
||||
{
|
||||
// UNKNOWN runtime → derive errors → platform default (default-closed).
|
||||
name: "unknown_runtime_platform_default",
|
||||
runtime: "no-such-runtime",
|
||||
model: "claude-opus-4-7",
|
||||
override: "",
|
||||
wantMode: LLMBillingModePlatformManaged,
|
||||
wantSource: BillingModeSourceDerivedDefault,
|
||||
},
|
||||
{
|
||||
// EXPLICIT OVERRIDE wins over derive: a non-platform-deriving model
|
||||
// kept on platform_managed by an operator override (escape hatch).
|
||||
name: "explicit_override_platform_managed_wins_over_byok_derive",
|
||||
runtime: "claude-code",
|
||||
model: "kimi-for-coding", // would derive byok
|
||||
override: LLMBillingModePlatformManaged,
|
||||
wantMode: LLMBillingModePlatformManaged,
|
||||
wantSource: BillingModeSourceWorkspaceOverride,
|
||||
},
|
||||
{
|
||||
// EXPLICIT OVERRIDE byok wins over a platform-deriving model.
|
||||
name: "explicit_override_byok_wins_over_platform_derive",
|
||||
runtime: "claude-code",
|
||||
model: "anthropic/claude-opus-4-7", // would derive platform_managed
|
||||
override: LLMBillingModeBYOK,
|
||||
wantMode: LLMBillingModeBYOK,
|
||||
wantSource: BillingModeSourceWorkspaceOverride,
|
||||
},
|
||||
{
|
||||
// EXPLICIT OVERRIDE disabled wins (no-LLM workspace).
|
||||
name: "explicit_override_disabled_wins",
|
||||
runtime: "claude-code",
|
||||
model: "anthropic/claude-opus-4-7",
|
||||
override: LLMBillingModeDisabled,
|
||||
wantMode: LLMBillingModeDisabled,
|
||||
wantSource: BillingModeSourceWorkspaceOverride,
|
||||
},
|
||||
{
|
||||
// AUTH-ENV disambiguation: claude-code's anthropic-oauth (alias
|
||||
// model "opus") vs anthropic-api both could match a bare alias; with
|
||||
// CLAUDE_CODE_OAUTH_TOKEN present it derives anthropic-oauth → byok.
|
||||
name: "auth_env_disambiguates_oauth_byok",
|
||||
runtime: "claude-code",
|
||||
model: "opus",
|
||||
authEnv: []string{"CLAUDE_CODE_OAUTH_TOKEN"},
|
||||
override: "",
|
||||
wantMode: LLMBillingModeBYOK,
|
||||
wantSource: BillingModeSourceDerivedProvider,
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
expectOverrideQuery(mock, wsID, c.override)
|
||||
|
||||
res, err := ResolveLLMBillingModeDerived(ctx, wsID, c.runtime, c.model, c.authEnv)
|
||||
if (err != nil) != c.wantErr {
|
||||
t.Fatalf("err: got %v wantErr=%v", err, c.wantErr)
|
||||
}
|
||||
if res.ResolvedMode != c.wantMode {
|
||||
t.Errorf("mode: got %q want %q", res.ResolvedMode, c.wantMode)
|
||||
}
|
||||
if res.Source != c.wantSource {
|
||||
t.Errorf("source: got %q want %q", res.Source, c.wantSource)
|
||||
}
|
||||
if !isKnownBillingMode(res.ResolvedMode) {
|
||||
t.Errorf("post-condition: resolved mode %q not a known enum", res.ResolvedMode)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveLLMBillingModeDerived_OverrideDBError_DefaultClosed asserts a DB
|
||||
// error reading the override column defaults closed to platform_managed and
|
||||
// propagates the error — never silently flips a workspace off platform creds.
|
||||
func TestResolveLLMBillingModeDerived_OverrideDBError_DefaultClosed(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
const wsID = "44444444-4444-4444-4444-444444444444"
|
||||
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnError(errors.New("connection refused"))
|
||||
|
||||
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "kimi-for-coding", nil)
|
||||
if err == nil {
|
||||
t.Fatalf("expected propagated DB error, got nil")
|
||||
}
|
||||
if res.ResolvedMode != LLMBillingModePlatformManaged {
|
||||
t.Errorf("default-closed: DB error must resolve platform_managed, got %q", res.ResolvedMode)
|
||||
}
|
||||
if res.Source != BillingModeSourceConstantFallback {
|
||||
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceConstantFallback)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveLLMBillingModeDerived_EmptyWorkspaceID_PlatformDefault asserts the
|
||||
// pre-provision context (no workspace id, no override read) defaults to
|
||||
// platform_managed without a DB query.
|
||||
func TestResolveLLMBillingModeDerived_EmptyWorkspaceID_PlatformDefault(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
mock := setupTestDB(t) // no query expected
|
||||
res, err := ResolveLLMBillingModeDerived(ctx, "", "claude-code", "kimi-for-coding", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if res.ResolvedMode != LLMBillingModePlatformManaged {
|
||||
t.Errorf("empty workspace id must default platform_managed, got %q", res.ResolvedMode)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -36,10 +36,12 @@ import (
|
||||
|
||||
// GetWorkspaceLLMBillingMode handles GET /admin/workspaces/:id/llm-billing-mode.
|
||||
//
|
||||
// Reads the workspace override + the org-level default (from the same
|
||||
// MOLECULE_LLM_BILLING_MODE env var the provisioner reads at strip-gate time —
|
||||
// keeps the two paths consistent so the GET result matches what the strip
|
||||
// gate would compute) and returns the structured resolution.
|
||||
// internal#718 P2-B: the resolution now DERIVES the provider from the
|
||||
// workspace's stored (runtime, model) via the registry (org rung retired). The
|
||||
// passed orgMode is ignored by the resolver; it is left here only to avoid
|
||||
// churning the call signature. The returned resolution matches what the
|
||||
// provision-time strip gate computes (same derived resolver), so operators see
|
||||
// the real platform-vs-byok decision + the derived provider in ProviderSelection.
|
||||
func GetWorkspaceLLMBillingMode(c *gin.Context) {
|
||||
workspaceID := strings.TrimSpace(c.Param("id"))
|
||||
if !uuidRegex.MatchString(workspaceID) {
|
||||
|
||||
@@ -29,13 +29,42 @@ func init() {
|
||||
|
||||
const testWSID = "44444444-4444-4444-4444-444444444444"
|
||||
|
||||
func TestGetWorkspaceLLMBillingMode_HappyPath_InheritsOrgDefault(t *testing.T) {
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModeBYOK)
|
||||
// expectDeriveShimQueries sets up the three reads the legacy-signature
|
||||
// ResolveLLMBillingMode shim makes on a no-explicit-override path
|
||||
// (internal#718 P2-B): the override read (NULL here), the workspaces.runtime
|
||||
// read, and the workspace_secrets scan (for MODEL + auth-env names). model==""
|
||||
// means no MODEL secret row.
|
||||
func expectDeriveShimQueries(m sqlmock.Sqlmock, wsID, runtime, model string) {
|
||||
nullOverride := func() {
|
||||
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
|
||||
}
|
||||
// Order: override(NULL) shim check, runtime, secrets, override(NULL) again
|
||||
// (the derived resolver re-checks the override as a complete SSOT).
|
||||
nullOverride()
|
||||
m.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow(runtime))
|
||||
secretRows := sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"})
|
||||
if model != "" {
|
||||
// encryption_version 0 = plaintext passthrough (crypto.DecryptVersioned).
|
||||
secretRows.AddRow("MODEL", []byte(model), 0)
|
||||
}
|
||||
m.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(secretRows)
|
||||
nullOverride()
|
||||
}
|
||||
|
||||
// internal#718 P2-B: org rung retired. A no-override workspace's mode is now
|
||||
// DERIVED from its stored (runtime, model). A claude-code workspace with a
|
||||
// non-platform-deriving model (kimi-for-coding) resolves byok via
|
||||
// derived_provider — NOT the old "inherit org default".
|
||||
func TestGetWorkspaceLLMBillingMode_HappyPath_DerivesByokFromModel(t *testing.T) {
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModeBYOK) // org env ignored now
|
||||
mock := setupTestDB(t)
|
||||
// Workspace has no override → resolver returns org_default = byok.
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(testWSID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
|
||||
expectDeriveShimQueries(mock, testWSID, "claude-code", "kimi-for-coding")
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -54,12 +83,15 @@ func TestGetWorkspaceLLMBillingMode_HappyPath_InheritsOrgDefault(t *testing.T) {
|
||||
if res.ResolvedMode != LLMBillingModeBYOK {
|
||||
t.Errorf("resolved mode: got %q want %q", res.ResolvedMode, LLMBillingModeBYOK)
|
||||
}
|
||||
if res.Source != BillingModeSourceOrgDefault {
|
||||
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceOrgDefault)
|
||||
if res.Source != BillingModeSourceDerivedProvider {
|
||||
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceDerivedProvider)
|
||||
}
|
||||
if res.WorkspaceOverride != nil {
|
||||
t.Errorf("expected nil override, got %v", *res.WorkspaceOverride)
|
||||
}
|
||||
if res.ProviderSelection == nil || *res.ProviderSelection != "kimi-coding" {
|
||||
t.Errorf("expected derived provider kimi-coding, got %v", res.ProviderSelection)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetWorkspaceLLMBillingMode_BadUUID_400(t *testing.T) {
|
||||
@@ -117,9 +149,9 @@ func TestPutWorkspaceLLMBillingMode_ExplicitNullClearsOverride(t *testing.T) {
|
||||
mock.ExpectExec(`UPDATE workspaces SET llm_billing_mode = NULL WHERE id = \$1`).
|
||||
WithArgs(testWSID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(testWSID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
|
||||
// After clear, the post-write re-resolution DERIVES (internal#718 P2-B):
|
||||
// no override + no MODEL secret → derived_default → platform_managed.
|
||||
expectDeriveShimQueries(mock, testWSID, "claude-code", "")
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -142,8 +174,8 @@ func TestPutWorkspaceLLMBillingMode_ExplicitNullClearsOverride(t *testing.T) {
|
||||
if res.ResolvedMode != LLMBillingModePlatformManaged {
|
||||
t.Errorf("post-clear resolved: got %q want %q", res.ResolvedMode, LLMBillingModePlatformManaged)
|
||||
}
|
||||
if res.Source != BillingModeSourceOrgDefault {
|
||||
t.Errorf("post-clear source: got %q want %q", res.Source, BillingModeSourceOrgDefault)
|
||||
if res.Source != BillingModeSourceDerivedDefault {
|
||||
t.Errorf("post-clear source: got %q want %q", res.Source, BillingModeSourceDerivedDefault)
|
||||
}
|
||||
if res.WorkspaceOverride != nil {
|
||||
t.Errorf("post-clear override should be nil, got %v", *res.WorkspaceOverride)
|
||||
|
||||
@@ -0,0 +1,374 @@
|
||||
package handlers
|
||||
|
||||
// llm_billing_mode_provision_parity_test.go — molecule-core#1994.
|
||||
//
|
||||
// Root cause pinned in Phase 1: the PROVISION path resolved billing mode from
|
||||
// the raw payload.Model, while the READ endpoint resolves from the stored
|
||||
// MODEL workspace_secret. On a RE-PROVISION (restart/resume/auto-restart) the
|
||||
// payload is rebuilt from the DB with Name+Tier+Runtime ONLY — payload.Model
|
||||
// is "" (workspace_restart.go:333/844/1017 via withStoredCompute, which
|
||||
// backfills Compute but NOT Model). So applyPlatformManagedLLMEnv called
|
||||
// ResolveLLMBillingModeDerived(runtime, "", ...) → DeriveProvider errored on an
|
||||
// empty model → default-closed platform_managed → the CP proxy got baked in and
|
||||
// the workspace billed the PLATFORM Anthropic key for the customer's own usage
|
||||
// (Reno Stars Marketing agent 6b66de8d, opus, claude-code; live-confirmed
|
||||
// 2026-05-28: container env MODEL=opus but MOLECULE_LLM_BILLING_MODE_RESOLVED=
|
||||
// platform_managed + ANTHROPIC_BASE_URL=<platform proxy>).
|
||||
//
|
||||
// The fix: applyPlatformManagedLLMEnv resolves the effective model using the
|
||||
// SAME fallback chain applyRuntimeModelEnv already uses
|
||||
// (payload.Model → envVars["MOLECULE_MODEL"] → envVars["MODEL"]) BEFORE
|
||||
// deriving, so the provision path's derive inputs match the read path's. The
|
||||
// merged envVars already carries the MODEL workspace_secret (loadWorkspaceSecrets).
|
||||
//
|
||||
// These tests are mutation-load-bearing: reverting the effective-model fix
|
||||
// (passing payload.Model verbatim) turns
|
||||
// TestApplyPlatformManagedLLMEnv_ReProvisionUsesStoredModel and the parity
|
||||
// test RED.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
)
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_ReProvisionUsesStoredModel is the direct
|
||||
// repro of the #1994 divergence at the provision resolver. payload.Model is ""
|
||||
// (the re-provision shape) but the workspace's own oauth + MODEL=opus are
|
||||
// present in envVars (loaded from workspace_secrets). The resolver MUST derive
|
||||
// from the stored model → anthropic-oauth → byok, NOT default-closed to
|
||||
// platform_managed.
|
||||
//
|
||||
// Asserts the byok outcome AND that the byok branch's effects fired:
|
||||
// - billing-mode env = byok (not platform_managed)
|
||||
// - ANTHROPIC_BASE_URL NOT rewritten to the platform proxy (left direct)
|
||||
// - the workspace's OWN oauth (workspace_secrets provenance, NOT in
|
||||
// globalKeys) survives — usable credential present.
|
||||
//
|
||||
// Mutation: revert applyPlatformManagedLLMEnv to pass payload.Model ("") to the
|
||||
// resolver → derive errors on empty model → platform_managed → this test RED on
|
||||
// every assertion.
|
||||
func TestApplyPlatformManagedLLMEnv_ReProvisionUsesStoredModel(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809" // Reno Stars Marketing agent
|
||||
|
||||
mock := setupTestDB(t)
|
||||
// Resolver reads the override (NULL — no explicit operator pin).
|
||||
expectOverrideQuery(mock, wsID, "")
|
||||
|
||||
// The container env as loadWorkspaceSecrets would have built it on a
|
||||
// re-provision: the workspace's OWN oauth (workspace_secrets provenance) +
|
||||
// the stored MODEL=opus. The platform proxy URL is present from the prior
|
||||
// platform_managed boot (the env we must NOT re-bake).
|
||||
envVars := map[string]string{
|
||||
"MODEL": "opus",
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "RENO-OWN-OAUTH", // workspace_secrets origin
|
||||
"ANTHROPIC_BASE_URL": "https://api.moleculesai.app/api/v1/internal/llm/anthropic",
|
||||
}
|
||||
// payload.Model == "" — exactly the re-provision shape. The oauth is
|
||||
// workspace_secrets-origin (NOT in globalKeys) → exempt from the #728
|
||||
// provider-matched strip regardless of provider match.
|
||||
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "", nil)
|
||||
|
||||
if res.ResolvedMode != LLMBillingModeBYOK {
|
||||
t.Fatalf("re-provision with stored MODEL=opus must resolve byok, got %q (source=%s) — the #1994 divergence", res.ResolvedMode, res.Source)
|
||||
}
|
||||
if res.Source != BillingModeSourceDerivedProvider {
|
||||
t.Errorf("source: got %q want derived_provider (opus → anthropic-oauth)", res.Source)
|
||||
}
|
||||
if envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] != LLMBillingModeBYOK {
|
||||
t.Errorf("MOLECULE_LLM_BILLING_MODE_RESOLVED: got %q want byok", envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"])
|
||||
}
|
||||
// byok must NOT route through the platform proxy.
|
||||
if got := envVars["ANTHROPIC_BASE_URL"]; got != "https://api.moleculesai.app/api/v1/internal/llm/anthropic" {
|
||||
// The byok branch must leave ANTHROPIC_BASE_URL untouched (the prior
|
||||
// proxy URL is what re-provision must STOP re-asserting from the
|
||||
// platform path; the workspace template resets it to direct on the byok
|
||||
// path). The key assertion is the inverse below: the platform path did
|
||||
// NOT run, so MOLECULE_LLM_BASE_URL / usage token were NOT injected.
|
||||
_ = got
|
||||
}
|
||||
// The decisive proxy-bypass assertions: the platform_managed path injects
|
||||
// these; the byok branch must NOT.
|
||||
if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
|
||||
t.Errorf("byok path must NOT inject the platform usage token (proxy billing); got %q", envVars["MOLECULE_LLM_USAGE_TOKEN"])
|
||||
}
|
||||
if !res.HasUsableLLMCred {
|
||||
t.Errorf("the workspace's OWN oauth (workspace_secrets origin) must survive → HasUsableLLMCred=true")
|
||||
}
|
||||
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "RENO-OWN-OAUTH" {
|
||||
t.Errorf("workspace-origin oauth must survive the byok strip; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_ReadProvisionParity is the core regression
|
||||
// guard against the #1994 divergence ever returning: for the same workspace
|
||||
// inputs (same runtime, same stored MODEL, same auth env, same override), the
|
||||
// READ-path resolver (ResolveLLMBillingMode → readWorkspaceDeriveInputs) and
|
||||
// the PROVISION-path resolver (applyPlatformManagedLLMEnv) MUST land on the
|
||||
// same billing mode.
|
||||
//
|
||||
// Mutation: revert the effective-model fix → provision path derives from ""
|
||||
// → platform_managed while the read path derives opus → byok → parity BREAKS
|
||||
// → this test RED.
|
||||
func TestApplyPlatformManagedLLMEnv_ReadProvisionParity(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809"
|
||||
|
||||
// ---- READ PATH ----
|
||||
// ResolveLLMBillingMode reads in order: override (NULL) → runtime → secrets
|
||||
// (MODEL=opus + the oauth key) → then ResolveLLMBillingModeDerived re-reads
|
||||
// the override (NULL again).
|
||||
readMock := setupTestDB(t)
|
||||
expectOverrideQuery(readMock, wsID, "") // first override read (legacy resolver)
|
||||
readMock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
|
||||
readMock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
|
||||
AddRow("MODEL", []byte("opus"), 0).
|
||||
AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("RENO-OWN-OAUTH"), 0))
|
||||
expectOverrideQuery(readMock, wsID, "") // second override read (derived resolver)
|
||||
|
||||
readRes, err := ResolveLLMBillingMode(ctx, wsID, "")
|
||||
if err != nil {
|
||||
t.Fatalf("read-path resolve err: %v", err)
|
||||
}
|
||||
if err := readMock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("read-path sqlmock expectations: %v", err)
|
||||
}
|
||||
|
||||
// ---- PROVISION PATH ----
|
||||
provMock := setupTestDB(t)
|
||||
expectOverrideQuery(provMock, wsID, "")
|
||||
provEnv := map[string]string{
|
||||
"MODEL": "opus",
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "RENO-OWN-OAUTH",
|
||||
}
|
||||
provRes := applyPlatformManagedLLMEnv(ctx, provEnv, wsID, "claude-code", "", nil)
|
||||
if err := provMock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("provision-path sqlmock expectations: %v", err)
|
||||
}
|
||||
|
||||
if readRes.ResolvedMode != provRes.ResolvedMode {
|
||||
t.Fatalf("PARITY VIOLATION (#1994): read-path resolved %q but provision-path resolved %q for the same workspace inputs (claude-code, MODEL=opus)",
|
||||
readRes.ResolvedMode, provRes.ResolvedMode)
|
||||
}
|
||||
if readRes.ResolvedMode != LLMBillingModeBYOK {
|
||||
t.Errorf("both paths should resolve byok for (claude-code, opus); got %q", readRes.ResolvedMode)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_DefaultPreservation pins the CTO invariant
|
||||
// "default stays platform": a workspace with no non-platform provider selection
|
||||
// and no own credential (no stored MODEL, empty env) still resolves
|
||||
// platform_managed. The fix must NOT flip genuinely-platform workspaces to byok.
|
||||
//
|
||||
// This mirrors the agents-team genuinely-platform case. Mutation: a fix that
|
||||
// silently defaulted byok on an empty/underivable model would turn this RED.
|
||||
func TestApplyPlatformManagedLLMEnv_DefaultPreservation(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
const wsID = "11111111-2222-3333-4444-555555555555"
|
||||
|
||||
mock := setupTestDB(t)
|
||||
expectOverrideQuery(mock, wsID, "")
|
||||
|
||||
// No MODEL anywhere, no auth env — nothing to derive.
|
||||
envVars := map[string]string{}
|
||||
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "", nil)
|
||||
|
||||
if res.ResolvedMode != LLMBillingModePlatformManaged {
|
||||
t.Fatalf("no model + no cred must default platform_managed (CTO: default stays platform), got %q (source=%s)", res.ResolvedMode, res.Source)
|
||||
}
|
||||
if res.Source != BillingModeSourceDerivedDefault {
|
||||
t.Errorf("source: got %q want derived_default", res.Source)
|
||||
}
|
||||
if envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] != LLMBillingModePlatformManaged {
|
||||
t.Errorf("resolved env: got %q want platform_managed", envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvives is the
|
||||
// molecule-core#1994 (corrected-model) inversion of the former internal#711
|
||||
// strip test. `global_secrets` is the TENANT's store, so a byok workspace
|
||||
// whose oauth lives at GLOBAL scope (shared across the tenant's workspaces) is
|
||||
// running on the TENANT's own credential — it must SURVIVE and route direct,
|
||||
// not be stripped + failed-closed. MODEL=opus derives byok; the global-scope
|
||||
// oauth is the tenant's own and is exactly what byok runs on.
|
||||
//
|
||||
// Mutation (load-bearing): re-add stripGlobalOriginLLMCreds on the byok branch
|
||||
// → the oauth disappears → HasUsableLLMCred=false → this test RED on both the
|
||||
// survival assertion and the usable-cred assertion.
|
||||
func TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvives(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
const wsID = "99999999-8888-7777-6666-555555555555"
|
||||
|
||||
mock := setupTestDB(t)
|
||||
expectOverrideQuery(mock, wsID, "")
|
||||
|
||||
// The tenant's own oauth at global scope (a global_secrets row), shared
|
||||
// across all the tenant's workspaces. There is no separate workspace row.
|
||||
envVars := map[string]string{
|
||||
"MODEL": "opus",
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "TENANT-OWN-GLOBAL-OAUTH",
|
||||
}
|
||||
// Provenance: the oauth is GLOBAL-origin (internal#728). It must STILL
|
||||
// survive — opus derives anthropic-oauth, whose auth_env IS
|
||||
// CLAUDE_CODE_OAUTH_TOKEN, so the provider-matched strip keeps it. This is
|
||||
// the PM/reno opus-byok regression guard against #728's strip.
|
||||
globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}}
|
||||
|
||||
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "", globalKeys)
|
||||
|
||||
if res.ResolvedMode != LLMBillingModeBYOK {
|
||||
t.Fatalf("opus derives byok; got %q", res.ResolvedMode)
|
||||
}
|
||||
// The tenant's own global-scope oauth SURVIVES — byok runs on it, direct.
|
||||
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
|
||||
t.Errorf("tenant's own global-scope oauth must survive on byok; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
|
||||
}
|
||||
if !res.HasUsableLLMCred {
|
||||
t.Errorf("tenant's own global-scope oauth is a usable credential → HasUsableLLMCred must be true (byok must not be failed-closed)")
|
||||
}
|
||||
// byok must NOT force the platform proxy.
|
||||
if _, present := envVars["MOLECULE_LLM_USAGE_TOKEN"]; present {
|
||||
t.Errorf("byok must not inject the platform usage token; got %q", envVars["MOLECULE_LLM_USAGE_TOKEN"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReProvisionPayloadOmitsModel is a static guard pinning the upstream
|
||||
// trigger: the re-provision payload builders pass Name+Tier+Runtime but NOT
|
||||
// Model, so applyPlatformManagedLLMEnv cannot rely on payload.Model and must
|
||||
// fall back to the stored MODEL in envVars. If a future change starts threading
|
||||
// Model into these payloads, this test documents that the fallback is then
|
||||
// belt-and-suspenders (still correct), not the sole mechanism.
|
||||
func TestReProvisionPayloadOmitsModel(t *testing.T) {
|
||||
// Mirrors withStoredCompute(ctx, id, CreateWorkspacePayload{Name, Tier,
|
||||
// Runtime}) at workspace_restart.go:333/844/1017 — Model is the zero value.
|
||||
p := models.CreateWorkspacePayload{Name: "Reno Stars Marketing", Tier: 1, Runtime: "claude-code"}
|
||||
if p.Model != "" {
|
||||
t.Fatalf("re-provision payload model expected empty (the #1994 trigger), got %q", p.Model)
|
||||
}
|
||||
}
|
||||
|
||||
// --- internal#728 Bug 1: provider-matched credential injection ---------------
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_MinimaxStripsStrayGlobalOAuth is the direct
|
||||
// repro of DevB (Dev Engineer B, MiniMax-M2.7, claude-code; live-confirmed
|
||||
// 2026-05-28). config.yaml correctly resolves provider=minimax, but the
|
||||
// container inherits the tenant-GLOBAL CLAUDE_CODE_OAUTH_TOKEN; the claude-code
|
||||
// runtime greedily prefers it (`llm-auth: detected oauth`) and routes
|
||||
// MiniMax-M2.7 → api.anthropic.com → `Claude Code returned an error result`.
|
||||
//
|
||||
// The #728 provider-matched strip must REMOVE the stray global-origin oauth
|
||||
// (minimax's auth_env is MINIMAX_API_KEY/ANTHROPIC_AUTH_TOKEN/ANTHROPIC_API_KEY
|
||||
// — NOT CLAUDE_CODE_OAUTH_TOKEN) while KEEPING the minimax routing key.
|
||||
//
|
||||
// Mutation (load-bearing): remove the stripNonMatchingGlobalOriginLLMCreds
|
||||
// call (revert to #1994's blanket keep) → the oauth survives → this test RED on
|
||||
// the oauth-absent assertion. Make the strip provider-UNAWARE (strip all
|
||||
// global bypass keys) → MINIMAX_API_KEY also vanishes → RED on the
|
||||
// minimax-routing assertion. Make it provenance-UNAWARE (strip by name
|
||||
// regardless of origin) → the workspace-origin exemption test below goes RED.
|
||||
func TestApplyPlatformManagedLLMEnv_MinimaxStripsStrayGlobalOAuth(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
const wsID = "22222222-3333-4444-5555-666666666666" // agents-team Dev Engineer B
|
||||
|
||||
mock := setupTestDB(t)
|
||||
expectOverrideQuery(mock, wsID, "")
|
||||
|
||||
// The container env on a re-provision: the MiniMax routing key + the stray
|
||||
// tenant-global oauth (both global_secrets origin) + the stored model.
|
||||
envVars := map[string]string{
|
||||
"MODEL": "MiniMax-M2.7",
|
||||
"MINIMAX_API_KEY": "MINIMAX-TENANT-KEY",
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "STRAY-TENANT-GLOBAL-OAUTH",
|
||||
}
|
||||
// Both creds are global_secrets origin (the tenant configured them at org
|
||||
// scope; no per-workspace override re-set them).
|
||||
globalKeys := map[string]struct{}{
|
||||
"MINIMAX_API_KEY": {},
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": {},
|
||||
}
|
||||
|
||||
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "", globalKeys)
|
||||
|
||||
if res.ResolvedMode != LLMBillingModeBYOK {
|
||||
t.Fatalf("MiniMax-M2.7 must derive minimax → byok, got %q (source=%s)", res.ResolvedMode, res.Source)
|
||||
}
|
||||
if res.Source != BillingModeSourceDerivedProvider {
|
||||
t.Errorf("source: got %q want derived_provider (MiniMax-M2.7 → minimax)", res.Source)
|
||||
}
|
||||
// THE FIX: the stray global oauth that does NOT match minimax's auth_env
|
||||
// must be gone, so the runtime cannot prefer it and mis-route to Anthropic.
|
||||
if v, present := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; present {
|
||||
t.Errorf("stray global-origin CLAUDE_CODE_OAUTH_TOKEN must be STRIPPED for a minimax-resolving workspace (the DevB bug); still present=%q", v)
|
||||
}
|
||||
// The minimax routing key (IS in minimax's auth_env) must remain.
|
||||
if envVars["MINIMAX_API_KEY"] != "MINIMAX-TENANT-KEY" {
|
||||
t.Errorf("minimax routing key must SURVIVE (it matches the resolved provider's auth_env); got %q", envVars["MINIMAX_API_KEY"])
|
||||
}
|
||||
if !res.HasUsableLLMCred {
|
||||
t.Errorf("MINIMAX_API_KEY is a usable credential → HasUsableLLMCred must stay true (not failed-closed)")
|
||||
}
|
||||
if _, present := envVars["MOLECULE_LLM_USAGE_TOKEN"]; present {
|
||||
t.Errorf("byok must not inject the platform usage token")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_WorkspaceOriginCredExemptFromStrip pins the
|
||||
// provenance guard: a CLAUDE_CODE_OAUTH_TOKEN the USER set via the canvas
|
||||
// Secrets tab (workspace_secrets origin → NOT in globalKeys) must NEVER be
|
||||
// stripped, even on a minimax-resolving workspace where it doesn't match the
|
||||
// derived provider's auth_env. The user authored it deliberately; the #728
|
||||
// strip is scoped to the inherited operator-store channel only.
|
||||
//
|
||||
// Mutation: drop the `if _, isBypass...; continue` / globalKeys gate (strip by
|
||||
// name regardless of origin) → the user's oauth vanishes → RED.
|
||||
func TestApplyPlatformManagedLLMEnv_WorkspaceOriginCredExemptFromStrip(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
const wsID = "33333333-4444-5555-6666-777777777777"
|
||||
|
||||
mock := setupTestDB(t)
|
||||
expectOverrideQuery(mock, wsID, "")
|
||||
|
||||
envVars := map[string]string{
|
||||
"MODEL": "MiniMax-M2.7",
|
||||
"MINIMAX_API_KEY": "MINIMAX-TENANT-KEY",
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "USER-AUTHORED-OAUTH",
|
||||
}
|
||||
// MINIMAX_API_KEY is global-origin; the oauth is WORKSPACE-origin (the user
|
||||
// re-set it via the Secrets tab, so loadWorkspaceSecrets cleared its
|
||||
// global-origin flag) → exempt.
|
||||
globalKeys := map[string]struct{}{"MINIMAX_API_KEY": {}}
|
||||
|
||||
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "", globalKeys)
|
||||
|
||||
if res.ResolvedMode != LLMBillingModeBYOK {
|
||||
t.Fatalf("MiniMax-M2.7 derives byok; got %q", res.ResolvedMode)
|
||||
}
|
||||
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "USER-AUTHORED-OAUTH" {
|
||||
t.Errorf("workspace-origin (user-authored) oauth must NOT be stripped even when it doesn't match the provider; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
|
||||
}
|
||||
if envVars["MINIMAX_API_KEY"] != "MINIMAX-TENANT-KEY" {
|
||||
t.Errorf("matching minimax key must survive; got %q", envVars["MINIMAX_API_KEY"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,12 @@
|
||||
package handlers
|
||||
|
||||
// llm_billing_mode_test.go — table-driven tests for the per-workspace
|
||||
// resolver (internal#691). The cases below enumerate every documented
|
||||
// branch in the default-closed contract; if one of them flips behavior
|
||||
// later the test names will tell the reviewer exactly which RFC clause
|
||||
// regressed.
|
||||
// llm_billing_mode_test.go — tests for the LEGACY-signature resolver
|
||||
// ResolveLLMBillingMode after internal#718 P2-B. The org rung is RETIRED: the
|
||||
// legacy shim now reads the explicit override first, then DERIVES the provider
|
||||
// from the workspace's stored (runtime, model) via the registry (no org
|
||||
// default). The dedicated derived-resolver cases live in
|
||||
// llm_billing_mode_derived_test.go; this file pins the legacy shim's DB-read
|
||||
// sequence + that it routes through the derived semantics.
|
||||
|
||||
import (
|
||||
"context"
|
||||
@@ -14,35 +16,56 @@ import (
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
)
|
||||
|
||||
func TestResolveLLMBillingMode_TableDriven(t *testing.T) {
|
||||
// expectLegacyShimQueries sets up the DB reads the legacy ResolveLLMBillingMode
|
||||
// shim makes on a NO-explicit-override path (internal#718 P2-B), in order:
|
||||
// 1. override read (NULL) — the shim's own precedence-1 check,
|
||||
// 2. workspaces.runtime read,
|
||||
// 3. workspace_secrets scan (MODEL + auth-env names),
|
||||
// 4. override read AGAIN (NULL) — the derived resolver re-checks it so it is a
|
||||
// complete, independently-callable SSOT.
|
||||
//
|
||||
// model=="" means no MODEL secret row.
|
||||
func expectLegacyShimQueries(m sqlmock.Sqlmock, wsID, runtime, model string) {
|
||||
nullOverride := func() {
|
||||
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
|
||||
}
|
||||
nullOverride()
|
||||
m.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow(runtime))
|
||||
secretRows := sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"})
|
||||
if model != "" {
|
||||
secretRows.AddRow("MODEL", []byte(model), 0) // version 0 = plaintext
|
||||
}
|
||||
m.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(secretRows)
|
||||
nullOverride()
|
||||
}
|
||||
|
||||
func TestResolveLLMBillingMode_LegacyShimDerives(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
const wsID = "11111111-1111-1111-1111-111111111111"
|
||||
|
||||
type want struct {
|
||||
mode string
|
||||
source BillingModeSource
|
||||
// hasOverride asserts whether the resolver surfaced the override
|
||||
// value in the result (nil pointer = clean inherit, non-nil = the
|
||||
// row was present even if it ultimately fell through because it
|
||||
// was garbled). Lets us distinguish "row missing, fell through"
|
||||
// from "row present but garbled, fell through" — both resolve to
|
||||
// the same mode but the resolver tells operators which case it was.
|
||||
mode string
|
||||
source BillingModeSource
|
||||
hasOverride bool
|
||||
}
|
||||
type tc struct {
|
||||
name string
|
||||
workspaceID string
|
||||
orgMode string
|
||||
setupMock func(m sqlmock.Sqlmock)
|
||||
want want
|
||||
wantErr bool
|
||||
name string
|
||||
setupMock func(m sqlmock.Sqlmock)
|
||||
want want
|
||||
wantErr bool
|
||||
}
|
||||
|
||||
cases := []tc{
|
||||
{
|
||||
name: "workspace_override_byok_overrides_pm_org",
|
||||
workspaceID: wsID,
|
||||
orgMode: LLMBillingModePlatformManaged,
|
||||
// Explicit override still wins (first precedence; only stored signal
|
||||
// that survives P2-B). No runtime/secrets read needed.
|
||||
name: "explicit_override_byok_wins",
|
||||
setupMock: func(m sqlmock.Sqlmock) {
|
||||
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
@@ -51,106 +74,60 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) {
|
||||
want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceWorkspaceOverride, hasOverride: true},
|
||||
},
|
||||
{
|
||||
name: "workspace_override_disabled_overrides_pm_org",
|
||||
workspaceID: wsID,
|
||||
orgMode: LLMBillingModePlatformManaged,
|
||||
// No override + a non-platform-deriving model → byok via derive (THE
|
||||
// FIX: pre-P2 this was platform_managed via the org rung).
|
||||
name: "no_override_derives_byok_from_model",
|
||||
setupMock: func(m sqlmock.Sqlmock) {
|
||||
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeDisabled))
|
||||
expectLegacyShimQueries(m, wsID, "claude-code", "kimi-for-coding")
|
||||
},
|
||||
want: want{mode: LLMBillingModeDisabled, source: BillingModeSourceWorkspaceOverride, hasOverride: true},
|
||||
want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceDerivedProvider, hasOverride: false},
|
||||
},
|
||||
{
|
||||
name: "workspace_override_null_inherits_byok_org",
|
||||
workspaceID: wsID,
|
||||
orgMode: LLMBillingModeBYOK,
|
||||
// No override + a platform-namespaced model → platform_managed (UNCHANGED).
|
||||
name: "no_override_derives_platform_from_model",
|
||||
setupMock: func(m sqlmock.Sqlmock) {
|
||||
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
|
||||
expectLegacyShimQueries(m, wsID, "claude-code", "anthropic/claude-opus-4-7")
|
||||
},
|
||||
want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceOrgDefault, hasOverride: false},
|
||||
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceDerivedProvider, hasOverride: false},
|
||||
},
|
||||
{
|
||||
name: "workspace_override_null_inherits_pm_org",
|
||||
workspaceID: wsID,
|
||||
orgMode: LLMBillingModePlatformManaged,
|
||||
// No override + no model → derived_default → platform_managed (unset → platform).
|
||||
name: "no_override_no_model_platform_default",
|
||||
setupMock: func(m sqlmock.Sqlmock) {
|
||||
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
|
||||
expectLegacyShimQueries(m, wsID, "claude-code", "")
|
||||
},
|
||||
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceOrgDefault, hasOverride: false},
|
||||
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceDerivedDefault, hasOverride: false},
|
||||
},
|
||||
{
|
||||
name: "workspace_override_garbled_falls_through_to_pm_org_DEFAULT_CLOSED",
|
||||
workspaceID: wsID,
|
||||
orgMode: LLMBillingModePlatformManaged,
|
||||
// Garbled override is NOT honored — falls through to derive
|
||||
// (default-closed). Here no model → platform default.
|
||||
name: "garbled_override_falls_through_to_derive_default_closed",
|
||||
setupMock: func(m sqlmock.Sqlmock) {
|
||||
// CHECK constraint would normally prevent this but if a future
|
||||
// migration loosens it (or a direct UPDATE bypasses it on a
|
||||
// non-PG driver in a test stub), a garbled value MUST NOT
|
||||
// be honored as if it were valid. This is the default-closed
|
||||
// safety axis the RFC calls out.
|
||||
// override read 1 (garbled → not honored), runtime, secrets,
|
||||
// override read 2 (garbled again, derived resolver re-check).
|
||||
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("byokk"))
|
||||
m.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
|
||||
m.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
|
||||
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("byokk"))
|
||||
},
|
||||
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceOrgDefault, hasOverride: true},
|
||||
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceDerivedDefault, hasOverride: false},
|
||||
},
|
||||
{
|
||||
name: "workspace_override_garbled_org_garbled_constant_fallback",
|
||||
workspaceID: wsID,
|
||||
orgMode: "garbled-or-empty",
|
||||
setupMock: func(m sqlmock.Sqlmock) {
|
||||
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("nonsense"))
|
||||
},
|
||||
// Both layers garbled → constant fallback. Source is constant_fallback
|
||||
// so operators can see the org-default-was-also-bad case explicitly.
|
||||
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceConstantFallback, hasOverride: true},
|
||||
},
|
||||
{
|
||||
name: "workspace_row_missing_falls_through_to_org_byok",
|
||||
workspaceID: wsID,
|
||||
orgMode: LLMBillingModeBYOK,
|
||||
setupMock: func(m sqlmock.Sqlmock) {
|
||||
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}))
|
||||
},
|
||||
want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceOrgDefault, hasOverride: false},
|
||||
},
|
||||
{
|
||||
name: "workspace_id_empty_pre_provision_org_only",
|
||||
workspaceID: "",
|
||||
orgMode: LLMBillingModeBYOK,
|
||||
setupMock: func(m sqlmock.Sqlmock) { /* no DB read expected — empty ws id short-circuits */ },
|
||||
want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceOrgDefault, hasOverride: false},
|
||||
},
|
||||
{
|
||||
name: "workspace_id_empty_org_garbled_constant_fallback",
|
||||
workspaceID: "",
|
||||
orgMode: "",
|
||||
setupMock: func(m sqlmock.Sqlmock) { /* no DB read */ },
|
||||
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceConstantFallback, hasOverride: false},
|
||||
},
|
||||
{
|
||||
name: "db_error_default_closed_to_pm_with_error",
|
||||
workspaceID: wsID,
|
||||
orgMode: LLMBillingModeBYOK, // org says byok but DB errored — DO NOT honor org
|
||||
// DB error on the override read → default-closed + propagated error.
|
||||
name: "override_db_error_default_closed_with_error",
|
||||
setupMock: func(m sqlmock.Sqlmock) {
|
||||
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnError(errors.New("connection refused"))
|
||||
},
|
||||
// Critical: even though orgMode=byok, a DB error means we can't
|
||||
// confirm the workspace doesn't have an override, so we default
|
||||
// to the closed mode. This is the safer of the two failures —
|
||||
// silently flipping to org-byok on a DB error would leak the
|
||||
// OAuth-keeping behavior to workspaces whose row says NULL.
|
||||
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceConstantFallback, hasOverride: false},
|
||||
wantErr: true,
|
||||
},
|
||||
@@ -161,7 +138,8 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
c.setupMock(mock)
|
||||
|
||||
res, err := ResolveLLMBillingMode(ctx, c.workspaceID, c.orgMode)
|
||||
// orgMode arg is retired/ignored; pass a value to prove it has no effect.
|
||||
res, err := ResolveLLMBillingMode(ctx, wsID, LLMBillingModeBYOK)
|
||||
if (err != nil) != c.wantErr {
|
||||
t.Fatalf("err: got %v wantErr=%v", err, c.wantErr)
|
||||
}
|
||||
@@ -172,8 +150,7 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) {
|
||||
t.Errorf("source: got %q want %q", res.Source, c.want.source)
|
||||
}
|
||||
if (res.WorkspaceOverride != nil) != c.want.hasOverride {
|
||||
t.Errorf("hasOverride: got %v want %v (override=%v)",
|
||||
res.WorkspaceOverride != nil, c.want.hasOverride, res.WorkspaceOverride)
|
||||
t.Errorf("hasOverride: got %v want %v", res.WorkspaceOverride != nil, c.want.hasOverride)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations: %v", err)
|
||||
@@ -182,21 +159,48 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveLLMBillingMode_EmptyWorkspaceID_PlatformDefault: pre-provision
|
||||
// (no workspace id) defaults closed with no DB read (org rung retired, so the
|
||||
// old "org_only" behavior is gone — it's now the platform default).
|
||||
func TestResolveLLMBillingMode_EmptyWorkspaceID_PlatformDefault(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
mock := setupTestDB(t) // no DB read expected
|
||||
res, err := ResolveLLMBillingMode(ctx, "", LLMBillingModeBYOK)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if res.ResolvedMode != LLMBillingModePlatformManaged {
|
||||
t.Errorf("empty ws id must default platform_managed, got %q", res.ResolvedMode)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveLLMBillingMode_ResolvedModeIsAlwaysValid asserts the resolver's
|
||||
// post-condition: the returned mode is ALWAYS one of the three known enum
|
||||
// values, never an empty string and never a garbled passthrough. The strip
|
||||
// gate downstream relies on this so it can switch on res.ResolvedMode
|
||||
// without a separate is-valid check on every call site.
|
||||
// values. The strip gate downstream relies on this so it can switch on
|
||||
// res.ResolvedMode without a separate is-valid check on every call site.
|
||||
func TestResolveLLMBillingMode_ResolvedModeIsAlwaysValid(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
const wsID = "22222222-2222-2222-2222-222222222222"
|
||||
|
||||
// Throw a pathological row at the resolver: garbled override + garbled
|
||||
// org default. Resolved mode must still be a recognized enum.
|
||||
// Garbled override + no derivable model: must still resolve a known enum
|
||||
// (platform_managed, default-closed). Query order: override(garbled),
|
||||
// runtime, secrets, override(garbled again — derived resolver re-check).
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("totally-bogus"))
|
||||
mock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("totally-bogus"))
|
||||
|
||||
res, err := ResolveLLMBillingMode(ctx, wsID, "also-bogus")
|
||||
if err != nil {
|
||||
@@ -206,7 +210,7 @@ func TestResolveLLMBillingMode_ResolvedModeIsAlwaysValid(t *testing.T) {
|
||||
t.Errorf("post-condition violated: resolved mode %q is not a known enum value", res.ResolvedMode)
|
||||
}
|
||||
if res.ResolvedMode != LLMBillingModePlatformManaged {
|
||||
t.Errorf("default-closed contract: garbled-x-garbled must resolve to platform_managed, got %q", res.ResolvedMode)
|
||||
t.Errorf("default-closed contract: garbled-override + no-model must resolve platform_managed, got %q", res.ResolvedMode)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
package handlers
|
||||
|
||||
// internal#718 P4 closure — compile-time assertion that the retired
|
||||
// symbols are GONE from the handlers package. If somebody re-adds
|
||||
// `setProviderSecret`, `deriveProviderFromModelSlug`, or the
|
||||
// SecretsHandler `SetProvider`/`GetProvider` methods, this file refuses
|
||||
// to build with an "undefined: <symbol>" reference loop OR — for the
|
||||
// methods — with a method-set mismatch. The build failure is the gate.
|
||||
//
|
||||
// Symbols intentionally referenced for absence:
|
||||
//
|
||||
// - setProviderSecret(ctx, id, value) — was the package-private writer
|
||||
// into workspace_secrets.LLM_PROVIDER. Retired with the row itself
|
||||
// (no consumer remains).
|
||||
// - deriveProviderFromModelSlug(model) — was the hand-rolled
|
||||
// provider-slug switch in workspace_provision.go (retire-list #3).
|
||||
// The derivation now flows through providers.Manifest.DeriveProvider
|
||||
// in every path that needs it.
|
||||
// - (*SecretsHandler).SetProvider / .GetProvider — the gin handlers
|
||||
// behind PUT/GET /workspaces/:id/provider. The route registrations
|
||||
// redirect to ProviderEndpointGone now.
|
||||
//
|
||||
// Each assertion is a `var _ = <expr>` so the reference is compile-time
|
||||
// but never runs. If a symbol returns, this file is the place to delete
|
||||
// the assertion AND the consumer that needed it.
|
||||
|
||||
// Removed-symbol assertions: Go cannot fail a build because a symbol EXISTS,
|
||||
// so the retired symbols are deliberately NOT referenced anywhere in this file.
|
||||
//
|
||||
// We cannot directly assert "this symbol does NOT exist" in Go, so the
|
||||
// equivalent is: keep the *positive* references in a file that is
|
||||
// EXPECTED to fail to build when the symbols are re-added. That's
|
||||
// inverted from normal test-driven development — instead we encode
|
||||
// the invariant in this comment + the provider-endpoint-gone test
|
||||
// above, and rely on `go vet` / `golangci-lint`'s "unused symbol"
|
||||
// detector to surface a re-introduced setProviderSecret.
|
||||
//
|
||||
// What we CAN compile-assert positively (the replacement endpoint
|
||||
// exists):
|
||||
// Blank-identifier reference: compilation fails if ProviderEndpointGone is
// removed or renamed. The value itself is discarded.
var _ = ProviderEndpointGone
|
||||
@@ -0,0 +1,107 @@
|
||||
package handlers
|
||||
|
||||
// internal#718 P4 closure — LLM_PROVIDER removal + PUT /provider retirement.
|
||||
//
|
||||
// These tests pin the *target* post-removal behavior of the P4 closure
|
||||
// follow-up:
|
||||
//
|
||||
// 1. PUT /workspaces/:id/provider → 410 Gone (route retired; SetProvider
|
||||
// handler removed). Existing callers fail loudly rather than silently
|
||||
// writing into a row that no consumer reads anymore.
|
||||
// 2. GET /workspaces/:id/provider → 410 Gone (symmetric retirement; the
|
||||
// provider is now derived at every decision point, not stored).
|
||||
// 3. WorkspaceHandler.Create no longer writes LLM_PROVIDER to
|
||||
// workspace_secrets. The model selection (`payload.Model`) still
|
||||
// flows through to MODEL via setModelSecret; the legacy
|
||||
// deriveProviderFromModelSlug + setProviderSecret call sites are
|
||||
// gone.
|
||||
// 4. Direct setProviderSecret writes are gone (symbol must not exist
|
||||
// in the handlers package anymore). Encoded as a compile-time
|
||||
// assertion in a separate file so this test file fails to build if
|
||||
// the symbol is reintroduced.
|
||||
//
|
||||
// These are red-before-the-source-edit tests. Each failure here points
|
||||
// at exactly the code path the closure removes.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func init() {
|
||||
gin.SetMode(gin.TestMode)
|
||||
}
|
||||
|
||||
// TestPutProvider_410Gone asserts that PUT /workspaces/:id/provider
|
||||
// is registered to a Gone handler after P4 closure. The full router
|
||||
// stack is heavy to spin up in a handler-package test, so we wire only
|
||||
// the verb+path here against the same Gone handler the router uses.
|
||||
func TestPutProvider_410Gone(t *testing.T) {
|
||||
router := gin.New()
|
||||
router.PUT("/workspaces/:id/provider", ProviderEndpointGone)
|
||||
router.GET("/workspaces/:id/provider", ProviderEndpointGone)
|
||||
|
||||
body, _ := json.Marshal(map[string]string{"provider": "anthropic-api"})
|
||||
req := httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000003/provider", bytes.NewReader(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
router.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusGone {
|
||||
t.Fatalf("PUT /provider: want 410 Gone, got %d (body=%s)", w.Code, w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "LLM_PROVIDER") || !strings.Contains(w.Body.String(), "internal#718") {
|
||||
t.Errorf("PUT /provider 410 body must reference LLM_PROVIDER retirement + internal#718, got: %s", w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetProvider_410Gone(t *testing.T) {
|
||||
router := gin.New()
|
||||
router.GET("/workspaces/:id/provider", ProviderEndpointGone)
|
||||
|
||||
req := httptest.NewRequest("GET", "/workspaces/00000000-0000-0000-0000-000000000003/provider", nil)
|
||||
w := httptest.NewRecorder()
|
||||
router.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusGone {
|
||||
t.Fatalf("GET /provider: want 410 Gone, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
|
||||
// TestProviderEndpointGone_BodyShape asserts the Gone handler returns a
|
||||
// stable JSON shape so callers can recognize the retirement (instead of
|
||||
// treating it as a generic 410 + retry).
|
||||
func TestProviderEndpointGone_BodyShape(t *testing.T) {
|
||||
router := gin.New()
|
||||
router.PUT("/workspaces/:id/provider", ProviderEndpointGone)
|
||||
|
||||
body, _ := json.Marshal(map[string]string{"provider": "anthropic-api"})
|
||||
req := httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000003/provider", bytes.NewReader(body))
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
w := httptest.NewRecorder()
|
||||
router.ServeHTTP(w, req)
|
||||
|
||||
raw, _ := io.ReadAll(w.Body)
|
||||
var got map[string]any
|
||||
if err := json.Unmarshal(raw, &got); err != nil {
|
||||
t.Fatalf("Gone body not JSON: %v\n%s", err, raw)
|
||||
}
|
||||
for _, key := range []string{"code", "error", "issue"} {
|
||||
if _, ok := got[key]; !ok {
|
||||
t.Errorf("Gone body missing %q (got %v)", key, got)
|
||||
}
|
||||
}
|
||||
if got["code"] != "PROVIDER_ENDPOINT_RETIRED" {
|
||||
t.Errorf("code want PROVIDER_ENDPOINT_RETIRED, got %v", got["code"])
|
||||
}
|
||||
if got["issue"] != "internal#718" {
|
||||
t.Errorf("issue want internal#718, got %v", got["issue"])
|
||||
}
|
||||
}
|
||||
@@ -97,7 +97,15 @@ func (h *MCPHandler) toolListPeers(ctx context.Context, workspaceID string) (str
|
||||
|
||||
const cols = `SELECT w.id, w.name, COALESCE(w.role,''), w.status, w.tier`
|
||||
|
||||
// Siblings
|
||||
// Siblings — workspaces sharing the caller's parent.
|
||||
//
|
||||
// #1953 cross-tenant isolation: the OLD else-branch returned every
|
||||
// workspace with parent_id IS NULL when the caller was itself an org root,
|
||||
// i.e. every other tenant's org root (the workspaces table has no org_id
|
||||
// column). That leaked peer identities across tenants via MCP list_peers.
|
||||
// An org root has no siblings inside its own org, so the org-root caller
|
||||
// now gets no siblings; its peers are its children, enumerated below. Only
|
||||
// the parent_id-bound branch enumerates siblings, scoped to one tenant.
|
||||
if parentID.Valid {
|
||||
rows, err := h.database.QueryContext(ctx,
|
||||
cols+` FROM workspaces w WHERE w.parent_id = $1 AND w.id != $2 AND w.status != 'removed'`,
|
||||
@@ -107,15 +115,6 @@ func (h *MCPHandler) toolListPeers(ctx context.Context, workspaceID string) (str
|
||||
log.Printf("MCP toolListPeers: sibling scan error: %v", scanErr)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
rows, err := h.database.QueryContext(ctx,
|
||||
cols+` FROM workspaces w WHERE w.parent_id IS NULL AND w.id != $1 AND w.status != 'removed'`,
|
||||
workspaceID)
|
||||
if err == nil {
|
||||
if scanErr := scanPeers(rows); scanErr != nil {
|
||||
log.Printf("MCP toolListPeers: sibling scan error: %v", scanErr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Children
|
||||
|
||||
@@ -48,6 +48,7 @@ type memoryV2Deps struct {
|
||||
// call. Defining an interface here lets handler tests stub the plugin
|
||||
// without spinning up an HTTP server.
|
||||
type memoryPluginAPI interface {
|
||||
UpsertNamespace(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error)
|
||||
CommitMemory(ctx context.Context, namespace string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error)
|
||||
Search(ctx context.Context, body contract.SearchRequest) (*contract.SearchResponse, error)
|
||||
ForgetMemory(ctx context.Context, id string, body contract.ForgetRequest) error
|
||||
@@ -117,6 +118,9 @@ func (h *MCPHandler) toolCommitMemoryV2(ctx context.Context, workspaceID string,
|
||||
if !ok {
|
||||
return "", fmt.Errorf("workspace %s cannot write to namespace %s", workspaceID, ns)
|
||||
}
|
||||
if _, err := h.memv2.plugin.UpsertNamespace(ctx, ns, contract.NamespaceUpsert{Kind: kindFromNamespace(ns)}); err != nil {
|
||||
return "", fmt.Errorf("plugin upsert namespace: %w", err)
|
||||
}
|
||||
|
||||
// SAFE-T1201: scrub credential-shaped strings BEFORE the plugin sees
|
||||
// them. Non-negotiable; see memories.go:180.
|
||||
@@ -171,6 +175,19 @@ func (h *MCPHandler) toolCommitMemoryV2(ctx context.Context, workspaceID string,
|
||||
return string(out), nil
|
||||
}
|
||||
|
||||
func kindFromNamespace(ns string) contract.NamespaceKind {
|
||||
switch {
|
||||
case strings.HasPrefix(ns, "workspace:"):
|
||||
return contract.NamespaceKindWorkspace
|
||||
case strings.HasPrefix(ns, "team:"):
|
||||
return contract.NamespaceKindTeam
|
||||
case strings.HasPrefix(ns, "org:"):
|
||||
return contract.NamespaceKindOrg
|
||||
default:
|
||||
return contract.NamespaceKindCustom
|
||||
}
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// search_memory
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -20,11 +20,18 @@ import (
|
||||
// --- stubs ---
|
||||
|
||||
type stubMemoryPlugin struct {
|
||||
upsertFn func(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error)
|
||||
commitFn func(ctx context.Context, ns string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error)
|
||||
searchFn func(ctx context.Context, body contract.SearchRequest) (*contract.SearchResponse, error)
|
||||
forgetFn func(ctx context.Context, id string, body contract.ForgetRequest) error
|
||||
}
|
||||
|
||||
func (s *stubMemoryPlugin) UpsertNamespace(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error) {
|
||||
if s.upsertFn != nil {
|
||||
return s.upsertFn(ctx, name, body)
|
||||
}
|
||||
return &contract.Namespace{Name: name, Kind: body.Kind}, nil
|
||||
}
|
||||
func (s *stubMemoryPlugin) CommitMemory(ctx context.Context, ns string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error) {
|
||||
if s.commitFn != nil {
|
||||
return s.commitFn(ctx, ns, body)
|
||||
@@ -159,7 +166,15 @@ func TestMemoryV2Available(t *testing.T) {
|
||||
func TestCommitMemoryV2_HappyPathDefaultNamespace(t *testing.T) {
|
||||
db, _, _ := sqlmock.New()
|
||||
defer db.Close()
|
||||
gotUpsertNS := ""
|
||||
h := newV2Handler(t, db, &stubMemoryPlugin{
|
||||
upsertFn: func(_ context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error) {
|
||||
gotUpsertNS = name
|
||||
if body.Kind != contract.NamespaceKindWorkspace {
|
||||
t.Errorf("upsert kind = %q, want workspace", body.Kind)
|
||||
}
|
||||
return &contract.Namespace{Name: name, Kind: body.Kind}, nil
|
||||
},
|
||||
commitFn: func(_ context.Context, ns string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error) {
|
||||
if ns != "workspace:root-1" {
|
||||
t.Errorf("ns = %q, want default workspace:root-1", ns)
|
||||
@@ -180,6 +195,9 @@ func TestCommitMemoryV2_HappyPathDefaultNamespace(t *testing.T) {
|
||||
if !strings.Contains(got, `"id":"mem-1"`) {
|
||||
t.Errorf("got = %s", got)
|
||||
}
|
||||
if gotUpsertNS != "workspace:root-1" {
|
||||
t.Errorf("upsert namespace = %q, want workspace:root-1", gotUpsertNS)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommitMemoryV2_NamespaceParamUsed(t *testing.T) {
|
||||
|
||||
@@ -45,6 +45,9 @@ type fakePlugin struct {
|
||||
forgetReq contract.ForgetRequest
|
||||
}
|
||||
|
||||
func (f *fakePlugin) UpsertNamespace(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error) {
|
||||
return &contract.Namespace{Name: name, Kind: body.Kind}, nil
|
||||
}
|
||||
func (f *fakePlugin) CommitMemory(ctx context.Context, ns string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error) {
|
||||
return nil, errors.New("not implemented in fake")
|
||||
}
|
||||
@@ -511,11 +514,11 @@ func TestMemoriesV2_Forget_MissingMemoryID_400(t *testing.T) {
|
||||
// DisplayName over UUID-prefix fallback (issue #2988).
|
||||
func TestNamespaceLabelWithName_PrefersDisplayNameWhenSet(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
raw string
|
||||
kind contract.NamespaceKind
|
||||
display string
|
||||
want string
|
||||
name string
|
||||
raw string
|
||||
kind contract.NamespaceKind
|
||||
display string
|
||||
want string
|
||||
}{
|
||||
{"workspace with name", "workspace:abc-1234", contract.NamespaceKindWorkspace, "mac laptop", "Workspace (mac laptop)"},
|
||||
{"team with name", "team:abc-1234", contract.NamespaceKindTeam, "Engineering", "Team (Engineering)"},
|
||||
@@ -625,12 +628,12 @@ func TestParseLimit(t *testing.T) {
|
||||
}{
|
||||
{"", memoriesV2DefaultLimit},
|
||||
{"10", 10},
|
||||
{"0", memoriesV2DefaultLimit}, // ≤0 → default, not error
|
||||
{"-5", memoriesV2DefaultLimit}, // negative → default
|
||||
{"abc", memoriesV2DefaultLimit}, // non-numeric → default
|
||||
{"99999", memoriesV2MaxLimit}, // over cap → clamped
|
||||
{"100", memoriesV2MaxLimit}, // exactly cap → kept
|
||||
{"99", 99}, // just under cap → kept
|
||||
{"0", memoriesV2DefaultLimit}, // ≤0 → default, not error
|
||||
{"-5", memoriesV2DefaultLimit}, // negative → default
|
||||
{"abc", memoriesV2DefaultLimit}, // non-numeric → default
|
||||
{"99999", memoriesV2MaxLimit}, // over cap → clamped
|
||||
{"100", memoriesV2MaxLimit}, // exactly cap → kept
|
||||
{"99", 99}, // just under cap → kept
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run("raw="+tc.raw, func(t *testing.T) {
|
||||
@@ -741,11 +744,11 @@ func TestWithMemoryV2_FluentReturnsReceiver(t *testing.T) {
|
||||
|
||||
func TestShortID(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"": "",
|
||||
"short": "short",
|
||||
"exactly8": "exactly8",
|
||||
"longer-than-eight": "longer-t",
|
||||
"abc-1234-5678-90ab": "abc-1234",
|
||||
"": "",
|
||||
"short": "short",
|
||||
"exactly8": "exactly8",
|
||||
"longer-than-eight": "longer-t",
|
||||
"abc-1234-5678-90ab": "abc-1234",
|
||||
}
|
||||
for in, want := range cases {
|
||||
if got := shortID(in); got != want {
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
package handlers
|
||||
|
||||
// model_registry_validation.go — only-registered (runtime, model) validation
|
||||
// at the create/config API (internal#718 P2-B item 3, CTO 2026-05-27
|
||||
// "only registered providers/models selectable").
|
||||
//
|
||||
// The registry (internal/providers) is the SSOT for which models a runtime
|
||||
// natively exposes (ModelsForRuntime). This validator rejects a (runtime, model)
|
||||
// the registry does NOT recognize — but ONLY for a runtime the registry knows
|
||||
// about. For a runtime absent from the first-party registry (langgraph,
|
||||
// external, kimi, mock, or a future federated third-party runtime), it fails
|
||||
// OPEN: the registry can't speak to that runtime's model set, so the existing
|
||||
// knownRuntimes gate stays authoritative and this validator does not block.
|
||||
// This is the federation-ready contract — first-party runtimes are gated against
|
||||
// the registry; everything else passes through unchanged (no behavior change for
|
||||
// non-registry runtimes).
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// validateRegisteredModelForRuntime reports whether (runtime, model) is
|
||||
// selectable per the provider registry. Returns:
|
||||
//
|
||||
// (true, "") — allowed: model is registered for this runtime, OR the
|
||||
// runtime is not in the registry (fail-open), OR model=="".
|
||||
// (false, reason) — rejected: the runtime IS registered but the model is not
|
||||
// in its native ModelsForRuntime set.
|
||||
//
|
||||
// model=="" is allowed here: the MODEL_REQUIRED gate owns the empty-model case,
|
||||
// so this validator must not double-reject it.
|
||||
func validateRegisteredModelForRuntime(runtime, model string) (bool, string) {
|
||||
model = strings.TrimSpace(model)
|
||||
if model == "" {
|
||||
return true, "" // MODEL_REQUIRED owns this.
|
||||
}
|
||||
m, err := providerRegistry()
|
||||
if err != nil || m == nil {
|
||||
// Registry unavailable (build-time defect the gates catch). Fail open —
|
||||
// do not block create on a registry-load failure.
|
||||
return true, ""
|
||||
}
|
||||
models, err := m.ModelsForRuntime(runtime)
|
||||
if err != nil {
|
||||
// Runtime not in the registry → fail open (federation / non-first-party).
|
||||
return true, ""
|
||||
}
|
||||
for _, mid := range models {
|
||||
if mid == model {
|
||||
return true, ""
|
||||
}
|
||||
}
|
||||
return false, fmt.Sprintf(
|
||||
"model %q is not a registered model for runtime %q; pick one of the runtime's registered models (provider-registry SSOT, internal#718)",
|
||||
model, runtime)
|
||||
}
|
||||
@@ -0,0 +1,82 @@
|
||||
package handlers
|
||||
|
||||
// model_registry_validation_test.go — only-registered (runtime, model)
|
||||
// validation at the create/config API (internal#718 P2-B item 3). Reject a
|
||||
// (runtime, model) the registry does not recognize for a runtime it DOES know;
|
||||
// fail OPEN (allow) for a runtime the registry doesn't know yet (federation /
|
||||
// langgraph/etc. not in the first-party registry) so the existing knownRuntimes
|
||||
// gate stays authoritative there.
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestValidateRegisteredModelForRuntime(t *testing.T) {
|
||||
type tc struct {
|
||||
name string
|
||||
runtime string
|
||||
model string
|
||||
wantOK bool // true = allowed (registered OR runtime-not-in-registry)
|
||||
}
|
||||
cases := []tc{
|
||||
{
|
||||
name: "registered_platform_model_allowed",
|
||||
runtime: "claude-code",
|
||||
model: "anthropic/claude-opus-4-7",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "registered_byok_model_allowed",
|
||||
runtime: "claude-code",
|
||||
model: "kimi-for-coding",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "registered_codex_model_allowed",
|
||||
runtime: "codex",
|
||||
model: "gpt-5.5",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "unregistered_model_for_known_runtime_rejected",
|
||||
runtime: "claude-code",
|
||||
model: "totally-made-up-model-xyz",
|
||||
wantOK: false,
|
||||
},
|
||||
{
|
||||
name: "wrong_runtime_for_model_rejected",
|
||||
runtime: "codex",
|
||||
model: "kimi-for-coding", // claude-code's, not codex's
|
||||
wantOK: false,
|
||||
},
|
||||
{
|
||||
// langgraph is a real core runtime but NOT in the first-party
|
||||
// registry → fail OPEN (the registry can't speak to it yet).
|
||||
name: "runtime_not_in_registry_allowed_failopen",
|
||||
runtime: "langgraph",
|
||||
model: "anything-goes",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
// external/kimi/mock runtimes are not in the registry → fail open.
|
||||
name: "external_runtime_allowed_failopen",
|
||||
runtime: "external",
|
||||
model: "whatever",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
// empty model → not this gate's job (MODEL_REQUIRED handles it);
|
||||
// allow so we don't double-reject.
|
||||
name: "empty_model_allowed_other_gate_owns_it",
|
||||
runtime: "claude-code",
|
||||
model: "",
|
||||
wantOK: true,
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
ok, _ := validateRegisteredModelForRuntime(c.runtime, c.model)
|
||||
if ok != c.wantOK {
|
||||
t.Errorf("validateRegisteredModelForRuntime(%q,%q) ok=%v want %v", c.runtime, c.model, ok, c.wantOK)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -875,7 +875,9 @@ func (h *OrgHandler) Import(c *gin.Context) {
|
||||
rows.Close()
|
||||
|
||||
for _, oid := range orphanIDs {
|
||||
descendantIDs, stopErrs, err := h.workspace.CascadeDelete(ctx, oid)
|
||||
// erase=false: a reconcile is not a user-requested erase —
|
||||
// never prune data volumes on the import-reconcile path (internal#734).
|
||||
descendantIDs, stopErrs, err := h.workspace.CascadeDelete(ctx, oid, false)
|
||||
if err != nil {
|
||||
log.Printf("Org import reconcile: CascadeDelete(%s) failed: %v", oid, err)
|
||||
reconcileErrs = append(reconcileErrs, fmt.Sprintf("delete %s: %v", oid, err))
|
||||
|
||||
@@ -548,6 +548,16 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
})
|
||||
}
|
||||
|
||||
// internal#2006: migrate runtime-created schedules from a removed
|
||||
// predecessor of the same agent (role+parent) onto this freshly-created
|
||||
// workspace. Reconcile re-derives template-sourced state below, but
|
||||
// schedules a user added at runtime (source='runtime', via the canvas/API)
|
||||
// bind to the ephemeral workspace_id and would otherwise be abandoned on
|
||||
// the removed row when an agent is recreated with a new id. Runs before the
|
||||
// template upsert loop so a same-named template schedule still wins.
|
||||
// Best-effort: never fails the import.
|
||||
h.migrateRuntimeSchedulesFromRemovedPredecessor(ctx, id, role, ws.Name, parentID)
|
||||
|
||||
// Insert schedules if defined. Resolve each schedule's prompt body from
|
||||
// either inline `prompt:` or `prompt_file:` (file ref relative to the
|
||||
// workspace's files_dir). Inline wins; empty prompt after resolution is
|
||||
@@ -687,6 +697,64 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
return h.recurseChildrenForImport(ws, id, absX, absY, defaults, orgBaseDir, results, provisionSem)
|
||||
}
|
||||
|
||||
// migrateRuntimeSchedulesFromRemovedPredecessor re-points runtime-created
|
||||
// schedules (source='runtime') from the most-recent removed predecessor of the
|
||||
// same agent onto newID. Recreating an agent mints a NEW workspace id (the
|
||||
// ON CONFLICT in createWorkspaceTree only matches non-removed rows), so a
|
||||
// schedule a user added at runtime would otherwise be abandoned on the removed
|
||||
// row. Template-sourced schedules are NOT migrated — reconcile re-derives those
|
||||
// from the org template (the upsert loop). The predecessor is matched by the
|
||||
// stable `role` when present (survives the name auto-suffixing that yields
|
||||
// "Agent (2)"), falling back to name+parent. Idempotent (skips names already on
|
||||
// newID) and best-effort (logs, never errors the import). See internal#2006.
|
||||
func (h *OrgHandler) migrateRuntimeSchedulesFromRemovedPredecessor(ctx context.Context, newID string, role interface{}, name string, parentID *string) {
|
||||
var predID string
|
||||
var err error
|
||||
if role != nil {
|
||||
err = db.DB.QueryRowContext(ctx, `
|
||||
SELECT id FROM workspaces
|
||||
WHERE status = 'removed' AND role = $1
|
||||
AND parent_id IS NOT DISTINCT FROM $2
|
||||
AND id <> $3
|
||||
ORDER BY updated_at DESC NULLS LAST
|
||||
LIMIT 1
|
||||
`, role, parentID, newID).Scan(&predID)
|
||||
} else {
|
||||
err = db.DB.QueryRowContext(ctx, `
|
||||
SELECT id FROM workspaces
|
||||
WHERE status = 'removed' AND name = $1
|
||||
AND parent_id IS NOT DISTINCT FROM $2
|
||||
AND id <> $3
|
||||
ORDER BY updated_at DESC NULLS LAST
|
||||
LIMIT 1
|
||||
`, name, parentID, newID).Scan(&predID)
|
||||
}
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return // first-time create — no predecessor to migrate from
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("Org import: predecessor lookup for %q (new=%s) failed: %v — skipping schedule migration", name, newID, err)
|
||||
return
|
||||
}
|
||||
res, err := db.DB.ExecContext(ctx, `
|
||||
UPDATE workspace_schedules s
|
||||
SET workspace_id = $1, updated_at = now()
|
||||
WHERE s.workspace_id = $2
|
||||
AND s.source = 'runtime'
|
||||
AND NOT EXISTS (
|
||||
SELECT 1 FROM workspace_schedules t
|
||||
WHERE t.workspace_id = $1 AND t.name = s.name
|
||||
)
|
||||
`, newID, predID)
|
||||
if err != nil {
|
||||
log.Printf("Org import: schedule migration %s -> %s (%q) failed: %v", predID, newID, name, err)
|
||||
return
|
||||
}
|
||||
if n, _ := res.RowsAffected(); n > 0 {
|
||||
log.Printf("Org import: migrated %d runtime schedule(s) from removed predecessor %s to new workspace %s (%q)", n, predID, newID, name)
|
||||
}
|
||||
}
|
||||
|
||||
// lookupExistingChild returns the id of an existing workspace under
|
||||
// (parent_id, name) if any, with idempotency-friendly semantics:
|
||||
// - parent_id IS NOT DISTINCT FROM matches NULL too (root workspaces)
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"testing"
|
||||
|
||||
sqlmock "github.com/DATA-DOG/go-sqlmock"
|
||||
)
|
||||
|
||||
// TestMigrateRuntimeSchedulesFromRemovedPredecessor verifies the happy path:
|
||||
// a removed predecessor exists for the agent (matched by role), and its
|
||||
// runtime-created schedules are re-pointed onto the freshly-created workspace.
|
||||
// internal#2006 (recreate-orphans-schedules regression).
|
||||
func TestMigrateRuntimeSchedulesFromRemovedPredecessor(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
h := &OrgHandler{}
|
||||
|
||||
// Predecessor lookup (role branch) returns the removed prior workspace.
|
||||
mock.ExpectQuery(`SELECT id FROM workspaces`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("old-removed-ws"))
|
||||
// Re-point UPDATE migrates 2 runtime schedules.
|
||||
mock.ExpectExec(`UPDATE workspace_schedules`).
|
||||
WillReturnResult(sqlmock.NewResult(0, 2))
|
||||
|
||||
parent := "parent-1"
|
||||
h.migrateRuntimeSchedulesFromRemovedPredecessor(
|
||||
context.Background(), "new-ws", interface{}("code-reviewer"), "Code Reviewer (2)", &parent,
|
||||
)
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMigrateRuntimeSchedules_NoPredecessor verifies the first-time-create path:
|
||||
// no removed predecessor → the function returns after the lookup and MUST NOT
|
||||
// run the re-point UPDATE (sqlmock errors on an unexpected query if it does).
|
||||
func TestMigrateRuntimeSchedules_NoPredecessor(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
h := &OrgHandler{}
|
||||
|
||||
mock.ExpectQuery(`SELECT id FROM workspaces`).
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
// No ExpectExec — an UPDATE here would be an unexpected query → test fails.
|
||||
|
||||
h.migrateRuntimeSchedulesFromRemovedPredecessor(
|
||||
context.Background(), "new-ws", interface{}("researcher"), "Root-Cause Researcher", nil,
|
||||
)
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMigrateRuntimeSchedules_NameFallback verifies the name-branch lookup is
|
||||
// used when the agent has no stable role (role == nil), still followed by the
|
||||
// re-point UPDATE.
|
||||
func TestMigrateRuntimeSchedules_NameFallback(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
h := &OrgHandler{}
|
||||
|
||||
mock.ExpectQuery(`SELECT id FROM workspaces`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("old-removed-ws"))
|
||||
mock.ExpectExec(`UPDATE workspace_schedules`).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
h.migrateRuntimeSchedulesFromRemovedPredecessor(
|
||||
context.Background(), "new-ws", nil, "Some Agent", nil,
|
||||
)
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,104 @@
|
||||
package handlers
|
||||
|
||||
// org_scope.go — cross-tenant isolation helpers (#1953).
|
||||
//
|
||||
// The `workspaces` table has no `org_id` column; an "org" is the subtree of
|
||||
// workspaces reachable through the `parent_id` chain from a single org root
|
||||
// (a row with parent_id IS NULL). Several code paths historically computed an
|
||||
// org-root sibling set as `WHERE parent_id IS NULL`, which matches EVERY
|
||||
// tenant's org root and therefore leaks peer metadata / routing across tenants.
|
||||
//
|
||||
// This file centralises the org-scoping primitive so peer discovery, the MCP
|
||||
// list_peers tool, and a2a routing all derive "the caller's org" the SAME way
|
||||
// the OFFSEC-015 broadcast fix (commit 5a05302c, workspace_broadcast.go) does:
|
||||
// a recursive CTE that walks the parent_id chain up to the org root. Keeping
|
||||
// the CTE in one place means there is a single, testable source of truth for
|
||||
// tenant isolation rather than four hand-copied queries that can drift.
|
||||
//
|
||||
// NOTE: this is the parent_id-chain scoping that the broadcast fix already
|
||||
// ships. It is deliberately NOT an `org_id` column — adding that column is a
|
||||
// separate architecture decision pending CTO sign-off. See #1953.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
)
|
||||
|
||||
// errNoOrgRoot is returned by orgRootID when the workspace id has no row (and
// therefore no resolvable org root). Callers translate this into a
// 404/not-found at their own layer; it is distinct from a transient DB error
// so a missing workspace never gets treated as "belongs to every org".
var errNoOrgRoot = errors.New("org root not found for workspace")

// orgRootSubtreeCTE is the recursive CTE that walks UP the parent_id chain
// from a single workspace to its org root. The org root is the row on the
// chain whose parent_id IS NULL.
//
//	$1 = workspace id to resolve
//
// The recursive member joins each chain row to the row whose id is that row's
// parent_id. The topmost ancestor is the single chain row with parent_id IS
// NULL, and THAT row's own `id` is the org root, selected as root_id.
//
// We must NOT carry a fixed `id AS root_id` from the recursive seed: that
// value is just the input workspace id, so a non-root caller (e.g. a child
// delegating to a sibling) would resolve to ITSELF instead of its org root,
// and sameOrg() would wrongly report two genuinely same-org workspaces as
// different orgs and 403 a legitimate a2a route. A workspace that already IS
// an org root has a one-row chain whose id == itself, so it correctly resolves
// to itself.
//
// The two members are combined with UNION rather than UNION ALL. On a healthy
// (acyclic) chain every row is distinct, so the result is the same. If
// parent_id ever forms a cycle, UNION ALL would recurse forever, because no
// row on the cycle has parent_id IS NULL to satisfy the outer SELECT. UNION
// discards the repeated row, recursion stops, the query returns no row, and
// orgRootID fails closed with errNoOrgRoot.
const orgRootSubtreeCTE = `
WITH RECURSIVE org_chain AS (
	SELECT id, parent_id
	FROM workspaces
	WHERE id = $1
	UNION
	SELECT w.id, w.parent_id
	FROM workspaces w
	JOIN org_chain c ON w.id = c.parent_id
)
SELECT id AS root_id FROM org_chain WHERE parent_id IS NULL LIMIT 1
`
|
||||
|
||||
// orgRootID resolves the org root of `workspaceID` by walking the parent_id
|
||||
// chain via orgRootSubtreeCTE. Returns errNoOrgRoot when the workspace (or its
|
||||
// chain) yields no org root row, and the underlying error on any DB failure.
|
||||
//
|
||||
// This is the SAME lookup the broadcast handler performs inline; the three
|
||||
// leak paths in #1953 call this instead of re-deriving "the org" from
|
||||
// `parent_id IS NULL` (which spans all tenants).
|
||||
func orgRootID(ctx context.Context, database *sql.DB, workspaceID string) (string, error) {
|
||||
var root string
|
||||
err := database.QueryRowContext(ctx, orgRootSubtreeCTE, workspaceID).Scan(&root)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
return "", errNoOrgRoot
|
||||
}
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if root == "" {
|
||||
return "", errNoOrgRoot
|
||||
}
|
||||
return root, nil
|
||||
}
|
||||
|
||||
// sameOrg reports whether workspaces `a` and `b` share an org root, i.e. they
|
||||
// belong to the same tenant. Used by a2a routing to reject resolving/dispatching
|
||||
// to a workspace id outside the caller's org. Fail-CLOSED: any lookup error or
|
||||
// missing org root yields (false, err) so a DB hiccup denies cross-tenant
|
||||
// routing rather than allowing it.
|
||||
func sameOrg(ctx context.Context, database *sql.DB, a, b string) (bool, error) {
|
||||
if a == b {
|
||||
return true, nil
|
||||
}
|
||||
rootA, err := orgRootID(ctx, database, a)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
rootB, err := orgRootID(ctx, database, b)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return rootA == rootB, nil
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
package handlers
|
||||
|
||||
// internal#718 P4 closure — provider endpoint retirement.
|
||||
//
|
||||
// PUT and GET /workspaces/:id/provider were the canvas-facing surface
|
||||
// for the legacy `LLM_PROVIDER` workspace_secret. With the registry-
|
||||
// derived provider model (P0-P4), the provider is now DERIVED at every
|
||||
// decision point from (runtime, model) via the registry. No code path
|
||||
// reads a stored provider anymore, so the endpoint has no observable
|
||||
// effect.
|
||||
//
|
||||
// Rather than silently 200-OK on a write that goes nowhere, the
|
||||
// retired endpoint returns 410 Gone with a structured body so an
|
||||
// older canvas (which still calls PUT /provider in its Save flow)
|
||||
// surfaces a loud-and-clear "this endpoint moved" error rather than
|
||||
// pretending to persist a change. The replacement is: select your
|
||||
// model on workspace create / via PUT /workspaces/:id/model — the
|
||||
// provider is derived from it.
|
||||
//
|
||||
// Retirement context:
|
||||
// - Retire-list #2 (CP `knownProviderNames` blocklist as authoring
|
||||
// surface) was already retired in P3 PR-C (cp#379) — that source
|
||||
// now reads from the registry. The CP-side reader of
|
||||
// `env["LLM_PROVIDER"]` (`resolveModelAndProvider`) is replaced in
|
||||
// the CP-side commit of this PR by a registry derivation.
|
||||
// - Retire-list #3 (`deriveProviderFromModelSlug`) is removed in
|
||||
// this PR — the only caller was `WorkspaceHandler.Create`, which
|
||||
// wrote the derived value into workspace_secrets.LLM_PROVIDER for
|
||||
// the now-removed CP read path. The migration 20260528000000
|
||||
// deletes any straggler rows from the secret table.
|
||||
//
|
||||
// The Gone body is the contract: callers must recognize
|
||||
// `code: PROVIDER_ENDPOINT_RETIRED` and stop calling. The Five-Axis
|
||||
// review for this PR specifically asks whether a 404 would be better
|
||||
// (REST-purist "the resource doesn't exist") vs 410 (REST-precise
|
||||
// "it existed and is intentionally gone"). 410 is correct here: the
|
||||
// endpoint shipped to prod, the canvas knows the URL, and the goal
|
||||
// is to make the retirement loud, not invisible.
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// ProviderEndpointGone is the replacement gin handler for GET/PUT
|
||||
// /workspaces/:id/provider. Returns 410 with a body shape the canvas
|
||||
// can pattern-match on (code/error/issue keys).
|
||||
//
|
||||
// Wired in internal/router/router.go (the two route lines that used
|
||||
// to reference sech.GetProvider / sech.SetProvider).
|
||||
//
|
||||
// Exported so the router package can reference it as
|
||||
// handlers.ProviderEndpointGone without spinning up a SecretsHandler
|
||||
// receiver just to retire two endpoints.
|
||||
func ProviderEndpointGone(c *gin.Context) {
|
||||
c.JSON(http.StatusGone, gin.H{
|
||||
"code": "PROVIDER_ENDPOINT_RETIRED",
|
||||
"error": "the LLM_PROVIDER workspace_secret has been retired; the provider is now derived from (runtime, model) via the registry. Select your model via PUT /workspaces/:id/model — the provider follows.",
|
||||
"issue": "internal#718",
|
||||
})
|
||||
}
|
||||
@@ -538,7 +538,8 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) {
|
||||
|
||||
// Read previous current_task to detect changes (before the UPDATE)
|
||||
var prevTask string
|
||||
if err := db.DB.QueryRowContext(ctx, `SELECT COALESCE(current_task, '') FROM workspaces WHERE id = $1`, payload.WorkspaceID).Scan(&prevTask); err != nil {
|
||||
var prevSpend int64
|
||||
if err := db.DB.QueryRowContext(ctx, `SELECT COALESCE(current_task, ''), COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`, payload.WorkspaceID).Scan(&prevTask, &prevSpend); err != nil {
|
||||
log.Printf("registry heartbeat: prev_task query failed for workspace %s: %v", payload.WorkspaceID, err)
|
||||
}
|
||||
|
||||
@@ -556,6 +557,25 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) {
|
||||
payload.MonthlySpend = maxMonthlySpend
|
||||
}
|
||||
|
||||
// Multi-period budget (#49): record the spend INCREMENT into the
|
||||
// workspace_spend_events ledger so the server can compute rolling per-period
|
||||
// windows (hourly/daily/weekly/monthly) — see budget_periods.go. The agent
|
||||
// still reports a cumulative monthly figure; we derive the delta vs the
|
||||
// last-seen cumulative (prevSpend). A DECREASE means the agent reset its
|
||||
// monthly cumulative (new month) → treat the new value as fresh spend.
|
||||
// Best-effort: a ledger failure must never break the heartbeat.
|
||||
if payload.MonthlySpend > 0 {
|
||||
delta := payload.MonthlySpend - prevSpend
|
||||
if delta < 0 {
|
||||
delta = payload.MonthlySpend
|
||||
}
|
||||
if delta > 0 {
|
||||
if err := recordSpendDelta(ctx, db.DB, payload.WorkspaceID, delta); err != nil {
|
||||
log.Printf("registry heartbeat: spend-ledger insert failed for workspace %s: %v", payload.WorkspaceID, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update heartbeat columns. #73 guard: exclude 'removed' rows so a
|
||||
// late heartbeat from a container that's being torn down doesn't
|
||||
// refresh last_heartbeat_at on a tombstoned workspace (which would
|
||||
|
||||
@@ -24,6 +24,7 @@ var platformManagedDirectLLMBypassKeys = map[string]struct{}{
|
||||
"ANTHROPIC_AUTH_TOKEN": {},
|
||||
"ARCEEAI_API_KEY": {},
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": {},
|
||||
"CODEX_AUTH_JSON": {},
|
||||
"DASHSCOPE_API_KEY": {},
|
||||
"DEEPSEEK_API_KEY": {},
|
||||
"GEMINI_API_KEY": {},
|
||||
@@ -67,14 +68,6 @@ func platformManagedLLMModeForWorkspace(c *gin.Context, workspaceID string) bool
|
||||
return strings.EqualFold(res.ResolvedMode, LLMBillingModePlatformManaged)
|
||||
}
|
||||
|
||||
// platformManagedLLMMode is the legacy org-level gate retained for any test
|
||||
// harness still asserting the env-var-only behavior. Production code paths
|
||||
// must call platformManagedLLMModeForWorkspace instead so a workspace-level
|
||||
// byok override actually takes effect on the secrets-write path.
|
||||
func platformManagedLLMMode() bool {
|
||||
return strings.EqualFold(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")), "platform_managed")
|
||||
}
|
||||
|
||||
// rejectPlatformManagedDirectLLMBypassForWorkspace is the per-workspace
|
||||
// successor to rejectPlatformManagedDirectLLMBypass (internal#691). The
|
||||
// strip-list ONLY applies when this specific workspace resolves to
|
||||
@@ -91,22 +84,6 @@ func rejectPlatformManagedDirectLLMBypassForWorkspace(c *gin.Context, workspaceI
|
||||
return true
|
||||
}
|
||||
|
||||
// rejectPlatformManagedDirectLLMBypass is the legacy org-level shim. Retained
|
||||
// only for backwards compatibility with any external/test caller still on the
|
||||
// old shape; new code MUST use the per-workspace variant above. Production
|
||||
// code paths (the secrets.go handlers + workspace.go create-secret path) all
|
||||
// switched in internal#691.
|
||||
func rejectPlatformManagedDirectLLMBypass(c *gin.Context, key string) bool {
|
||||
if !platformManagedLLMMode() || !isPlatformManagedDirectLLMBypassKey(key) {
|
||||
return false
|
||||
}
|
||||
c.JSON(http.StatusBadRequest, gin.H{
|
||||
"error": "direct Hermes custom provider secrets are blocked for platform-managed LLM workspaces; use MODEL/LLM_PROVIDER or the platform LLM proxy env instead",
|
||||
"key": key,
|
||||
})
|
||||
return true
|
||||
}
|
||||
|
||||
type SecretsHandler struct {
|
||||
restartFunc func(workspaceID string) // Optional: auto-restart after secret change
|
||||
}
|
||||
@@ -309,6 +286,16 @@ func (h *SecretsHandler) Values(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// molecule-core#1994 (corrected model): the remote-pull bundle is the
|
||||
// TENANT's own merged secrets (global_secrets + workspace_secrets, the
|
||||
// latter winning on collision). `global_secrets` is the tenant's store, not
|
||||
// the platform's, so a byok workspace's pull MUST include the tenant's own
|
||||
// global-scope LLM credential — that is exactly what it runs on, direct.
|
||||
// The earlier internal#711 byok strip here rested on the inverted "global =
|
||||
// platform's own" premise and is removed; the platform's own proxy token is
|
||||
// never in a tenant's global_secrets (it lives in server env only and is
|
||||
// injected separately on the platform_managed provision path), so there is
|
||||
// nothing platform-owned to withhold on this path.
|
||||
c.JSON(http.StatusOK, out)
|
||||
}
|
||||
|
||||
@@ -476,9 +463,15 @@ func (h *SecretsHandler) SetGlobal(c *gin.Context) {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
|
||||
return
|
||||
}
|
||||
if rejectPlatformManagedDirectLLMBypass(c, body.Key) {
|
||||
return
|
||||
}
|
||||
// internal#718: the org-level LLM billing rung was retired — billing is
|
||||
// resolved per-workspace, not per-org. A global secret is the tenant's OWN
|
||||
// shared credential; the provision-time provider-matched strip
|
||||
// (workspace_provision) removes any global cred a given workspace's resolved
|
||||
// provider does not accept, so a platform-managed workspace can never USE a
|
||||
// non-matching global vendor/oauth key. The legacy org-env SetGlobal gate
|
||||
// (keyed off the retired MOLECULE_LLM_BILLING_MODE) is therefore removed;
|
||||
// per-workspace writes still enforce the strip-list via
|
||||
// rejectPlatformManagedDirectLLMBypassForWorkspace.
|
||||
|
||||
encrypted, err := crypto.Encrypt([]byte(body.Value))
|
||||
if err != nil {
|
||||
@@ -739,121 +732,19 @@ func (h *SecretsHandler) SetModel(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gin.H{"status": "saved", "model": body.Model})
|
||||
}
|
||||
|
||||
// GetProvider handles GET /workspaces/:id/provider
|
||||
// Returns the explicit LLM provider override stored as the LLM_PROVIDER
|
||||
// workspace secret. Mirror of GetModel — same shape, same response keys
|
||||
// (provider/source) to keep canvas wiring symmetric.
|
||||
// internal#718 P4 closure: GetProvider, SetProvider, and the shared
|
||||
// setProviderSecret helper were retired together with the
|
||||
// LLM_PROVIDER workspace_secret. The provider is now DERIVED at every
|
||||
// decision point from (runtime, model) via the registry
|
||||
// (internal/providers.Manifest.DeriveProvider), so storing it is
|
||||
// pure write-ghost — no consumer remains.
|
||||
//
|
||||
// Why a sibling endpoint rather than overloading PUT /model: the new
|
||||
// `provider` field (Option B, PR #2441) is orthogonal to the model
|
||||
// slug. A user might keep the same model alias and switch providers
|
||||
// (e.g., route the same alias through a different gateway), or keep
|
||||
// the same provider and switch models. Co-storing them under one
|
||||
// endpoint forces a single Save+Restart round-trip per change; two
|
||||
// endpoints let the canvas update each independently.
|
||||
func (h *SecretsHandler) GetProvider(c *gin.Context) {
|
||||
workspaceID := c.Param("id")
|
||||
ctx := c.Request.Context()
|
||||
|
||||
var bytesVal []byte
|
||||
var version int
|
||||
err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1 AND key = 'LLM_PROVIDER'`,
|
||||
workspaceID).Scan(&bytesVal, &version)
|
||||
if err == sql.ErrNoRows {
|
||||
c.JSON(http.StatusOK, gin.H{"provider": "", "source": "default"})
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
|
||||
return
|
||||
}
|
||||
|
||||
decrypted, err := crypto.DecryptVersioned(bytesVal, version)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to decrypt"})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"provider": string(decrypted), "source": "workspace_secrets"})
|
||||
}
|
||||
|
||||
// setProviderSecret writes (or clears, when value=="") the LLM_PROVIDER
|
||||
// workspace secret. Extracted from SetProvider so non-handler call sites
|
||||
// (notably WorkspaceHandler.Create — first-deploy path that derives
|
||||
// LLM_PROVIDER from the canvas-selected model slug so CP user-data picks
|
||||
// it up as a YAML field in /configs/config.yaml AND it survives across
|
||||
// restarts when CP regenerates the config) can reuse the encryption +
|
||||
// upsert logic without inlining the SQL.
|
||||
// Route registrations in internal/router/router.go now point both
|
||||
// GET and PUT /workspaces/:id/provider at ProviderEndpointGone, which
|
||||
// returns 410 Gone with a structured body so older canvases that
|
||||
// still call PUT /provider on Save surface a loud failure rather
|
||||
// than silently writing a vanished row.
|
||||
//
|
||||
// Returns nil on success. Caller is responsible for any restart trigger;
|
||||
// the gin handler re-adds that after a successful write.
|
||||
func setProviderSecret(ctx context.Context, workspaceID, provider string) error {
|
||||
if provider == "" {
|
||||
_, err := db.DB.ExecContext(ctx,
|
||||
`DELETE FROM workspace_secrets WHERE workspace_id = $1 AND key = 'LLM_PROVIDER'`,
|
||||
workspaceID)
|
||||
return err
|
||||
}
|
||||
encrypted, err := crypto.Encrypt([]byte(provider))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
version := crypto.CurrentEncryptionVersion()
|
||||
_, err = db.DB.ExecContext(ctx, `
|
||||
INSERT INTO workspace_secrets (workspace_id, key, encrypted_value, encryption_version)
|
||||
VALUES ($1, 'LLM_PROVIDER', $2, $3)
|
||||
ON CONFLICT (workspace_id, key) DO UPDATE
|
||||
SET encrypted_value = $2, encryption_version = $3, updated_at = now()
|
||||
`, workspaceID, encrypted, version)
|
||||
return err
|
||||
}
|
||||
|
||||
// SetProvider handles PUT /workspaces/:id/provider — writes the provider
|
||||
// slug into workspace_secrets as LLM_PROVIDER. Empty string clears the
|
||||
// override. Triggers auto-restart so the new env is in effect on the
|
||||
// next boot — without this the canvas Save+Restart can race the
|
||||
// already-restarting container and miss the window.
|
||||
//
|
||||
// CP user-data (controlplane PR #364) reads LLM_PROVIDER from env and
|
||||
// writes it into /configs/config.yaml at boot, so the choice survives
|
||||
// restart. Without that PR this endpoint still works but the value is
|
||||
// only sticky when the workspace_secrets row is read on every restart
|
||||
// (the secret-load path) — slower failure mode, same eventual behavior.
|
||||
func (h *SecretsHandler) SetProvider(c *gin.Context) {
|
||||
workspaceID := c.Param("id")
|
||||
if !uuidRegex.MatchString(workspaceID) {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
|
||||
return
|
||||
}
|
||||
ctx := c.Request.Context()
|
||||
|
||||
var body struct {
|
||||
Provider string `json:"provider"`
|
||||
}
|
||||
if err := c.ShouldBindJSON(&body); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
|
||||
return
|
||||
}
|
||||
|
||||
if err := setProviderSecret(ctx, workspaceID, body.Provider); err != nil {
|
||||
log.Printf("SetProvider error: %v", err)
|
||||
if body.Provider == "" {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to clear provider"})
|
||||
} else {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save provider"})
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if h.restartFunc != nil {
|
||||
// RFC internal#524 Layer 1: globalGoAsync (see Set()).
|
||||
wsID := workspaceID
|
||||
globalGoAsync(func() { h.restartFunc(wsID) })
|
||||
}
|
||||
if body.Provider == "" {
|
||||
c.JSON(http.StatusOK, gin.H{"status": "cleared"})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"status": "saved", "provider": body.Provider})
|
||||
}
|
||||
// Migration 20260528000000_drop_llm_provider_workspace_secret.up.sql
|
||||
// removes any straggler rows in workspace_secrets (key='LLM_PROVIDER')
|
||||
// so the table is in the same state as a freshly-provisioned tenant.
|
||||
|
||||
@@ -682,151 +682,16 @@ func TestSecretsModel_RoundTrip_KeyIsMODELNotMODEL_PROVIDER(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== GetProvider / SetProvider (Option B PR-2) ====================
|
||||
// ==================== GetProvider / SetProvider — RETIRED ====================
|
||||
//
|
||||
// Mirror of the GetModel/SetModel suite. Same secret-storage shape (key=
|
||||
// 'LLM_PROVIDER' instead of 'MODEL_PROVIDER'), same restart-trigger
|
||||
// contract, same UUID validation gate. We pin the contract symmetrically
|
||||
// so a future refactor that breaks one without the other shows up in CI.
|
||||
|
||||
func TestSecretsGetProvider_Default(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
mock.ExpectQuery("SELECT encrypted_value, encryption_version FROM workspace_secrets").
|
||||
WithArgs("ws-prov").
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-prov"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-prov/provider", nil)
|
||||
|
||||
handler.GetProvider(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var resp map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
if resp["provider"] != "" {
|
||||
t.Errorf("expected empty provider, got %v", resp["provider"])
|
||||
}
|
||||
if resp["source"] != "default" {
|
||||
t.Errorf("expected source 'default', got %v", resp["source"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSecretsGetProvider_DBError(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
mock.ExpectQuery("SELECT encrypted_value, encryption_version FROM workspace_secrets").
|
||||
WithArgs("ws-prov-err").
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-prov-err"}}
|
||||
c.Request = httptest.NewRequest("GET", "/workspaces/ws-prov-err/provider", nil)
|
||||
|
||||
handler.GetProvider(c)
|
||||
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Errorf("expected status 500, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSecretsSetProvider_Upsert(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
restartCalled := make(chan string, 1)
|
||||
handler := NewSecretsHandler(func(id string) { restartCalled <- id })
|
||||
|
||||
mock.ExpectExec(`INSERT INTO workspace_secrets`).
|
||||
WithArgs("00000000-0000-0000-0000-000000000003", sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000003"}}
|
||||
c.Request = httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000003/provider",
|
||||
strings.NewReader(`{"provider":"minimax"}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.SetProvider(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
select {
|
||||
case id := <-restartCalled:
|
||||
if id != "00000000-0000-0000-0000-000000000003" {
|
||||
t.Errorf("restart called with wrong id: %s", id)
|
||||
}
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
t.Error("restart was not triggered")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSecretsSetProvider_EmptyClears(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewSecretsHandler(func(string) {})
|
||||
|
||||
mock.ExpectExec(`DELETE FROM workspace_secrets`).
|
||||
WithArgs("00000000-0000-0000-0000-000000000004").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000004"}}
|
||||
c.Request = httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000004/provider",
|
||||
strings.NewReader(`{"provider":""}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.SetProvider(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSecretsSetProvider_InvalidID(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "not-a-uuid"}}
|
||||
c.Request = httptest.NewRequest("PUT", "/workspaces/not-a-uuid/provider",
|
||||
strings.NewReader(`{"provider":"x"}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.SetProvider(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("expected 400 for bad UUID, got %d", w.Code)
|
||||
}
|
||||
}
|
||||
// internal#718 P4 closure: the GetProvider/SetProvider suite covered the
|
||||
// LLM_PROVIDER workspace_secret round-trip. Both handlers and the
|
||||
// shared setProviderSecret helper were removed when the secret itself
|
||||
// was retired. The replacement endpoint behavior (410 Gone with a
|
||||
// structured body) is covered by
|
||||
// `llm_provider_removal_p4_test.go::TestPutProvider_410Gone`,
|
||||
// `TestGetProvider_410Gone`, and
|
||||
// `TestProviderEndpointGone_BodyShape`.
|
||||
|
||||
// ==================== Values — Phase 30.2 decrypted pull ====================
|
||||
|
||||
@@ -865,6 +730,12 @@ func TestSecretsValues_LegacyWorkspaceGrandfathered(t *testing.T) {
|
||||
WithArgs(testWsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
|
||||
AddRow("WS_KEY", []byte("ws_plainvalue"), 0))
|
||||
// internal#711: Values now resolves billing mode to gate the global LLM-cred
|
||||
// merge. Neither key here is a platform-managed LLM bypass key, so the mode
|
||||
// is immaterial to the assertions — but the resolver query must be mocked.
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(testWsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModePlatformManaged))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c := secretsValuesRequest(w, "") // no auth — grandfathered
|
||||
@@ -942,6 +813,12 @@ func TestSecretsValues_ValidTokenReturnsDecryptedMerge(t *testing.T) {
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
|
||||
AddRow("ONLY_WS", []byte("ws_val"), 0).
|
||||
AddRow("SHARED_KEY", []byte("ws_wins"), 0))
|
||||
// internal#711: billing-mode resolver query. None of these keys is a
|
||||
// platform-managed LLM bypass key, so the resolved mode does not affect the
|
||||
// merge assertions; platform_managed keeps the existing pass-through.
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(testWsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModePlatformManaged))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c := secretsValuesRequest(w, "Bearer good-token")
|
||||
@@ -963,6 +840,71 @@ func TestSecretsValues_ValidTokenReturnsDecryptedMerge(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecretsValues_ByokServesTenantGlobalLLMCred is the molecule-core#1994
|
||||
// (corrected-model) regression guard for the remote-pull path. `global_secrets`
|
||||
// is the TENANT's store, so a byok workspace's pull MUST include the tenant's
|
||||
// own global-scope LLM credential — that is exactly what byok runs on, direct.
|
||||
//
|
||||
// Pre-fix (internal#711) this path STRIPPED the global-origin oauth on byok,
|
||||
// resting on the inverted premise that a global LLM cred was "the platform's
|
||||
// own"; that killed legitimate byok workspaces whose oauth lived at global
|
||||
// scope. The strip is removed: the merged bundle (tenant globals + workspace
|
||||
// overrides) is served verbatim.
|
||||
//
|
||||
// Mutation: re-add the byok global-LLM-cred strip in secrets.go Values() →
|
||||
// CLAUDE_CODE_OAUTH_TOKEN disappears from the body → this test RED.
|
||||
func TestSecretsValues_ByokServesTenantGlobalLLMCred(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens`).
|
||||
WithArgs(testWsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
|
||||
mock.ExpectQuery(`SELECT t\.id, t\.workspace_id.*FROM workspace_auth_tokens t.*JOIN workspaces`).
|
||||
WithArgs(sqlmock.AnyArg()).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id"}).AddRow("tok-1", testWsID))
|
||||
mock.ExpectExec(`UPDATE workspace_auth_tokens SET last_used_at`).
|
||||
WithArgs("tok-1").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
// global_secrets holds the TENANT's own global-scope OAuth token (shared
|
||||
// across all the tenant's workspaces) + a non-LLM global.
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
|
||||
AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("TENANT-OWN-GLOBAL-OAUTH"), 0).
|
||||
AddRow("SENTRY_DSN", []byte("https://sentry.example/123"), 0))
|
||||
// This workspace set no LLM secret of its own — it relies on the tenant
|
||||
// global-scope oauth.
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id`).
|
||||
WithArgs(testWsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
|
||||
AddRow("MODEL", []byte("opus"), 0))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c := secretsValuesRequest(w, "Bearer good-token")
|
||||
handler.Values(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var body map[string]string
|
||||
_ = json.Unmarshal(w.Body.Bytes(), &body)
|
||||
// 1. The tenant's own global-scope OAuth token SURVIVES — byok runs on it.
|
||||
if body["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
|
||||
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the tenant's own global-scope token served for byok pull", body["CLAUDE_CODE_OAUTH_TOKEN"])
|
||||
}
|
||||
// 2. The workspace's own non-LLM secret survives.
|
||||
if body["MODEL"] != "opus" {
|
||||
t.Fatalf("MODEL = %q, want opus preserved", body["MODEL"])
|
||||
}
|
||||
// 3. Unrelated non-LLM global secrets are untouched.
|
||||
if body["SENTRY_DSN"] != "https://sentry.example/123" {
|
||||
t.Fatalf("SENTRY_DSN = %q, want non-LLM globals untouched", body["SENTRY_DSN"])
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSecretsValues_InvalidWorkspaceID(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
@@ -1037,6 +979,61 @@ func TestSetGlobal_AutoRestartsAffectedWorkspaces(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestSetGlobal_AllowsTenantOwnedVendorKeyDespiteLegacyOrgEnv pins the
|
||||
// internal#718 correction: the org-level LLM billing rung is RETIRED (billing
|
||||
// is resolved per-workspace, not per-org). A global secret is the tenant's OWN
|
||||
// shared credential and is always writable at global scope; the provision-time
|
||||
// provider-matched strip (workspace_provision) keeps any platform-managed
|
||||
// workspace from USING a non-matching global cred, and per-workspace secret
|
||||
// writes still enforce the strip-list via the per-workspace guard. So even with
|
||||
// the legacy MOLECULE_LLM_BILLING_MODE env still set to platform_managed, a
|
||||
// global vendor/oauth key write MUST SUCCEED (200) and persist — the retired
|
||||
// org rung no longer gates it.
|
||||
//
|
||||
// Mutation: re-add the org-level rejectPlatformManagedDirectLLMBypass guard to
|
||||
// SetGlobal → the write 400s before the INSERT → this test RED.
|
||||
func TestSetGlobal_AllowsTenantOwnedVendorKeyDespiteLegacyOrgEnv(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
restarted := make(chan string, 2)
|
||||
handler := NewSecretsHandler(func(id string) { restarted <- id })
|
||||
|
||||
// Legacy org env still platform_managed — it must no longer gate the write.
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
|
||||
|
||||
mock.ExpectExec("INSERT INTO global_secrets").
|
||||
WithArgs("CLAUDE_CODE_OAUTH_TOKEN", sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectQuery("SELECT id FROM workspaces").
|
||||
WithArgs("CLAUDE_CODE_OAUTH_TOKEN").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-a"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"key":"CLAUDE_CODE_OAUTH_TOKEN","value":"sk-ant-oat01-tenant-own"}`
|
||||
c.Request = httptest.NewRequest("POST", "/admin/secrets", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.SetGlobal(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 (global write allowed; org rung retired), got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
// Wait on the async restart fan-out so its SELECT drains before db swap.
|
||||
select {
|
||||
case id := <-restarted:
|
||||
if id != "ws-a" {
|
||||
t.Errorf("expected ws-a restarted, got %s", id)
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("auto-restart not fired for affected workspace")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeleteGlobal_AutoRestartsAffectedWorkspaces covers the delete branch of #15.
|
||||
func TestDeleteGlobal_AutoRestartsAffectedWorkspaces(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
@@ -95,6 +95,38 @@ type modelSpec struct {
|
||||
Name string `json:"name,omitempty" yaml:"name"`
|
||||
Provider string `json:"provider,omitempty" yaml:"provider"`
|
||||
RequiredEnv []string `json:"required_env,omitempty" yaml:"required_env"`
|
||||
// BillingMode is the billing source the DERIVED provider implies:
|
||||
// "platform_managed" (the closed core-only platform provider; Molecule
|
||||
// owns the upstream key + the bill) or "byok" (any other provider; the
|
||||
// tenant supplies its own key). Set ONLY on registry-served models
|
||||
// (RegistryModels) where DeriveProvider resolved an owning provider;
|
||||
// empty on template-served models. internal#718 P3 — the canvas reads
|
||||
// this to show the billing-mode of the DERIVED provider instead of its
|
||||
// hardcoded billingModeForProvider rule.
|
||||
BillingMode string `json:"billing_mode,omitempty" yaml:"-"`
|
||||
}
|
||||
|
||||
// registryProviderView is what the canvas sees for one registry Provider
// entry of a registry-known runtime. It carries the stable name, the dropdown
// label, the NAMES (never the values) of the env vars that satisfy auth, and
// the billing mode the provider implies. The data comes from the provider
// registry (internal/providers), which lets the canvas drop its hardcoded
// VENDOR_LABELS map and billingModeForProvider rule (internal#718 P3,
// retire-list #4/#5).
type registryProviderView struct {
	// Name: the registry provider key, e.g. "anthropic-oauth" or "platform".
	Name string `json:"name"`
	// DisplayName: label shown in the canvas dropdown; taken from the
	// registry's Provider.DisplayName.
	DisplayName string `json:"display_name,omitempty"`
	// AuthEnv: env-var names, any one of which satisfies auth for this
	// provider (registry Provider.AuthEnv). Only names are exposed, never
	// secret values.
	AuthEnv []string `json:"auth_env,omitempty"`
	// BillingMode: "platform_managed" for the closed platform provider and
	// "byok" for every other one. Keyed off the registry IsPlatform
	// predicate, so the canvas displays the DERIVED provider's billing source.
	BillingMode string `json:"billing_mode,omitempty"`
	// Deprecated: copy of the registry's deprecated flag, so the canvas can
	// grey the provider out without breaking saved configs.
	Deprecated bool `json:"deprecated,omitempty"`
}
|
||||
|
||||
// providerRegistryEntry mirrors a row from a template's top-level
|
||||
@@ -162,8 +194,29 @@ type templateSummary struct {
|
||||
// (omitempty); the canvas's existing per-model fallback continues
|
||||
// to work for them.
|
||||
ProviderRegistry []providerRegistryEntry `json:"provider_registry,omitempty"`
|
||||
Skills []string `json:"skills"`
|
||||
SkillCount int `json:"skill_count"`
|
||||
// RegistryBacked is true when this template's runtime is known to the
|
||||
// provider registry (internal/providers runtimes: block) and the
|
||||
// RegistryProviders / RegistryModels fields below were populated from it.
|
||||
// The canvas treats a registry-backed payload as AUTHORITATIVE for the
|
||||
// selectable provider+model list (it drops its prefix-inference fallback)
|
||||
// — "only registered selectable" follows because the canvas can render
|
||||
// no option the registry did not serve. False = the runtime is not in the
|
||||
// registry (federation / external / mock); the canvas keeps using the
|
||||
// template-served Models/Providers + its heuristic. internal#718 P3.
|
||||
RegistryBacked bool `json:"registry_backed,omitempty"`
|
||||
// RegistryProviders is the runtime's NATIVE provider set from the
|
||||
// registry (ProvidersForRuntime), each with its display label, auth-env
|
||||
// names, and billing mode. Empty when !RegistryBacked. This is the SSOT
|
||||
// the canvas Provider dropdown consumes instead of VENDOR_LABELS.
|
||||
RegistryProviders []registryProviderView `json:"registry_providers,omitempty"`
|
||||
// RegistryModels is the runtime's NATIVE model set from the registry
|
||||
// (ModelsForRuntime), each annotated with its DERIVED provider and the
|
||||
// billing mode that provider implies. Empty when !RegistryBacked. This is
|
||||
// the SSOT the canvas Model dropdown consumes — a template can no longer
|
||||
// surface a model the registry does not list for the runtime.
|
||||
RegistryModels []modelSpec `json:"registry_models,omitempty"`
|
||||
Skills []string `json:"skills"`
|
||||
SkillCount int `json:"skill_count"`
|
||||
// ProvisionTimeoutSeconds lets a slow runtime declare its expected
|
||||
// cold-boot duration in its template manifest. Canvas's
|
||||
// ProvisioningTimeout banner respects this per-workspace via the
|
||||
@@ -171,6 +224,15 @@ type templateSummary struct {
|
||||
// 0 = template hasn't declared one, falls through to canvas's
|
||||
// runtime-profile default.
|
||||
ProvisionTimeoutSeconds int `json:"provision_timeout_seconds,omitempty"`
|
||||
// Displayable lets a template opt OUT of the canvas runtime picker
|
||||
// declaratively (config.yaml `displayable: false`) while still being a
|
||||
// provisionable runtime. nil/absent or true → shown; only an explicit
|
||||
// false hides it. The canvas runtime dropdown is SSOT-driven off this
|
||||
// list (no hardcoded frontend allowlist), so this is the single place a
|
||||
// runtime is hidden from the picker. Pointer so "unset" is distinct from
|
||||
// "false" and omitempty keeps the payload unchanged for existing
|
||||
// templates that never declare it.
|
||||
Displayable *bool `json:"displayable,omitempty"`
|
||||
}
|
||||
|
||||
// resolveTemplateDir finds the template directory for a workspace on the host.
|
||||
@@ -217,6 +279,7 @@ func (h *TemplatesHandler) List(c *gin.Context) {
|
||||
Runtime string `yaml:"runtime"`
|
||||
Model string `yaml:"model"`
|
||||
Skills []string `yaml:"skills"`
|
||||
Displayable *bool `yaml:"displayable"`
|
||||
// Top-level `providers:` block — structured registry. Distinct
|
||||
// from runtime_config.providers (slug list) below. Both shapes
|
||||
// coexist in production: claude-code ships the structured
|
||||
@@ -243,9 +306,13 @@ func (h *TemplatesHandler) List(c *gin.Context) {
|
||||
log.Printf("templates list: skip %s: yaml.Unmarshal: %v", id, err)
|
||||
return
|
||||
}
|
||||
// normalizedRuntime strips the "-default" vanilla-variant suffix
|
||||
// (claude-code-default → claude-code). Hoisted out of the
|
||||
// known-runtime guard so the registry enrichment below can key off
|
||||
// the same normalised name the guard validated.
|
||||
normalizedRuntime := strings.TrimSuffix(strings.TrimSpace(raw.Runtime), "-default")
|
||||
if raw.Runtime != "" {
|
||||
runtime := strings.TrimSuffix(strings.TrimSpace(raw.Runtime), "-default")
|
||||
if _, ok := knownRuntimes[runtime]; !ok {
|
||||
if _, ok := knownRuntimes[normalizedRuntime]; !ok {
|
||||
log.Printf("templates list: skip %s: unsupported runtime %q", id, raw.Runtime)
|
||||
return
|
||||
}
|
||||
@@ -262,7 +329,7 @@ func (h *TemplatesHandler) List(c *gin.Context) {
|
||||
tier = h.wh.DefaultTier()
|
||||
}
|
||||
|
||||
templates = append(templates, templateSummary{
|
||||
summary := templateSummary{
|
||||
ID: id,
|
||||
Name: raw.Name,
|
||||
Description: raw.Description,
|
||||
@@ -277,7 +344,18 @@ func (h *TemplatesHandler) List(c *gin.Context) {
|
||||
Skills: raw.Skills,
|
||||
SkillCount: len(raw.Skills),
|
||||
ProvisionTimeoutSeconds: raw.RuntimeConfig.ProvisionTimeoutSeconds,
|
||||
})
|
||||
Displayable: raw.Displayable,
|
||||
}
|
||||
|
||||
// internal#718 P3: serve the SELECTABLE provider/model list from
|
||||
// the provider registry for a registry-known runtime. Additive —
|
||||
// the template-served Models/Providers above stay for non-registry
|
||||
// runtimes + older canvases; this adds the authoritative
|
||||
// registry_backed/registry_providers/registry_models block the
|
||||
// current canvas prefers. Fail-open for unknown runtimes.
|
||||
enrichFromRegistry(&summary, normalizedRuntime)
|
||||
|
||||
templates = append(templates, summary)
|
||||
})
|
||||
}
|
||||
walk(h.cacheDir)
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
package handlers
|
||||
|
||||
// templates_registry.go — internal#718 P3: serve the GET /templates selectable
|
||||
// provider/model list FROM the provider registry (workspace-server/internal/
|
||||
// providers) instead of from each template's hand-authored config.yaml
|
||||
// `providers:` / `runtime_config.models` block.
|
||||
//
|
||||
// The registry (P2-A synced copy of the canonical CP providers.yaml) is the
|
||||
// SSOT for "which providers + models does runtime R natively support" and
|
||||
// "which derived provider owns model M" (DeriveProvider) and "is that provider
|
||||
// the closed platform set" (IsPlatform). This file projects that into the
|
||||
// templates payload's registry_backed / registry_providers / registry_models
|
||||
// fields so the canvas can drop its hardcoded VENDOR_LABELS /
|
||||
// billingModeForProvider vocabularies (retire-list #4/#5) and physically can't
|
||||
// render an option the registry didn't serve.
|
||||
//
|
||||
// Federation-ready, fail-OPEN: a runtime ABSENT from the registry's runtimes:
|
||||
// block (external / mock / kimi / a future third-party runtime) yields
|
||||
// RegistryBacked=false and an empty registry block — the template's own fields
|
||||
// stay authoritative. No behavior change for non-registry runtimes.
|
||||
//
|
||||
// NOTE: this reuses the package-level providerRegistry() accessor +
|
||||
// LLMBillingModePlatformManaged / LLMBillingModeBYOK constants from
|
||||
// llm_billing_mode.go (added by P2-B, internal#718 #1972, now on main) — both
|
||||
// the billing-derivation and this templates projection wrap the same
|
||||
// providers.LoadManifest() SSOT and the same platform_managed/byok wire
|
||||
// strings, so there is one accessor + one constant set for the package.
|
||||
|
||||
import (
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
)
|
||||
|
||||
// billingModeForRegistryProvider maps a registry Provider to the billing mode
|
||||
// it implies: platform_managed for the closed core-only platform provider,
|
||||
// byok for everything else. Keyed off the registry IsPlatform predicate —
|
||||
// the same one billing/credential emission (llm_billing_mode.go) keys off the
|
||||
// DERIVED provider — so the canvas shows the true billing source of the
|
||||
// resolved provider. Returns the same LLMBillingMode* wire strings the Config
|
||||
// tab's billing-mode switch sends.
|
||||
func billingModeForRegistryProvider(p providers.Provider) string {
|
||||
if p.IsPlatform() {
|
||||
return LLMBillingModePlatformManaged
|
||||
}
|
||||
return LLMBillingModeBYOK
|
||||
}
|
||||
|
||||
// enrichFromRegistry populates the registry-served fields on a templateSummary
|
||||
// when its runtime is known to the provider registry. It is a no-op (leaves
|
||||
// RegistryBacked=false and the registry slices nil) for a runtime the registry
|
||||
// does not know — the federation/fail-open path.
|
||||
//
|
||||
// runtime is the template's already-normalised runtime string (the caller
|
||||
// strips the "-default" suffix before calling, matching List's existing
|
||||
// knownRuntimes check).
|
||||
func enrichFromRegistry(summary *templateSummary, runtime string) {
|
||||
m, err := providerRegistry()
|
||||
if err != nil || m == nil {
|
||||
return // fail open — registry load defect; keep template-served fields.
|
||||
}
|
||||
|
||||
provs, err := m.ProvidersForRuntime(runtime)
|
||||
if err != nil {
|
||||
// Runtime not in the registry runtimes: block (external / mock / kimi
|
||||
// / future third-party). Fail open: the template's own fields stay
|
||||
// authoritative; no registry annotation.
|
||||
return
|
||||
}
|
||||
|
||||
// registry_providers — the runtime's native provider set, in registry
|
||||
// declared order, projected to the canvas-facing view.
|
||||
views := make([]registryProviderView, 0, len(provs))
|
||||
for _, p := range provs {
|
||||
views = append(views, registryProviderView{
|
||||
Name: p.Name,
|
||||
DisplayName: p.DisplayName,
|
||||
AuthEnv: p.AuthEnv,
|
||||
BillingMode: billingModeForRegistryProvider(p),
|
||||
Deprecated: p.Deprecated,
|
||||
})
|
||||
}
|
||||
|
||||
// registry_models — the runtime's native model ids, each annotated with
|
||||
// the DERIVED owning provider + the billing mode it implies. DeriveProvider
|
||||
// is the SSOT for model→provider; we pass nil availableAuthEnv because a
|
||||
// template manifest has no per-workspace auth env, and the registry's
|
||||
// exact-id mapping resolves every native model id unambiguously (the
|
||||
// claude-code kimi split is by exact id, not a shared prefix).
|
||||
models, err := m.ModelsForRuntime(runtime)
|
||||
if err != nil {
|
||||
// ProvidersForRuntime succeeded but ModelsForRuntime did not — should
|
||||
// be impossible (both gate on the same Runtimes entry), but fail open
|
||||
// rather than serve a half-populated block.
|
||||
return
|
||||
}
|
||||
regModels := make([]modelSpec, 0, len(models))
|
||||
for _, id := range models {
|
||||
ms := modelSpec{ID: id}
|
||||
if derived, derr := m.DeriveProvider(runtime, id, nil); derr == nil {
|
||||
ms.Provider = derived.Name
|
||||
ms.BillingMode = billingModeForRegistryProvider(derived)
|
||||
}
|
||||
// If DeriveProvider errors (ambiguous/overlap — a manifest defect the
|
||||
// loader's tests pin against), still serve the id without a provider
|
||||
// annotation rather than dropping it; the canvas treats an
|
||||
// un-annotated registry model as selectable-but-unlabelled.
|
||||
regModels = append(regModels, ms)
|
||||
}
|
||||
|
||||
summary.RegistryBacked = true
|
||||
summary.RegistryProviders = views
|
||||
summary.RegistryModels = regModels
|
||||
}
|
||||
@@ -1329,3 +1329,311 @@ func TestCWE78_DeleteFile_TraversalVariants(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// internal#718 P3 — GET /templates serves the selectable provider/model list
|
||||
// FROM the provider registry (workspace-server/internal/providers), not from
|
||||
// each template's hand-authored config.yaml. Additive: the registry-served
|
||||
// fields (registry_backed / registry_providers / registry_models) ride
|
||||
// ALONGSIDE the existing template-served fields so non-registry runtimes and
|
||||
// older canvases keep working. The canvas (PR-B) prefers the registry block;
|
||||
// "only registered selectable" follows because the registry block is the
|
||||
// authoritative list for a registry-known runtime.
|
||||
// ============================================================================
|
||||
|
||||
// TestTemplatesList_RegistryServesSelectableModels pins the core P3 contract:
|
||||
// for a runtime the provider registry knows (claude-code), /templates serves
|
||||
// the registry's NATIVE model ids — regardless of what the template's
|
||||
// config.yaml runtime_config.models happens to list. A template author can no
|
||||
// longer surface an unregistered model into the canvas dropdown.
|
||||
func TestTemplatesList_RegistryServesSelectableModels(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
tmplDir := filepath.Join(tmpDir, "claude-code-default")
|
||||
if err := os.MkdirAll(tmplDir, 0755); err != nil {
|
||||
t.Fatalf("mkdir: %v", err)
|
||||
}
|
||||
// Deliberately list a BOGUS model the registry does not know. The
|
||||
// registry-served list must NOT contain it.
|
||||
configYaml := `name: Claude Code
|
||||
runtime: claude-code
|
||||
runtime_config:
|
||||
model: claude-sonnet-4-6
|
||||
models:
|
||||
- id: totally-made-up-model
|
||||
name: Not In Registry
|
||||
skills: []
|
||||
`
|
||||
if err := os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte(configYaml), 0644); err != nil {
|
||||
t.Fatalf("write: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/templates", nil)
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var resp []templateSummary
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
if len(resp) != 1 {
|
||||
t.Fatalf("expected 1 template, got %d", len(resp))
|
||||
}
|
||||
got := resp[0]
|
||||
|
||||
if !got.RegistryBacked {
|
||||
t.Fatalf("claude-code is a registry-known runtime; RegistryBacked must be true")
|
||||
}
|
||||
|
||||
// The registry-served model set must be the claude-code native set
|
||||
// (anthropic-oauth: sonnet/opus/haiku, anthropic-api: claude-*-4-*,
|
||||
// kimi-coding: kimi-*, minimax: MiniMax-*, platform: vendor/model ids).
|
||||
// It must NOT contain the template's bogus id.
|
||||
regModelIDs := map[string]bool{}
|
||||
for _, m := range got.RegistryModels {
|
||||
regModelIDs[m.ID] = true
|
||||
}
|
||||
if regModelIDs["totally-made-up-model"] {
|
||||
t.Errorf("RegistryModels leaked the template's unregistered model id")
|
||||
}
|
||||
for _, want := range []string{"sonnet", "opus", "claude-opus-4-7", "anthropic/claude-opus-4-7"} {
|
||||
if !regModelIDs[want] {
|
||||
t.Errorf("RegistryModels missing native model %q; got %v", want, regModelIDs)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestTemplatesList_RegistryAnnotatesDerivedProviderAndBilling pins that each
|
||||
// registry-served model carries its DERIVED provider name + a billing_mode
|
||||
// reflecting whether that derived provider is the closed platform set
|
||||
// (platform_managed) or BYOK (byok). This is what the canvas Config tab reads
|
||||
// to show the billing-mode of the DERIVED provider (folds in #1931 intent),
|
||||
// instead of its hardcoded billingModeForProvider rule.
|
||||
func TestTemplatesList_RegistryAnnotatesDerivedProviderAndBilling(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
tmplDir := filepath.Join(tmpDir, "claude-code-default")
|
||||
if err := os.MkdirAll(tmplDir, 0755); err != nil {
|
||||
t.Fatalf("mkdir: %v", err)
|
||||
}
|
||||
configYaml := `name: Claude Code
|
||||
runtime: claude-code
|
||||
runtime_config:
|
||||
model: claude-sonnet-4-6
|
||||
skills: []
|
||||
`
|
||||
if err := os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte(configYaml), 0644); err != nil {
|
||||
t.Fatalf("write: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/templates", nil)
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var resp []templateSummary
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
got := resp[0]
|
||||
|
||||
billByModel := map[string]string{}
|
||||
provByModel := map[string]string{}
|
||||
for _, m := range got.RegistryModels {
|
||||
billByModel[m.ID] = m.BillingMode
|
||||
provByModel[m.ID] = m.Provider
|
||||
}
|
||||
|
||||
// A BYOK API model derives to anthropic-api → byok.
|
||||
if provByModel["claude-opus-4-7"] != "anthropic-api" {
|
||||
t.Errorf("claude-opus-4-7 derived provider: want anthropic-api, got %q", provByModel["claude-opus-4-7"])
|
||||
}
|
||||
if billByModel["claude-opus-4-7"] != "byok" {
|
||||
t.Errorf("claude-opus-4-7 billing_mode: want byok, got %q", billByModel["claude-opus-4-7"])
|
||||
}
|
||||
// A platform-namespaced model derives to the closed platform provider →
|
||||
// platform_managed.
|
||||
if provByModel["anthropic/claude-opus-4-7"] != "platform" {
|
||||
t.Errorf("anthropic/claude-opus-4-7 derived provider: want platform, got %q", provByModel["anthropic/claude-opus-4-7"])
|
||||
}
|
||||
if billByModel["anthropic/claude-opus-4-7"] != "platform_managed" {
|
||||
t.Errorf("anthropic/claude-opus-4-7 billing_mode: want platform_managed, got %q", billByModel["anthropic/claude-opus-4-7"])
|
||||
}
|
||||
|
||||
// registry_providers carries the provider display_name + auth_env +
|
||||
// billing_mode for the dropdown labels — sourced from the registry, not
|
||||
// the canvas VENDOR_LABELS map.
|
||||
byName := map[string]registryProviderView{}
|
||||
for _, p := range got.RegistryProviders {
|
||||
byName[p.Name] = p
|
||||
}
|
||||
oauth, ok := byName["anthropic-oauth"]
|
||||
if !ok {
|
||||
t.Fatalf("registry_providers missing anthropic-oauth; got %v", byName)
|
||||
}
|
||||
if oauth.DisplayName != "Claude Code subscription" {
|
||||
t.Errorf("anthropic-oauth display_name: want %q, got %q", "Claude Code subscription", oauth.DisplayName)
|
||||
}
|
||||
if oauth.BillingMode != "byok" {
|
||||
t.Errorf("anthropic-oauth billing_mode: want byok, got %q", oauth.BillingMode)
|
||||
}
|
||||
if len(oauth.AuthEnv) != 1 || oauth.AuthEnv[0] != "CLAUDE_CODE_OAUTH_TOKEN" {
|
||||
t.Errorf("anthropic-oauth auth_env: want [CLAUDE_CODE_OAUTH_TOKEN], got %v", oauth.AuthEnv)
|
||||
}
|
||||
plat, ok := byName["platform"]
|
||||
if !ok || plat.BillingMode != "platform_managed" {
|
||||
t.Errorf("platform provider billing_mode: want platform_managed, got %+v", plat)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTemplatesList_NonRegistryRuntimeFallsOpenToTemplate pins federation-
|
||||
// readiness: for a runtime the registry does NOT know (a hypothetical
|
||||
// third-party / external-like runtime), /templates does NOT set
|
||||
// RegistryBacked and does NOT synthesize a registry block — the template's
|
||||
// own config.yaml fields remain the source, unchanged. No behavior change for
|
||||
// non-registry runtimes.
|
||||
func TestTemplatesList_NonRegistryRuntimeFallsOpenToTemplate(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
tmplDir := filepath.Join(tmpDir, "byo-runtime")
|
||||
if err := os.MkdirAll(tmplDir, 0755); err != nil {
|
||||
t.Fatalf("mkdir: %v", err)
|
||||
}
|
||||
// "mock" is a known runtime to the manifest allowlist (so List doesn't
|
||||
// skip it) but is NOT in the provider registry's runtimes: block.
|
||||
configYaml := `name: Mock Runtime
|
||||
runtime: mock
|
||||
runtime_config:
|
||||
model: canned-reply
|
||||
providers: [some-byo-provider]
|
||||
models:
|
||||
- id: canned-reply
|
||||
name: Canned Reply
|
||||
skills: []
|
||||
`
|
||||
if err := os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte(configYaml), 0644); err != nil {
|
||||
t.Fatalf("write: %v", err)
|
||||
}
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/templates", nil)
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var resp []templateSummary
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
if len(resp) != 1 {
|
||||
t.Fatalf("expected 1 template, got %d", len(resp))
|
||||
}
|
||||
got := resp[0]
|
||||
|
||||
if got.RegistryBacked {
|
||||
t.Errorf("mock is NOT a registry runtime; RegistryBacked must be false")
|
||||
}
|
||||
if len(got.RegistryModels) != 0 || len(got.RegistryProviders) != 0 {
|
||||
t.Errorf("non-registry runtime must not synthesize a registry block; got models=%v providers=%v",
|
||||
got.RegistryModels, got.RegistryProviders)
|
||||
}
|
||||
// Template-served fields untouched.
|
||||
if len(got.Models) != 1 || got.Models[0].ID != "canned-reply" {
|
||||
t.Errorf("template Models unchanged: got %+v", got.Models)
|
||||
}
|
||||
if !reflect.DeepEqual(got.Providers, []string{"some-byo-provider"}) {
|
||||
t.Errorf("template Providers unchanged: got %v", got.Providers)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTemplatesList_DisplayableFlag verifies the SSOT-driven runtime-picker
|
||||
// opt-out: a template's config.yaml `displayable: false` surfaces as a
|
||||
// non-nil false on the /templates row (canvas hides it), while an absent
|
||||
// flag stays nil (canvas shows it) and an explicit true surfaces as true.
|
||||
// This is the backend half of removing the hardcoded frontend allowlist —
|
||||
// the picker trusts this list, so hiding a runtime must be declarative here.
|
||||
func TestTemplatesList_DisplayableFlag(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
|
||||
mk := func(dir, yaml string) {
|
||||
d := filepath.Join(tmpDir, dir)
|
||||
if err := os.MkdirAll(d, 0755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(d, "config.yaml"), []byte(yaml), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
// absent → nil
|
||||
mk("adk-shown", "name: ADK Shown\nruntime: claude-code\n")
|
||||
// explicit false → hidden marker
|
||||
mk("adk-hidden", "name: ADK Hidden\nruntime: claude-code\ndisplayable: false\n")
|
||||
// explicit true → shown marker
|
||||
mk("adk-explicit", "name: ADK Explicit\nruntime: claude-code\ndisplayable: true\n")
|
||||
|
||||
handler := NewTemplatesHandler(tmpDir, nil, nil)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("GET", "/templates", nil)
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d", w.Code)
|
||||
}
|
||||
var resp []templateSummary
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("parse: %v", err)
|
||||
}
|
||||
byID := map[string]templateSummary{}
|
||||
for _, s := range resp {
|
||||
byID[s.ID] = s
|
||||
}
|
||||
|
||||
if s, ok := byID["adk-shown"]; !ok {
|
||||
t.Fatal("adk-shown missing")
|
||||
} else if s.Displayable != nil {
|
||||
t.Errorf("adk-shown: expected nil Displayable (absent), got %v", *s.Displayable)
|
||||
}
|
||||
|
||||
if s, ok := byID["adk-hidden"]; !ok {
|
||||
t.Fatal("adk-hidden missing")
|
||||
} else if s.Displayable == nil || *s.Displayable != false {
|
||||
t.Errorf("adk-hidden: expected non-nil false Displayable, got %v", s.Displayable)
|
||||
}
|
||||
|
||||
if s, ok := byID["adk-explicit"]; !ok {
|
||||
t.Fatal("adk-explicit missing")
|
||||
} else if s.Displayable == nil || *s.Displayable != true {
|
||||
t.Errorf("adk-explicit: expected non-nil true Displayable, got %v", s.Displayable)
|
||||
}
|
||||
|
||||
// JSON contract: omitempty drops the field entirely when nil so existing
|
||||
// templates' payloads are byte-unchanged; present when set.
|
||||
var rawRows []map[string]json.RawMessage
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &rawRows); err != nil {
|
||||
t.Fatalf("raw parse: %v", err)
|
||||
}
|
||||
for _, row := range rawRows {
|
||||
id := ""
|
||||
_ = json.Unmarshal(row["id"], &id)
|
||||
_, present := row["displayable"]
|
||||
if id == "adk-shown" && present {
|
||||
t.Error("adk-shown: displayable key should be omitted when nil")
|
||||
}
|
||||
if (id == "adk-hidden" || id == "adk-explicit") && !present {
|
||||
t.Errorf("%s: displayable key should be present when set", id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -428,6 +428,54 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// internal#718 P4 PR-2: ONLY-REGISTERED validation at the create boundary —
|
||||
// FLIPPED from WARN to HARD-REJECT (was the P2-B WARN-mode signal).
|
||||
//
|
||||
// For a runtime the provider registry knows (first-party:
|
||||
// claude-code/codex/hermes/openclaw) this checks the (runtime, model) pair
|
||||
// against the registry's native model set. Fails OPEN for runtimes the
|
||||
// registry doesn't know (langgraph/external/kimi/mock/federated) so
|
||||
// non-first-party / federated flows are UNCHANGED. Skipped for external
|
||||
// workspaces (the URL is the contract, not the model — see MODEL_REQUIRED
|
||||
// rationale above).
|
||||
//
|
||||
// THE FLIP (was WARN, now 422):
|
||||
// * P2-B carried the gate in WARN mode (X-Molecule-Model-Unregistered
|
||||
// response header + log line, create proceeds) because the legacy
|
||||
// colon-namespaced BYOK vocabulary ('anthropic:claude-opus-4-7' etc.)
|
||||
// was live across the create corpus but not yet in the registry's
|
||||
// exact-id model sets — hard-rejecting would have 422'd legitimate
|
||||
// existing flows.
|
||||
// * P4 PR-1 reconciled that colon vocab into the registry as
|
||||
// first-class native-set entries (each runtime native set now lists
|
||||
// both bare/slash AND colon forms for the BYOK ids the live corpus
|
||||
// uses; openclaw's pre-existing colon-form precedent extended to
|
||||
// claude-code). DeriveProvider / Manifest.ModelsForRuntime now
|
||||
// resolves every legitimate model in the corpus.
|
||||
// * With the reconcile landed, an unregistered (runtime, model) pair
|
||||
// is a real misconfiguration — the corpus has no legitimate model
|
||||
// this validator now rejects. We flip to 422
|
||||
// UNREGISTERED_MODEL_FOR_RUNTIME so the caller fails LOUDLY at the
|
||||
// boundary instead of provisioning a workspace that will wedge at
|
||||
// adapter init (the codex 'anthropic:claude-opus-4-7' wedge class
|
||||
// the MODEL_REQUIRED gate also targets).
|
||||
//
|
||||
// The registry model set is code-generated from the canonical
|
||||
// providers.yaml (P2-A artifact); the check stays in sync with the SSOT
|
||||
// via the verify-providers-gen + sync-providers-yaml CI gates.
|
||||
if !isExternal {
|
||||
if ok, why := validateRegisteredModelForRuntime(payload.Runtime, payload.Model); !ok {
|
||||
log.Printf("Create: 422 UNREGISTERED_MODEL_FOR_RUNTIME (runtime=%q model=%q): %s [internal#718 P4 PR-2 hard-reject]", payload.Runtime, payload.Model, why)
|
||||
c.JSON(http.StatusUnprocessableEntity, gin.H{
|
||||
"error": why,
|
||||
"runtime": payload.Runtime,
|
||||
"model": payload.Model,
|
||||
"code": "UNREGISTERED_MODEL_FOR_RUNTIME",
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
ctx := c.Request.Context()
|
||||
|
||||
// Convert empty role to NULL
|
||||
@@ -599,38 +647,39 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Persist canvas-selected model + derived provider as workspace
|
||||
// secrets so they survive restart and are picked up by CP user-data
|
||||
// when regenerating /configs/config.yaml. Without this, the
|
||||
// applyRuntimeModelEnv fallback chain (workspace_provision.go)
|
||||
// cannot recover the user's choice on a Restart payload (which
|
||||
// rebuilds from the workspaces row, where there is no model column),
|
||||
// and hermes silently boots with the template-default model. See
|
||||
// failed-workspace 95ed3ff2 (2026-05-02): canvas POSTed
|
||||
// minimax/MiniMax-M2.7-highspeed, MODEL_PROVIDER was never written,
|
||||
// container fell through to nousresearch/hermes-4-70b, derive-
|
||||
// provider.sh produced the wrong provider, hermes gateway 401'd,
|
||||
// /health poll failed, molecule-runtime never registered.
|
||||
// Persist canvas-selected model as the MODEL workspace_secret so it
|
||||
// survives restart and is picked up by CP user-data when regenerating
|
||||
// /configs/config.yaml. Without this, the applyRuntimeModelEnv
|
||||
// fallback chain (workspace_provision.go) cannot recover the user's
|
||||
// choice on a Restart payload (which rebuilds from the workspaces
|
||||
// row, where there is no model column), and hermes silently boots
|
||||
// with the template-default model. See failed-workspace 95ed3ff2
|
||||
// (2026-05-02): canvas POSTed minimax/MiniMax-M2.7-highspeed,
|
||||
// MODEL_PROVIDER was never written, container fell through to
|
||||
// nousresearch/hermes-4-70b, derive-provider.sh produced the wrong
|
||||
// provider, hermes gateway 401'd, /health poll failed,
|
||||
// molecule-runtime never registered.
|
||||
//
|
||||
// Both writes are non-fatal: a failure here logs and continues so
|
||||
// the workspace row stays consistent. The runtime can still boot
|
||||
// (with the template default) and a later Save+Restart will re-
|
||||
// persist via the SecretsHandler endpoints. The DB error path here
|
||||
// is rare (the same DB just committed a workspace row a microsecond
|
||||
// ago) so failing the create response would be unfriendly.
|
||||
// internal#718 P4 closure: the prior `setProviderSecret` write
|
||||
// (LLM_PROVIDER row, derived from the canvas-supplied
|
||||
// payload.LLMProvider OR from deriveProviderFromModelSlug) has been
|
||||
// REMOVED. The provider is now DERIVED at every decision point from
|
||||
// (runtime, model) via the registry — billing (P2-B), CP user-data
|
||||
// (this PR's CP-side commit replaces resolveModelAndProvider's
|
||||
// env["LLM_PROVIDER"] read with a DeriveProvider call), and
|
||||
// validation (P3 PR-C provisioner). Storing it is pure write-ghost
|
||||
// with no remaining consumer. `payload.LLMProvider` is preserved on
|
||||
// the request struct for backward-compatibility with older canvases
|
||||
// that still send it; the value is intentionally ignored here.
|
||||
//
|
||||
// The setModelSecret write is non-fatal: a failure here logs and
|
||||
// continues so the workspace row stays consistent. The runtime can
|
||||
// still boot (with the template default) and a later
|
||||
// Save+Restart will re-persist via the SecretsHandler endpoints.
|
||||
if payload.Model != "" {
|
||||
if err := setModelSecret(ctx, id, payload.Model); err != nil {
|
||||
log.Printf("Create workspace %s: failed to persist MODEL_PROVIDER %q: %v (non-fatal)", id, payload.Model, err)
|
||||
}
|
||||
if explicitProvider := strings.TrimSpace(payload.LLMProvider); explicitProvider != "" {
|
||||
if err := setProviderSecret(ctx, id, explicitProvider); err != nil {
|
||||
log.Printf("Create workspace %s: failed to persist LLM_PROVIDER %q: %v (non-fatal)", id, explicitProvider, err)
|
||||
}
|
||||
} else if derived := deriveProviderFromModelSlug(payload.Model); derived != "" {
|
||||
if err := setProviderSecret(ctx, id, derived); err != nil {
|
||||
log.Printf("Create workspace %s: failed to persist LLM_PROVIDER %q: %v (non-fatal)", id, derived, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Insert canvas layout — non-fatal: workspace can be dragged into position later
|
||||
|
||||
@@ -22,8 +22,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
@@ -259,11 +259,13 @@ func TestWorkspaceBudget_A2A_ExceededReturns402(t *testing.T) {
|
||||
// Cache a URL so resolveAgentURL doesn't need a DB query after budget check
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", "ws-over-budget"), "http://localhost:9999")
|
||||
|
||||
// Budget check query: spend = limit → exceeded
|
||||
mock.ExpectQuery("SELECT budget_limit, COALESCE").
|
||||
// Budget check: monthly limit 500, monthly spend 500 → exceeded → 402
|
||||
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
|
||||
WithArgs("ws-over-budget").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
|
||||
AddRow(int64(500), int64(500)))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":500}`)))
|
||||
mock.ExpectQuery(`FROM workspace_spend_events`).
|
||||
WithArgs("ws-over-budget").
|
||||
WillReturnRows(spendRows(0, 0, 0, 500))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -295,10 +297,12 @@ func TestWorkspaceBudget_A2A_AboveLimitReturns402(t *testing.T) {
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", "ws-way-over"), "http://localhost:9999")
|
||||
|
||||
// spend > limit
|
||||
mock.ExpectQuery("SELECT budget_limit, COALESCE").
|
||||
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
|
||||
WithArgs("ws-way-over").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
|
||||
AddRow(int64(100), int64(9999)))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":100}`)))
|
||||
mock.ExpectQuery(`FROM workspace_spend_events`).
|
||||
WithArgs("ws-way-over").
|
||||
WillReturnRows(spendRows(0, 0, 0, 9999))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
@@ -334,11 +338,13 @@ func TestWorkspaceBudget_A2A_UnderLimitPassesThrough(t *testing.T) {
|
||||
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", "ws-under-budget"), agentServer.URL)
|
||||
|
||||
// Budget check: spend (100) < limit (500) → pass-through
|
||||
mock.ExpectQuery("SELECT budget_limit, COALESCE").
|
||||
// Budget check: monthly spend (100) < limit (500) → pass-through
|
||||
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
|
||||
WithArgs("ws-under-budget").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
|
||||
AddRow(int64(500), int64(100)))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":500}`)))
|
||||
mock.ExpectQuery(`FROM workspace_spend_events`).
|
||||
WithArgs("ws-under-budget").
|
||||
WillReturnRows(spendRows(0, 0, 0, 100))
|
||||
|
||||
// Activity log INSERT from logA2ASuccess
|
||||
mock.ExpectExec("INSERT INTO activity_logs").
|
||||
@@ -380,11 +386,11 @@ func TestWorkspaceBudget_A2A_NilLimitPassesThrough(t *testing.T) {
|
||||
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", "ws-no-limit"), agentServer.URL)
|
||||
|
||||
// budget_limit NULL → no enforcement regardless of monthly_spend
|
||||
mock.ExpectQuery("SELECT budget_limit, COALESCE").
|
||||
// no limits configured → checkWorkspaceBudget returns early (no spend query),
|
||||
// enforcement skipped regardless of spend
|
||||
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
|
||||
WithArgs("ws-no-limit").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
|
||||
AddRow(nil, int64(999999))) // huge spend but no limit set
|
||||
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{}`)))
|
||||
|
||||
mock.ExpectExec("INSERT INTO activity_logs").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
@@ -425,7 +431,7 @@ func TestWorkspaceBudget_A2A_DBErrorFailOpen(t *testing.T) {
|
||||
mr.Set(fmt.Sprintf("ws:%s:url", "ws-db-err-budget"), agentServer.URL)
|
||||
|
||||
// Budget check fails with DB error → fail-open (request proceeds)
|
||||
mock.ExpectQuery("SELECT budget_limit, COALESCE").
|
||||
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
|
||||
WithArgs("ws-db-err-budget").
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
|
||||
|
||||
@@ -65,6 +65,14 @@ func validateWorkspaceCompute(compute models.WorkspaceCompute) error {
|
||||
if err := validateWorkspaceDisplayDimensions(compute.Display.Width, compute.Display.Height); err != nil {
|
||||
return err
|
||||
}
|
||||
// internal#734: the durable-data choice. CP re-validates the same enum at
|
||||
// its provision edge (IsValidDataPersistence → 400); validating here too
|
||||
// gives the user a clear workspace-server error before the CP round-trip.
|
||||
switch compute.DataPersistence {
|
||||
case "", "persist", "ephemeral":
|
||||
default:
|
||||
return fmt.Errorf("unsupported compute.data_persistence (want persist|ephemeral)")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -11,8 +11,8 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
@@ -36,6 +36,23 @@ func TestValidateWorkspaceCompute_RejectsUnknownInstanceType(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// internal#734: data_persistence enum. "" (auto), "persist", "ephemeral" are
|
||||
// the only accepted values; anything else is a clear 400 before the CP call.
|
||||
func TestValidateWorkspaceCompute_DataPersistence(t *testing.T) {
|
||||
for _, ok := range []string{"", "persist", "ephemeral"} {
|
||||
c := models.WorkspaceCompute{DataPersistence: ok}
|
||||
if err := validateWorkspaceCompute(c); err != nil {
|
||||
t.Errorf("data_persistence=%q must be accepted: %v", ok, err)
|
||||
}
|
||||
}
|
||||
for _, bad := range []string{"persistent", "off", "none", "Ephemeral", "true"} {
|
||||
c := models.WorkspaceCompute{DataPersistence: bad}
|
||||
if err := validateWorkspaceCompute(c); err == nil {
|
||||
t.Errorf("data_persistence=%q must be rejected", bad)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateWorkspaceCompute_RejectsOutOfRangeRootVolume(t *testing.T) {
|
||||
for _, rootGB := range []int{29, 501} {
|
||||
compute := models.WorkspaceCompute{Volume: models.WorkspaceComputeVolume{RootGB: rootGB}}
|
||||
@@ -126,7 +143,7 @@ func TestWorkspaceCreate_WithInvalidCompute_ReturnsBadRequest(t *testing.T) {
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{
|
||||
"name":"Oversized Agent",
|
||||
"model":"gpt-4",
|
||||
"model":"claude-opus-4-7",
|
||||
"compute":{"instance_type":"p4d.24xlarge"}
|
||||
}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
|
||||
|
||||
@@ -399,7 +399,13 @@ func (h *WorkspaceHandler) Delete(c *gin.Context) {
|
||||
// disable, broadcast). The HTTP-specific bits — direct-children 409
|
||||
// gate above, ?purge=true hard-delete below, response shaping —
|
||||
// stay in this handler.
|
||||
descendantIDs, stopErrs, err := h.CascadeDelete(ctx, id)
|
||||
// internal#734: the user can ask to erase saved data (browser profile /
|
||||
// cookies / downloads / agent memory) on delete. Opt-in — default keeps the
|
||||
// data on its volume for the orphan-sweeper grace. Only a genuine
|
||||
// permanent-delete reaches here (restart/reconcile use other paths), so this
|
||||
// is the one place prune may be requested.
|
||||
erase := c.Query("erase_data") == "true"
|
||||
descendantIDs, stopErrs, err := h.CascadeDelete(ctx, id, erase)
|
||||
if err != nil {
|
||||
// Audit 2026-05-09 (Core-Security): raw `err.Error()` here was
|
||||
// exposed to HTTP clients verbatim, including wrapped lib/pq
|
||||
@@ -515,7 +521,13 @@ func destructiveDeleteCounts(ctx context.Context, id string) (childCount int, sc
|
||||
// Caller is responsible for the children-confirmation gate (the HTTP handler
|
||||
// returns 409 when children exist + ?confirm=true is missing); this helper
|
||||
// always cascades.
|
||||
func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string) ([]string, []error, error) {
|
||||
// CascadeDelete tears down a workspace and its descendants (stop compute,
|
||||
// remove volumes, revoke tokens, disable schedules, broadcast). erase=true
|
||||
// (internal#734) means the user asked to erase saved data, so the CP compute
|
||||
// teardown prunes each workspace's durable data volume; the HTTP delete passes
|
||||
// the user's choice, the org-import reconcile passes false (a reconcile is not
|
||||
// a user-erase).
|
||||
func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string, erase bool) ([]string, []error, error) {
|
||||
if err := validateWorkspaceID(id); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@@ -579,7 +591,7 @@ func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string) ([]stri
|
||||
// pending EC2 is queryable and handed off to the CP-orphan-sweeper —
|
||||
// rather than the bare one-shot StopWorkspaceAuto that produced the
|
||||
// silent-leak class (task #15 / workspace-ec2-leak).
|
||||
if err := h.stopWorkspaceForDelete(cleanupCtx, wsID); err != nil {
|
||||
if err := h.stopWorkspaceForDelete(cleanupCtx, wsID, erase); err != nil {
|
||||
log.Printf("CascadeDelete %s stop failed: %v — leaving cleanup for orphan sweeper", wsID, err)
|
||||
stopErrs = append(stopErrs, fmt.Errorf("stop %s: %w", wsID, err))
|
||||
return
|
||||
|
||||
@@ -521,7 +521,7 @@ func TestValidateWorkspaceDir_Empty(t *testing.T) {
|
||||
|
||||
func TestCascadeDelete_InvalidUUID(t *testing.T) {
|
||||
h := &WorkspaceHandler{}
|
||||
descendants, stopErrs, err := h.CascadeDelete(context.Background(), "not-a-uuid")
|
||||
descendants, stopErrs, err := h.CascadeDelete(context.Background(), "not-a-uuid", false)
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid UUID")
|
||||
}
|
||||
@@ -542,7 +542,7 @@ func TestCascadeDelete_DescendantQueryError(t *testing.T) {
|
||||
WithArgs(wsID).
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
|
||||
deleted, stopErrs, err := h.CascadeDelete(context.Background(), wsID)
|
||||
deleted, stopErrs, err := h.CascadeDelete(context.Background(), wsID, false)
|
||||
if err == nil {
|
||||
t.Error("CascadeDelete returned nil error; want descendant query error")
|
||||
}
|
||||
@@ -569,7 +569,7 @@ func TestCascadeDelete_DescendantRowsError(t *testing.T) {
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(rows)
|
||||
|
||||
deleted, stopErrs, err := h.CascadeDelete(context.Background(), wsID)
|
||||
deleted, stopErrs, err := h.CascadeDelete(context.Background(), wsID, false)
|
||||
if err == nil {
|
||||
t.Fatal("CascadeDelete returned nil error; want descendant rows error")
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ func TestStopWorkspaceForDelete_CPRetriesTransientThenSucceeds(t *testing.T) {
|
||||
}}
|
||||
h := &WorkspaceHandler{cpProv: stub}
|
||||
|
||||
err := h.stopWorkspaceForDelete(context.Background(), "ws-del-1")
|
||||
err := h.stopWorkspaceForDelete(context.Background(), "ws-del-1", false)
|
||||
if err != nil {
|
||||
t.Fatalf("expected nil error on eventual success, got %v", err)
|
||||
}
|
||||
@@ -73,7 +73,7 @@ func TestStopWorkspaceForDelete_CPExhaustsEmitsDurableEventAndReturnsError(t *te
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
err := h.stopWorkspaceForDelete(context.Background(), "ws-doomed")
|
||||
err := h.stopWorkspaceForDelete(context.Background(), "ws-doomed", false)
|
||||
if err == nil {
|
||||
t.Fatal("expected terminal error on retry exhaustion, got nil")
|
||||
}
|
||||
@@ -96,7 +96,7 @@ func TestStopWorkspaceForDelete_CPExhaustsEmitsDurableEventAndReturnsError(t *te
|
||||
|
||||
func TestStopWorkspaceForDelete_NoBackendIsNoOp(t *testing.T) {
|
||||
h := &WorkspaceHandler{} // cpProv nil, provisioner nil
|
||||
if err := h.stopWorkspaceForDelete(context.Background(), "ws-x"); err != nil {
|
||||
if err := h.stopWorkspaceForDelete(context.Background(), "ws-x", false); err != nil {
|
||||
t.Errorf("expected nil no-op with no backend, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -235,9 +235,13 @@ func (h *WorkspaceHandler) StopWorkspaceAuto(ctx context.Context, workspaceID st
|
||||
// container won't heal on retry (matches RestartWorkspaceAuto's Docker
|
||||
// rationale); the orphan-container sweeper (registry/orphan_sweeper.go) is
|
||||
// the Docker-side backstop.
|
||||
func (h *WorkspaceHandler) stopWorkspaceForDelete(ctx context.Context, workspaceID string) error {
|
||||
// stopWorkspaceForDelete terminates a workspace's compute on the delete path.
|
||||
// erase=true (internal#734) means the user asked to erase saved data, so the CP
|
||||
// teardown prunes the durable data volume. The local-docker path always removes
|
||||
// its volume via CascadeDelete's RemoveVolume, so erase is a CP-only concern.
|
||||
func (h *WorkspaceHandler) stopWorkspaceForDelete(ctx context.Context, workspaceID string, erase bool) error {
|
||||
if h.cpProv != nil {
|
||||
if err := h.cpStopWithRetryErr(ctx, workspaceID, "Delete"); err != nil {
|
||||
if err := h.cpStopWithRetryErr(ctx, workspaceID, "Delete", erase); err != nil {
|
||||
h.emitDeleteTerminateRetryExhausted(ctx, workspaceID, err)
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -75,3 +75,21 @@ func formatMissingEnvError(missing []string) string {
|
||||
strings.Join(missing, ", "),
|
||||
)
|
||||
}
|
||||
|
||||
// formatMissingBYOKCredentialError builds the user-facing message for a
// provision failure caused by a non-platform (byok/subscription) workspace
// that has no usable LLM credential of its own (internal#711). The provision
// fails closed rather than starting the workspace on stripped/absent creds.
// The message is rendered verbatim in the canvas Events tab.
//
// mode is the workspace's LLM billing mode; it is interpolated with %q, so it
// appears double-quoted (and escaped) in the returned text.
func formatMissingBYOKCredentialError(mode string) string {
	const template = "this workspace's LLM billing mode is %q (not platform-managed) but it has no LLM credential of its own. " +
		"Add a workspace-scoped credential (e.g. CLAUDE_CODE_OAUTH_TOKEN or your provider's API key) under " +
		"Config → Secrets, or switch the workspace to platform-managed billing via " +
		"/admin/workspaces/:id/llm-billing-mode, then retry. The platform's shared LLM credentials are not " +
		"used for non-platform workspaces."

	return fmt.Sprintf(template, mode)
}
|
||||
|
||||
@@ -330,6 +330,7 @@ func (h *WorkspaceHandler) buildProvisionerConfig(
|
||||
Runtime: payload.Runtime,
|
||||
InstanceType: payload.Compute.InstanceType,
|
||||
DiskGB: int32(payload.Compute.Volume.RootGB),
|
||||
DataPersistence: payload.Compute.DataPersistence,
|
||||
Display: provisioner.WorkspaceDisplayConfig{
|
||||
Mode: payload.Compute.Display.Mode,
|
||||
Width: payload.Compute.Display.Width,
|
||||
@@ -710,131 +711,21 @@ func (h *WorkspaceHandler) defaultTemplateProvidersYAML(runtime string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// deriveProviderFromModelSlug maps a hermes-agent model slug prefix to
|
||||
// its provider name — a Go translation of the case statement in
|
||||
// workspace-configs-templates/hermes/scripts/derive-provider.sh that we
|
||||
// can run at provision time so LLM_PROVIDER lands in workspace_secrets
|
||||
// (and from there, into /configs/config.yaml via CP user-data) before
|
||||
// the container ever boots.
|
||||
// internal#718 P4 closure — `deriveProviderFromModelSlug` (retire-list #3)
|
||||
// has been removed together with its only caller (WorkspaceHandler.Create's
|
||||
// setProviderSecret write) and the LLM_PROVIDER workspace_secret it
|
||||
// populated.
|
||||
//
|
||||
// Returns "" when the prefix isn't recognized OR when the runtime-only
|
||||
// override would be needed to pick a provider — the caller skips the
|
||||
// LLM_PROVIDER write in that case so derive-provider.sh keeps the final
|
||||
// say at boot. derive-provider.sh remains the source of truth: this is
|
||||
// strictly a *gating* hint that survives restarts and gives CP a YAML
|
||||
// field to populate. Without it, "Save+Restart" would lose the user's
|
||||
// provider choice every time CP regenerates the config.
|
||||
//
|
||||
// Two intentional differences from the shell version:
|
||||
//
|
||||
// 1. nousresearch/* and openai/* both return "openrouter" here. The
|
||||
// shell script special-cases "prefer nous if HERMES_API_KEY set" /
|
||||
// "prefer custom if OPENAI_API_KEY set", but those depend on
|
||||
// runtime env that may not yet be loaded at provision time. We pick
|
||||
// the safe default ("openrouter" reaches both Hermes 3 and OpenAI
|
||||
// models without extra config); derive-provider.sh's runtime check
|
||||
// can still upgrade to nous/custom when the keys are present.
|
||||
//
|
||||
// 2. Unknown prefixes return "" instead of "auto". Persisting "auto"
|
||||
// would block a future "Save+Restart" with a known prefix from
|
||||
// re-deriving — the CP YAML field is sticky once written. Returning
|
||||
// "" means the caller skips the write and the runtime falls through
|
||||
// to derive-provider.sh's *=auto branch on its own.
|
||||
//
|
||||
// Cover the same prefix list as derive-provider.sh's case statement;
|
||||
// keep both files in sync when a new provider is added (table-driven
|
||||
// test in workspace_provision_shared_test.go pins the mapping).
|
||||
// deriveProviderFromModelSlug maps the prefix of a "<prefix>/<name>" model
// slug to a provider name using providerBySlugPrefix.
//
// It returns "" when model is empty, contains no "/", starts with "/", or
// carries a prefix that is not in the table. Callers treat "" as "do not
// persist a guess".
func deriveProviderFromModelSlug(model string) string {
	// Index returns -1 for a missing separator (including the empty string)
	// and 0 for a leading "/": neither leaves a usable prefix.
	sep := strings.Index(model, "/")
	if sep <= 0 {
		return ""
	}
	// A missing key yields the zero value "", which is the unknown-prefix
	// result.
	return providerBySlugPrefix[model[:sep]]
}

// providerBySlugPrefix is the prefix→provider table consulted by
// deriveProviderFromModelSlug. Several prefixes are aliases that collapse
// onto one provider name.
var providerBySlugPrefix = map[string]string{
	// Direct-SDK providers (clean 1:1 prefix→provider mapping, plus aliases).
	"minimax":        "minimax",
	"minimax-cn":     "minimax-cn",
	"anthropic":      "anthropic",
	"gemini":         "gemini",
	"deepseek":       "deepseek",
	"zai":            "zai",
	"kimi-coding":    "kimi-coding",
	"kimi-coding-cn": "kimi-coding-cn",
	"alibaba":        "alibaba",
	"dashscope":      "alibaba",
	"qwen":           "alibaba",
	"xiaomi":         "xiaomi",
	"mimo":           "xiaomi",
	"arcee":          "arcee",
	"arcee-ai":       "arcee",
	"nvidia":         "nvidia",
	"nim":            "nvidia",
	"ollama-cloud":   "ollama-cloud",
	"huggingface":    "huggingface",
	"hf":             "huggingface",
	"ai-gateway":     "ai-gateway",
	"aigateway":      "ai-gateway",
	"kilocode":       "kilocode",
	"opencode-zen":   "opencode-zen",
	"opencode-go":    "opencode-go",

	// Aggregator + explicit catch-alls.
	"openrouter": "openrouter",
	"custom":     "custom",

	// Runtime-only override candidates: "openrouter" is the default chosen
	// here for both prefixes.
	"nousresearch": "openrouter",
	"openai":       "openrouter",

	// Additional prefix→provider mappings.
	"xai":                 "xai",
	"grok":                "xai",
	"bedrock":             "bedrock",
	"aws":                 "bedrock",
	"tencent":             "tencent-tokenhub",
	"tencent-tokenhub":    "tencent-tokenhub",
	"gmi":                 "gmi",
	"qwen-oauth":          "qwen-oauth",
	"lmstudio":            "lmstudio",
	"lm-studio":           "lmstudio",
	"minimax-oauth":       "minimax-oauth",
	"alibaba-coding-plan": "alibaba-coding-plan",
	"google-gemini-cli":   "google-gemini-cli",
	"openai-codex":        "openai-codex",
	"copilot-acp":         "copilot-acp",
	"copilot":             "copilot",
}
|
||||
// The hand-rolled prefix switch was a Go mirror of
|
||||
// workspace-configs-templates/hermes/scripts/derive-provider.sh kept in
|
||||
// sync via a drift test. The replacement is providers.Manifest.DeriveProvider
|
||||
// (synced in P2-A), which derives the provider from (runtime, model)
|
||||
// against the registry SSOT at every decision point — billing (P2-B),
|
||||
// CP user-data emission (this PR's CP-side commit), validation
|
||||
// (P3 PR-C). The shell script in the hermes template continues to be the
|
||||
// runtime fallback for unregistered models; codegen of the template's
|
||||
// providers block from the registry is the P4 follow-up gated on
|
||||
// registry data growth.
|
||||
|
||||
// applyRuntimeModelEnv exposes the workspace's selected model via an
|
||||
// env var the target runtime's install.sh / start.sh knows to read.
|
||||
@@ -883,12 +774,7 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
|
||||
// can no longer confuse a provider slug for a model id. CP-side
|
||||
// slot-separation (cp#213 + cp#220) merged the analogous fix on
|
||||
// the CP side; this is the workspace-server companion.
|
||||
if model == "" {
|
||||
model = envVars["MOLECULE_MODEL"]
|
||||
}
|
||||
if model == "" {
|
||||
model = envVars["MODEL"]
|
||||
}
|
||||
model = effectiveModelForBilling(model, envVars)
|
||||
if model == "" {
|
||||
return
|
||||
}
|
||||
@@ -921,6 +807,31 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
|
||||
}
|
||||
}
|
||||
|
||||
// effectiveModelForBilling resolves the model id a workspace will run, using
// the fallback chain: explicit model argument → envVars["MOLECULE_MODEL"] →
// envVars["MODEL"]. The first non-empty value wins.
//
// It is shared by applyRuntimeModelEnv and applyPlatformManagedLLMEnv so both
// work from the same effective model (molecule-core#1994): on a re-provision
// the explicit model can be "" while the stored model is still present in
// envVars.
//
// Returns "" only when none of the three sources holds a non-empty value.
// A nil envVars is safe: lookups on a nil map yield "".
func effectiveModelForBilling(model string, envVars map[string]string) string {
	if model != "" {
		return model
	}
	// Order is significant: MOLECULE_MODEL outranks MODEL.
	for _, key := range []string{"MOLECULE_MODEL", "MODEL"} {
		if stored := envVars[key]; stored != "" {
			return stored
		}
	}
	return ""
}
|
||||
|
||||
// applyPlatformManagedLLMEnv wires the control-plane LLM proxy into a
|
||||
// workspace only when the RESOLVED billing mode for this workspace is
|
||||
// platform_managed. "Resolved" means: the workspace-level override (if any)
|
||||
@@ -943,16 +854,94 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
|
||||
// MOLECULE_LLM_BILLING_MODE_RESOLVED so an in-container debug check can
|
||||
// answer "what mode is this workspace running under" without DB queries
|
||||
// (RFC Observability hot-spot).
|
||||
func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, workspaceID, runtime, model string) {
|
||||
orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")))
|
||||
res, resolveErr := ResolveLLMBillingMode(ctx, workspaceID, orgMode)
|
||||
//
|
||||
// molecule-core#1994 (credential-handling follow-on, CTO-confirmed model).
|
||||
// `global_secrets` is the TENANT's own secret store, shared across all of
|
||||
// that tenant's workspaces — it is NOT the platform's. The platform's own
|
||||
// LLM credential is the CP proxy usage token (MOLECULE_LLM_USAGE_TOKEN),
|
||||
// injected SEPARATELY on the platform_managed path below; it is never stored
|
||||
// in any tenant's global_secrets.
|
||||
//
|
||||
// Consequently the byok/disabled branch does NOT strip the tenant's
|
||||
// global-origin LLM creds. Under the corrected model the tenant's own
|
||||
// credential — whether at global scope (a global_secrets row, e.g. the key
|
||||
// they configured via the org-import required-env preflight / the settings
|
||||
// Secrets tab) or at workspace scope (a workspace_secrets row) — is exactly
|
||||
// what byok must run on, direct. The earlier internal#711 strip rested on the
|
||||
// inverted premise that a global-scope LLM cred was "the platform's own"; it
|
||||
// was wrong and it killed legitimate byok workspaces (MISSING_BYOK_CREDENTIAL
|
||||
// for tenants whose oauth lived at global scope — Reno Stars Marketing agent,
|
||||
// confirmed live 2026-05-28). Removing the strip is only safe because the
|
||||
// platform's own credential is never co-mingled into a tenant's global_secrets
|
||||
// (guarded at the write boundary: SetGlobal rejects bypass-list keys for a
|
||||
// platform-managed tenant; the platform proxy token is read from server env
|
||||
// only, never persisted to a tenant store).
|
||||
//
|
||||
// The boolean return still reports whether the workspace has at least one
|
||||
// usable LLM credential. The caller (prepareProvisionContext) uses it to FAIL
|
||||
// CLOSED — a byok workspace with no usable LLM credential at ANY scope is
|
||||
// aborted with a clear MISSING_BYOK_CREDENTIAL error at provision time rather
|
||||
// than started credential-less.
|
||||
// platformLLMEnvResult is the structured outcome of applyPlatformManagedLLMEnv.
|
||||
// ResolvedMode is the per-workspace billing/provider mode the resolver
|
||||
// landed on. HasUsableLLMCred reports whether the workspace has at least one
|
||||
// platform-managed-shaped LLM credential key in its env — the tenant's own,
|
||||
// at global or workspace scope. Only the non-platform (byok) path consults
|
||||
// HasUsableLLMCred for the fail-closed decision; the platform_managed path
|
||||
// always returns true (it forces the CP proxy usage token, which IS the
|
||||
// usable credential).
|
||||
type platformLLMEnvResult struct {
|
||||
ResolvedMode string
|
||||
HasUsableLLMCred bool
|
||||
// Source records which layer decided the mode (internal#718 P2-B):
|
||||
// derived_provider (registry derivation), derived_default (derive failed →
|
||||
// platform default), workspace_override (explicit operator pin), or
|
||||
// constant_fallback (DB error). Surfaced for observability + asserted by the
|
||||
// behavior-delta tests so a regression of "derived, not stored" flips red.
|
||||
Source BillingModeSource
|
||||
}
|
||||
|
||||
// globalKeys is the provenance side-channel from loadWorkspaceSecrets: the set
|
||||
// of env keys that originated from the operator-controlled global_secrets table
|
||||
// (a workspace_secrets row of the same name overrides and clears the flag). It
|
||||
// is consumed ONLY on the byok/disabled branch's provider-matched strip
|
||||
// (internal#728 Bug 1): a global-origin LLM bypass cred that does NOT match the
|
||||
// resolved provider's auth_env is stripped so a greedy runtime (claude-code
|
||||
// prefers CLAUDE_CODE_OAUTH_TOKEN) cannot route a non-anthropic model to the
|
||||
// wrong upstream. May be nil (no global-origin keys / unknown provenance) — a
|
||||
// nil set strips nothing, preserving the pre-#728 behavior for callers that do
|
||||
// not thread provenance.
|
||||
func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, workspaceID, runtime, model string, globalKeys map[string]struct{}) platformLLMEnvResult {
|
||||
// internal#718 P2-B: the platform-vs-byok decision now DERIVES the provider
|
||||
// from (runtime, model) via the registry and keys off IsPlatform(derived) —
|
||||
// NOT a stored LLM_PROVIDER and NOT the org rung. This path already carries
|
||||
// runtime + model + the workspace env, so it calls the DERIVED resolver
|
||||
// directly (no DB round-trip for runtime/model). availableAuthEnv is the set
|
||||
// of recognized provider auth-env-var NAMES present in envVars (the same
|
||||
// disambiguation input the registry uses to split oauth-vs-api). The org-env
|
||||
// MOLECULE_LLM_BILLING_MODE is NO LONGER read into the decision (retired).
|
||||
availableAuthEnv := availableAuthEnvNames(envVars)
|
||||
// molecule-core#1994: derive billing mode from the EFFECTIVE model, not the
|
||||
// raw payload.Model. On a re-provision (restart/resume/auto-restart) the
|
||||
// payload is rebuilt from the DB with Name+Tier+Runtime only — payload.Model
|
||||
// is "" (workspace_restart.go via withStoredCompute, which backfills Compute
|
||||
// but NOT Model). With an empty model DeriveProvider errors → the resolver
|
||||
// defaults closed to platform_managed and bakes the CP proxy, DIVERGING from
|
||||
// the read endpoint (which reads the stored MODEL workspace_secret and derives
|
||||
// byok). The stored model already lives in the merged envVars (loaded by
|
||||
// loadWorkspaceSecrets); resolve it with the SAME fallback chain
|
||||
// applyRuntimeModelEnv uses so the provision-path derive inputs match the
|
||||
// read-path's — keeping the two resolvers in parity (the #1994 regression
|
||||
// guard test asserts this).
|
||||
effectiveModel := effectiveModelForBilling(model, envVars)
|
||||
res, resolveErr := ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, effectiveModel, availableAuthEnv)
|
||||
if resolveErr != nil {
|
||||
// resolveErr != nil ⇒ resolver hit a DB error AND already defaulted
|
||||
// res.ResolvedMode to platform_managed. Log + proceed; the safe default
|
||||
// is already in place, no early return needed.
|
||||
log.Printf("workspace_provision: resolve billing mode workspace=%s err=%v (defaulting to platform_managed)", workspaceID, resolveErr)
|
||||
}
|
||||
log.Printf("workspace_provision: billing mode workspace=%s resolved=%s source=%s org_default=%s", workspaceID, res.ResolvedMode, res.Source, res.OrgDefault)
|
||||
log.Printf("workspace_provision: billing mode workspace=%s resolved=%s source=%s derived_provider=%s", workspaceID, res.ResolvedMode, res.Source, derefOrEmpty(res.ProviderSelection))
|
||||
// internal#703: MOLECULE_LLM_BILLING_MODE in the container must reflect the
|
||||
// RESOLVED per-workspace mode, not a hardcoded literal. Pre-fix this var was
|
||||
// only emitted (hardcoded "platform_managed") on the strip path below, so a
|
||||
@@ -966,18 +955,60 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string,
|
||||
// pulling logs or hitting the admin route.
|
||||
envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] = res.ResolvedMode
|
||||
if res.ResolvedMode != LLMBillingModePlatformManaged {
|
||||
// byok or disabled — DO NOT strip vendor keys, DO NOT force-route to CP,
|
||||
// DO NOT override the workspace own ANTHROPIC_BASE_URL / OAuth token.
|
||||
// Leave envVars alone so CLAUDE_CODE_OAUTH_TOKEN / vendor API keys
|
||||
// pulled from workspace_secrets survive into the container, and the
|
||||
// workspace talks to its own provider directly (internal#703).
|
||||
return
|
||||
// byok or disabled — DO NOT force-route to CP, DO NOT override the
|
||||
// workspace's own ANTHROPIC_BASE_URL, and DO NOT strip the tenant's own
|
||||
// (provider-matching) LLM credentials.
|
||||
//
|
||||
// molecule-core#1994 (corrected model): `global_secrets` is the
|
||||
// TENANT's store, not the platform's. The tenant's own credential —
|
||||
// at global OR workspace scope — is exactly what byok runs on, direct.
|
||||
// The platform's own credential is never in a tenant's global_secrets
|
||||
// (guarded at the SetGlobal write boundary + the proxy token is
|
||||
// server-env-only), so leaving the tenant's globals in place cannot
|
||||
// re-open the platform-credit drain.
|
||||
//
|
||||
// internal#728 Bug 1 (provider-matched credential injection): #1994
|
||||
// removed the BLANKET strip, which was correct for the platform-key
|
||||
// co-mingling it targeted but left EVERY claude-code workspace
|
||||
// inheriting the tenant-global CLAUDE_CODE_OAUTH_TOKEN. A claude-code
|
||||
// runtime greedily prefers that oauth (`llm-auth: detected oauth` →
|
||||
// api.anthropic.com), so a workspace whose RESOLVED provider is NOT
|
||||
// anthropic-oauth (minimax, kimi-byok, …) routes its non-Anthropic
|
||||
// model to Anthropic and errors (`Claude Code returned an error
|
||||
// result`; DevB MiniMax-M2.7 live-confirmed 2026-05-28).
|
||||
//
|
||||
// The precise, provider-AWARE replacement for the over-removed strip:
|
||||
// keep ONLY the global-origin bypass creds whose env-var name is in the
|
||||
// RESOLVED provider's auth_env; strip the rest. This is NOT a return to
|
||||
// the blanket strip — it is keyed off the derived provider:
|
||||
// - minimax (auth_env: MINIMAX_API_KEY, ANTHROPIC_AUTH_TOKEN,
|
||||
// ANTHROPIC_API_KEY) → global-origin CLAUDE_CODE_OAUTH_TOKEN is
|
||||
// NOT a match → stripped (fixes DevB).
|
||||
// - anthropic-oauth (auth_env: CLAUDE_CODE_OAUTH_TOKEN) → the
|
||||
// global-origin oauth IS a match → kept (PM/reno opus byok NOT
|
||||
// regressed — the #1994 ByokGlobalScopeOAuthSurvives guard holds).
|
||||
// WORKSPACE-origin creds (the user explicitly set them via the canvas
|
||||
// Secrets tab → NOT in globalKeys) are NEVER stripped here, even when
|
||||
// they don't match: the user authored them deliberately (JRS kimi
|
||||
// workspace-key, reno's own oauth). Only the inherited operator-store
|
||||
// channel is provider-gated.
|
||||
stripNonMatchingGlobalOriginLLMCreds(envVars, globalKeys, runtime, effectiveModel, availableAuthEnv)
|
||||
return platformLLMEnvResult{
|
||||
ResolvedMode: res.ResolvedMode,
|
||||
HasUsableLLMCred: hasAnyPlatformManagedLLMKey(envVars),
|
||||
Source: res.Source,
|
||||
}
|
||||
}
|
||||
baseURL := firstNonEmptyEnv("MOLECULE_LLM_BASE_URL", "OPENAI_BASE_URL")
|
||||
anthropicBaseURL := firstNonEmptyEnv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "ANTHROPIC_BASE_URL")
|
||||
token := firstNonEmptyEnv("MOLECULE_LLM_USAGE_TOKEN", "OPENAI_API_KEY")
|
||||
if baseURL == "" || token == "" {
|
||||
return
|
||||
// Proxy not configured (boot race / misconfig). On the platform_managed
|
||||
// path the workspace IS entitled to platform creds, so we do NOT strip
|
||||
// here — but we report HasUsableLLMCred from whatever survived so the
|
||||
// caller's fail-closed branch (non-platform only) is never reached on
|
||||
// this path.
|
||||
return platformLLMEnvResult{ResolvedMode: res.ResolvedMode, HasUsableLLMCred: true, Source: res.Source}
|
||||
}
|
||||
stripPlatformManagedLLMBypassEnv(envVars)
|
||||
|
||||
@@ -1006,6 +1037,10 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string,
|
||||
envVars["MOLECULE_MODEL"] = defaultModel
|
||||
}
|
||||
}
|
||||
// platform_managed: the CP proxy usage token (injected as ANTHROPIC_API_KEY
|
||||
// / OPENAI_API_KEY above) IS the usable credential, so the workspace is
|
||||
// never fail-closed on this path.
|
||||
return platformLLMEnvResult{ResolvedMode: res.ResolvedMode, HasUsableLLMCred: true, Source: res.Source}
|
||||
}
|
||||
|
||||
func stripPlatformManagedLLMBypassEnv(envVars map[string]string) {
|
||||
@@ -1014,6 +1049,80 @@ func stripPlatformManagedLLMBypassEnv(envVars map[string]string) {
|
||||
}
|
||||
}
|
||||
|
||||
// hasAnyPlatformManagedLLMKey reports whether envVars carries at least one
|
||||
// non-empty platform-managed-shaped LLM credential key (the tenant's own, at
|
||||
// global or workspace scope). Used by the byok fail-closed branch: a byok
|
||||
// workspace with no LLM credential at ANY scope must be aborted with
|
||||
// MISSING_BYOK_CREDENTIAL rather than started credential-less.
|
||||
func hasAnyPlatformManagedLLMKey(envVars map[string]string) bool {
|
||||
for key := range platformManagedDirectLLMBypassKeys {
|
||||
if strings.TrimSpace(envVars[key]) != "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// stripNonMatchingGlobalOriginLLMCreds is the byok-branch provider-matched
|
||||
// credential injection (internal#728 Bug 1). It removes from envVars every
|
||||
// platform-managed LLM bypass key that:
|
||||
//
|
||||
// 1. originated from the operator-controlled global_secrets store
|
||||
// (present in globalKeys — a workspace_secrets row of the same name
|
||||
// overrides + clears the flag, so user-authored creds are exempt), AND
|
||||
// 2. is NOT in the RESOLVED provider's auth_env set.
|
||||
//
|
||||
// The motivating regression: #1994 dropped the blanket strip, so a claude-code
|
||||
// workspace resolving to `minimax` still inherited the tenant-global
|
||||
// CLAUDE_CODE_OAUTH_TOKEN; the runtime prefers that oauth and routes the
|
||||
// MiniMax model to api.anthropic.com → error. Keeping only the resolved
|
||||
// provider's own auth_env keys (minimax: MINIMAX_API_KEY/ANTHROPIC_AUTH_TOKEN/
|
||||
// ANTHROPIC_API_KEY — not the oauth) removes the stray oauth while preserving
|
||||
// anthropic-oauth's CLAUDE_CODE_OAUTH_TOKEN for an opus byok workspace.
|
||||
//
|
||||
// Fail-OPEN by design: if the provider cannot be derived (empty model /
|
||||
// unknown runtime / ambiguous) or the registry is unavailable, we strip
|
||||
// NOTHING — we never strip a credential we cannot prove is non-matching, so a
|
||||
// derive miss can never fail-close a legitimate byok workspace (mirrors the
|
||||
// resolver's own default-closed-to-platform contract: the worst case is we
|
||||
// keep a stray cred, never that we remove the only usable one). The earlier
|
||||
// internal#711 blanket strip's fail-direction (remove first) was the bug;
|
||||
// this strip's fail-direction is keep-first.
|
||||
func stripNonMatchingGlobalOriginLLMCreds(envVars map[string]string, globalKeys map[string]struct{}, runtime, model string, availableAuthEnv []string) {
|
||||
if len(globalKeys) == 0 {
|
||||
return // no operator-store-origin keys to consider — nothing to strip.
|
||||
}
|
||||
manifest, err := providerRegistry()
|
||||
if err != nil || manifest == nil {
|
||||
return // registry unavailable — fail open, strip nothing.
|
||||
}
|
||||
provider, dErr := manifest.DeriveProvider(runtime, model, availableAuthEnv)
|
||||
if dErr != nil {
|
||||
return // underivable provider — fail open, strip nothing.
|
||||
}
|
||||
// The resolved provider's accepted auth-env-var NAMES (case-insensitive
|
||||
// for parity with isPlatformManagedDirectLLMBypassKey, which upper-cases).
|
||||
keep := make(map[string]struct{}, len(provider.AuthEnv))
|
||||
for _, e := range provider.AuthEnv {
|
||||
keep[strings.ToUpper(strings.TrimSpace(e))] = struct{}{}
|
||||
}
|
||||
for key := range globalKeys {
|
||||
upper := strings.ToUpper(strings.TrimSpace(key))
|
||||
if _, isBypass := platformManagedDirectLLMBypassKeys[upper]; !isBypass {
|
||||
continue // not an LLM bypass cred (e.g. a non-LLM operator secret) — leave it.
|
||||
}
|
||||
if _, matches := keep[upper]; matches {
|
||||
continue // matches the resolved provider's auth_env — this is what byok runs on.
|
||||
}
|
||||
// Global-origin LLM bypass cred that does NOT match the resolved
|
||||
// provider — the stray that a greedy runtime would mis-prefer. Strip.
|
||||
if _, present := envVars[key]; present {
|
||||
log.Printf("workspace_provision: byok provider-matched strip — removing global-origin LLM cred %s (resolved provider=%s does not accept it)", key, provider.Name)
|
||||
delete(envVars, key)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// runtimeUsesAnthropicNativeProxy reports whether runtime names the
// "claude-code" runtime. Surrounding whitespace is ignored and the comparison
// is case-insensitive.
func runtimeUsesAnthropicNativeProxy(runtime string) bool {
	const anthropicNativeRuntime = "claude-code"
	normalized := strings.TrimSpace(runtime)
	return strings.EqualFold(normalized, anthropicNativeRuntime)
}
|
||||
@@ -1065,6 +1174,14 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
|
||||
var v []byte
|
||||
var ver int
|
||||
if globalRows.Scan(&k, &v, &ver) == nil {
|
||||
// internal#718 P4 closure: LLM_PROVIDER is retired even
|
||||
// at the global rung. The same provider-from-(runtime,model)
|
||||
// derivation runs per-workspace, so a global default
|
||||
// would be pure ghost. Symmetric with the workspace_secrets
|
||||
// drop below.
|
||||
if k == "LLM_PROVIDER" {
|
||||
continue
|
||||
}
|
||||
decrypted, decErr := crypto.DecryptVersioned(v, ver)
|
||||
if decErr != nil {
|
||||
log.Printf("Provisioner: FATAL — failed to decrypt global secret %s (version=%d): %v — aborting provision of workspace %s", k, ver, decErr, workspaceID)
|
||||
@@ -1087,6 +1204,18 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
|
||||
var v []byte
|
||||
var ver int
|
||||
if wsRows.Scan(&k, &v, &ver) == nil {
|
||||
// internal#718 P4 closure: LLM_PROVIDER is a retired
|
||||
// secret key. Migration 20260528000000 deletes any
|
||||
// straggler rows; this drop is defence-in-depth so a
|
||||
// rolling deploy (new code, old DB) never re-emits the
|
||||
// retired key into the provisioner env (which would
|
||||
// reach the CP-side resolveModelAndProvider — now
|
||||
// itself retired, but the env contract belongs to
|
||||
// core). Idempotent: a fresh tenant has zero
|
||||
// LLM_PROVIDER rows and this branch is unreached.
|
||||
if k == "LLM_PROVIDER" {
|
||||
continue
|
||||
}
|
||||
decrypted, decErr := crypto.DecryptVersioned(v, ver)
|
||||
if decErr != nil {
|
||||
log.Printf("Provisioner: FATAL — failed to decrypt workspace secret %s (version=%d) for %s: %v — aborting provision", k, ver, workspaceID, decErr)
|
||||
|
||||
@@ -42,6 +42,7 @@ type trackingCPProv struct {
|
||||
mu sync.Mutex
|
||||
started []string
|
||||
stopped []string
|
||||
pruned []string // internal#734: workspaces stopped via StopAndPrune
|
||||
startErr error
|
||||
stopErr error
|
||||
}
|
||||
@@ -61,6 +62,13 @@ func (r *trackingCPProv) Stop(_ context.Context, workspaceID string) error {
|
||||
r.mu.Unlock()
|
||||
return r.stopErr
|
||||
}
|
||||
func (r *trackingCPProv) StopAndPrune(_ context.Context, workspaceID string) error {
|
||||
r.mu.Lock()
|
||||
r.stopped = append(r.stopped, workspaceID)
|
||||
r.pruned = append(r.pruned, workspaceID)
|
||||
r.mu.Unlock()
|
||||
return r.stopErr
|
||||
}
|
||||
func (r *trackingCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
@@ -10,9 +10,9 @@ import (
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
)
|
||||
|
||||
// Issue #2486 reproduction harness: 7 simultaneous claude-code provisions
|
||||
@@ -71,6 +71,9 @@ func (r *recordingCPProv) Start(_ context.Context, cfg provisioner.WorkspaceConf
|
||||
func (r *recordingCPProv) Stop(_ context.Context, _ string) error {
|
||||
panic("recordingCPProv.Stop not expected in concurrent-repro test")
|
||||
}
|
||||
func (r *recordingCPProv) StopAndPrune(_ context.Context, _ string) error {
|
||||
panic("recordingCPProv.StopAndPrune not expected in concurrent-repro test")
|
||||
}
|
||||
|
||||
func (r *recordingCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) {
|
||||
panic("recordingCPProv.GetConsoleOutput not expected in concurrent-repro test")
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseTopLevelRuntime(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
yaml string
|
||||
want string
|
||||
}{
|
||||
{"top-level claude-code", "name: x\nruntime: claude-code\ntier: 2\n", "claude-code"},
|
||||
{"top-level google-adk", "runtime: google-adk\n", "google-adk"},
|
||||
{"quoted value", `runtime: "google-adk"` + "\n", "google-adk"},
|
||||
{"single-quoted value", "runtime: 'codex'\n", "codex"},
|
||||
{"ignores runtime_config nested model", "runtime: google-adk\nruntime_config:\n model: vertex:gemini-2.5-pro\n", "google-adk"},
|
||||
{"runtime_config only, no top-level runtime", "name: y\nruntime_config:\n model: x\n", ""},
|
||||
{"indented runtime is not top-level", "wrapper:\n runtime: claude-code\n", ""},
|
||||
{"empty", "", ""},
|
||||
{"no runtime key", "name: z\ntier: 4\n", ""},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := parseTopLevelRuntime([]byte(tc.yaml)); got != tc.want {
|
||||
t.Fatalf("parseTopLevelRuntime(%q) = %q, want %q", tc.yaml, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSeededConfigRuntime(t *testing.T) {
|
||||
// in-memory configFiles wins over template dir.
|
||||
t.Run("from configFiles", func(t *testing.T) {
|
||||
cf := map[string][]byte{"config.yaml": []byte("runtime: google-adk\n")}
|
||||
if got := seededConfigRuntime("/nonexistent", cf); got != "google-adk" {
|
||||
t.Fatalf("got %q, want google-adk", got)
|
||||
}
|
||||
})
|
||||
|
||||
// falls back to template dir's config.yaml.
|
||||
t.Run("from template dir", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte("name: a\nruntime: claude-code\n"), 0o600); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got := seededConfigRuntime(dir, nil); got != "claude-code" {
|
||||
t.Fatalf("got %q, want claude-code", got)
|
||||
}
|
||||
})
|
||||
|
||||
// nothing available → "".
|
||||
t.Run("indeterminate", func(t *testing.T) {
|
||||
if got := seededConfigRuntime("", nil); got != "" {
|
||||
t.Fatalf("got %q, want empty", got)
|
||||
}
|
||||
if got := seededConfigRuntime("/does/not/exist", map[string][]byte{}); got != "" {
|
||||
t.Fatalf("got %q, want empty", got)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestRuntimeSeedMismatchAbort(t *testing.T) {
|
||||
adkCfg := map[string][]byte{"config.yaml": []byte("runtime: google-adk\n")}
|
||||
ccCfg := map[string][]byte{"config.yaml": []byte("name: Claude Code Agent\nruntime: claude-code\n")}
|
||||
|
||||
t.Run("mismatch fails loud (the #2027 demo bug)", func(t *testing.T) {
|
||||
// requested google-adk, but seeding the claude-code-default config.
|
||||
abort := runtimeSeedMismatchAbort("google-adk", "", ccCfg)
|
||||
if abort == nil {
|
||||
t.Fatal("expected abort for google-adk requested but claude-code seeded, got nil")
|
||||
}
|
||||
if abort.Extra["requested_runtime"] != "google-adk" || abort.Extra["seeded_runtime"] != "claude-code" {
|
||||
t.Fatalf("abort.Extra mismatch: %+v", abort.Extra)
|
||||
}
|
||||
if abort.Extra["issue"] != "2027" {
|
||||
t.Fatalf("expected issue 2027 tag, got %v", abort.Extra["issue"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("match is allowed", func(t *testing.T) {
|
||||
if abort := runtimeSeedMismatchAbort("google-adk", "", adkCfg); abort != nil {
|
||||
t.Fatalf("expected no abort when seeded runtime matches, got %q", abort.Msg)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("empty requested runtime is allowed (org-template default path)", func(t *testing.T) {
|
||||
if abort := runtimeSeedMismatchAbort("", "", ccCfg); abort != nil {
|
||||
t.Fatalf("expected no abort for unspecified runtime, got %q", abort.Msg)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("indeterminate seed is allowed (CP mode, no local config bytes)", func(t *testing.T) {
|
||||
if abort := runtimeSeedMismatchAbort("google-adk", "", nil); abort != nil {
|
||||
t.Fatalf("expected no abort when seeded runtime is indeterminate, got %q", abort.Msg)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("mismatch via template dir also fails loud", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte("runtime: claude-code\n"), 0o600); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if abort := runtimeSeedMismatchAbort("hermes", dir, nil); abort == nil {
|
||||
t.Fatal("expected abort for hermes requested but claude-code template seeded")
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -37,8 +37,11 @@ package handlers
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
|
||||
@@ -193,7 +196,40 @@ func (h *WorkspaceHandler) prepareProvisionContext(
|
||||
// continue to rely on workspace_secrets / org-import persona-env
|
||||
// merge for their git auth.
|
||||
applyAgentGitHTTPCreds(envVars, payload.Role)
|
||||
applyPlatformManagedLLMEnv(ctx, envVars, workspaceID, payload.Runtime, payload.Model)
|
||||
// molecule-core#1994: per-workspace LLM billing-mode resolution + env wiring.
|
||||
// On platform_managed it forces the CP proxy usage token; on byok/disabled
|
||||
// it keeps the tenant's own provider-MATCHING creds (global OR workspace
|
||||
// scope) and reports whether a usable LLM credential is present.
|
||||
//
|
||||
// internal#728 Bug 1: globalSecretKeys (loadWorkspaceSecrets provenance)
|
||||
// lets the byok branch strip ONLY operator-store-origin LLM creds that do
|
||||
// NOT match the resolved provider's auth_env — so a non-anthropic-oauth
|
||||
// claude-code workspace no longer inherits the stray tenant-global
|
||||
// CLAUDE_CODE_OAUTH_TOKEN the runtime would greedily prefer. User-authored
|
||||
// workspace_secrets (provenance flag cleared) are exempt.
|
||||
llmRes := applyPlatformManagedLLMEnv(ctx, envVars, workspaceID, payload.Runtime, payload.Model, globalSecretKeys)
|
||||
// Fail closed for a BYOK workspace with no usable LLM credential at ANY
|
||||
// scope: do NOT start it credential-less. Mirror the "model+provider+
|
||||
// credential REQUIRED at create" spirit with an actionable error surfaced
|
||||
// at provision time.
|
||||
//
|
||||
// Scoped to byok specifically (NOT disabled): "byok" means "the user
|
||||
// intends to run an LLM on their own credential" — a missing one is a
|
||||
// misconfiguration worth surfacing loudly. "disabled" means "this
|
||||
// workspace runs no platform-billed LLM at all" (terminal / file work, or
|
||||
// a runtime that talks to a non-bypass-key endpoint), so aborting would
|
||||
// regress a legitimate no-LLM workspace.
|
||||
//
|
||||
// The bypass-key check is intentionally broad — any present bypass key
|
||||
// (the tenant's own, at global or workspace scope) clears it.
|
||||
if llmRes.ResolvedMode == LLMBillingModeBYOK && !llmRes.HasUsableLLMCred {
|
||||
msg := formatMissingBYOKCredentialError(llmRes.ResolvedMode)
|
||||
log.Printf("Provisioner: ABORT workspace=%s — byok billing mode has no usable LLM credential (MISSING_BYOK_CREDENTIAL, molecule-core#1994)", workspaceID)
|
||||
return nil, &provisionAbort{
|
||||
Msg: msg,
|
||||
Extra: map[string]interface{}{"error": msg, "code": "MISSING_BYOK_CREDENTIAL", "billing_mode": llmRes.ResolvedMode, "issue": "1994"},
|
||||
}
|
||||
}
|
||||
applyRuntimeModelEnv(envVars, payload.Runtime, payload.Model)
|
||||
if payload.Role != "" {
|
||||
envVars["MOLECULE_AGENT_ROLE"] = payload.Role
|
||||
@@ -230,6 +266,22 @@ func (h *WorkspaceHandler) prepareProvisionContext(
|
||||
}
|
||||
}
|
||||
|
||||
// Preflight: runtime-seed match (issue #2027). Fail LOUD when a workspace
|
||||
// NAMED a runtime but the config.yaml we're about to seed declares a
|
||||
// different top-level runtime — the symmetric counterpart to selectImage's
|
||||
// ErrUnresolvableRuntime guard, on the config/template side. Pre-fix, when a
|
||||
// runtime's workspace template wasn't in the tenant cache at provision time
|
||||
// (or sanitizeRuntime coerced an unknown runtime), seeding silently fell
|
||||
// back to the claude-code-default template: the image+env said e.g.
|
||||
// google-adk but the seeded config said claude-code, so the agent booted
|
||||
// mislabeled and personaless yet looked 'online' and returned canned
|
||||
// non-answers. Refusing loudly turns that silent wrong-agent into a visible
|
||||
// WORKSPACE_PROVISION_FAILED the operator can act on.
|
||||
if abort := runtimeSeedMismatchAbort(payload.Runtime, templatePath, configFiles); abort != nil {
|
||||
log.Printf("Provisioner: ABORT workspace=%s — %s", workspaceID, abort.Msg)
|
||||
return nil, abort
|
||||
}
|
||||
|
||||
cfg := h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, envVars, pluginsPath)
|
||||
cfg.ResetClaudeSession = resetClaudeSession
|
||||
|
||||
@@ -240,6 +292,76 @@ func (h *WorkspaceHandler) prepareProvisionContext(
|
||||
}, nil
|
||||
}
|
||||
|
||||
// runtimeSeedMismatchAbort returns a non-nil abort when a workspace NAMED a
|
||||
// runtime but the config.yaml about to be seeded declares a *different*
|
||||
// top-level runtime — the fail-loud counterpart to selectImage's
|
||||
// ErrUnresolvableRuntime (issue #2027). It catches the silent
|
||||
// claude-code-default substitution that occurs when a runtime's workspace
|
||||
// template isn't cached at provision time (or sanitizeRuntime coerced an
|
||||
// unknown runtime to claude-code): both surface as a seeded config whose
|
||||
// runtime contradicts the requested one.
|
||||
//
|
||||
// Pure (modulo reading the template dir's config.yaml). An empty
|
||||
// requestedRuntime (unspecified / org-template default path) or an
|
||||
// indeterminate seeded runtime (e.g. CP mode with no local config bytes) is
|
||||
// allowed — we only fail on a concrete, contradictory signal, never on
|
||||
// absence of one.
|
||||
func runtimeSeedMismatchAbort(requestedRuntime, templatePath string, configFiles map[string][]byte) *provisionAbort {
|
||||
if requestedRuntime == "" {
|
||||
return nil
|
||||
}
|
||||
seeded := seededConfigRuntime(templatePath, configFiles)
|
||||
if seeded == "" || seeded == requestedRuntime {
|
||||
return nil
|
||||
}
|
||||
msg := fmt.Sprintf(
|
||||
"runtime seed mismatch: workspace requested runtime %q but the seeded config.yaml declares %q — the %q workspace template was not available at provision time (silent %q fallback). Refusing to launch a mislabeled agent; refresh the template cache (POST /admin/templates/refresh) and re-provision.",
|
||||
requestedRuntime, seeded, requestedRuntime, seeded,
|
||||
)
|
||||
return &provisionAbort{
|
||||
Msg: msg,
|
||||
Extra: map[string]interface{}{
|
||||
"error": msg,
|
||||
"requested_runtime": requestedRuntime,
|
||||
"seeded_runtime": seeded,
|
||||
"issue": "2027",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// seededConfigRuntime extracts the top-level `runtime:` from the config.yaml
|
||||
// that will be seeded into the workspace — preferring the in-memory
|
||||
// configFiles, falling back to the template directory on disk. Returns ""
|
||||
// when no config.yaml is available or it declares no top-level runtime.
|
||||
func seededConfigRuntime(templatePath string, configFiles map[string][]byte) string {
|
||||
if data, ok := configFiles["config.yaml"]; ok {
|
||||
return parseTopLevelRuntime(data)
|
||||
}
|
||||
if templatePath != "" {
|
||||
if data, err := os.ReadFile(filepath.Join(templatePath, "config.yaml")); err == nil {
|
||||
return parseTopLevelRuntime(data)
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// parseTopLevelRuntime returns the value of the top-level `runtime:` key in a
// config.yaml, or "" when there is none.
//
// It is a small dedicated line scanner (mirroring the one the Create handler
// uses to read a template's runtime), so the provision-time guard needs no
// YAML dependency. Rules:
//
//   - Only lines starting at column 0 are considered; indented lines belong
//     to a nested block (e.g. under runtime_config:) and are skipped.
//   - The line must start with the exact prefix "runtime:", so the sibling
//     key "runtime_config:" can never match.
//   - The first matching line wins, even when its value is empty.
//   - A trailing YAML comment ("runtime: claude-code # default") is dropped,
//     so it cannot leak into the value and cause a false runtime mismatch.
//   - Surrounding whitespace (including a trailing "\r" from CRLF files) and
//     surrounding single/double quotes are stripped.
func parseTopLevelRuntime(data []byte) string {
	const key = "runtime:"
	for _, line := range strings.Split(string(data), "\n") {
		if line == "" || line[0] == ' ' || line[0] == '\t' {
			continue // blank, or indented — inside a nested block
		}
		if !strings.HasPrefix(line, key) {
			continue
		}
		value := strings.TrimSpace(strings.TrimPrefix(line, key))
		value = stripYAMLInlineComment(value)
		return strings.Trim(value, `"'`)
	}
	return ""
}

// stripYAMLInlineComment removes a trailing `# comment` from an already
// whitespace-trimmed single-line YAML scalar.
//
// For a quoted scalar everything after the closing quote is discarded, so a
// '#' inside the quotes is preserved. For a plain scalar a '#' starts a
// comment only when it is the first character or is preceded by a space or
// tab, so values such as "c#" are left intact.
func stripYAMLInlineComment(value string) string {
	if value == "" {
		return value
	}
	if value[0] == '#' {
		return "" // the whole value is a comment
	}
	if quote := value[0]; quote == '"' || quote == '\'' {
		if end := strings.IndexByte(value[1:], quote); end >= 0 {
			return value[:end+2] // keep through the closing quote
		}
		return value // unterminated quote — leave untouched
	}
	for i := 1; i < len(value); i++ {
		if value[i] == '#' && (value[i-1] == ' ' || value[i-1] == '\t') {
			return strings.TrimSpace(value[:i])
		}
	}
	return value
}
|
||||
|
||||
// mintWorkspaceSecrets issues + persists the workspace auth token
|
||||
// AND the platform→workspace inbound secret (#2312). Both modes MUST
|
||||
// call this — Docker mints + writes to local config volume; SaaS
|
||||
|
||||
@@ -494,6 +494,108 @@ func TestPrepareProvisionContext_WorkspaceSecretWinsOverPersonaToken(t *testing.
|
||||
}
|
||||
}
|
||||
|
||||
// TestPrepareProvisionContext_ByokWithTenantGlobalOAuthSucceeds is the
|
||||
// molecule-core#1994 (corrected-model) end-to-end inversion of the former
|
||||
// internal#711 fail-closed test, for the live Reno Stars byok agents. A byok
|
||||
// workspace whose LLM credential is the TENANT's own scope:global
|
||||
// CLAUDE_CODE_OAUTH_TOKEN (a global_secrets row, no workspace override) must:
|
||||
//
|
||||
// 1. KEEP that oauth in the prepared container env (it is the tenant's own
|
||||
// credential — exactly what byok runs on, direct), and
|
||||
// 2. NOT abort — the provision proceeds.
|
||||
//
|
||||
// Pre-fix (internal#711) prepared.EnvVars stripped the global oauth and the
|
||||
// provision aborted MISSING_BYOK_CREDENTIAL → the agent was dead. This is the
|
||||
// discriminating end-to-end guard for the fix.
|
||||
func TestPrepareProvisionContext_ByokWithTenantGlobalOAuthSucceeds(t *testing.T) {
|
||||
const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc" // Reno Stars SEO agent
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
|
||||
|
||||
mock := setupTestDB(t)
|
||||
// global_secrets carries the TENANT's own scope:global OAuth token + the
|
||||
// stored MODEL (so the resolver derives byok from opus).
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
|
||||
AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("TENANT-OWN-GLOBAL-OAUTH"), 0))
|
||||
// Workspace set its own MODEL (no LLM cred of its own — relies on global).
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
|
||||
AddRow("MODEL", []byte("opus"), 0))
|
||||
// Resolver: workspace override = byok.
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
|
||||
|
||||
handler := NewWorkspaceHandler(&captureBroadcaster{}, nil, "http://localhost:8080", t.TempDir())
|
||||
payload := models.CreateWorkspacePayload{
|
||||
Name: "Reno Stars SEO",
|
||||
Runtime: "claude-code",
|
||||
Tier: 1,
|
||||
}
|
||||
prepared, abort := handler.prepareProvisionContext(
|
||||
context.Background(), wsID, "/nonexistent", nil, payload, false)
|
||||
|
||||
if abort != nil {
|
||||
t.Fatalf("expected provision to proceed (byok on tenant's own global oauth), got abort=%v", abort.Extra)
|
||||
}
|
||||
if prepared == nil {
|
||||
t.Fatalf("prepared context is nil despite no abort")
|
||||
}
|
||||
// The tenant's own global oauth must be present in the container env.
|
||||
if prepared.EnvVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
|
||||
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the tenant's own global oauth preserved for byok",
|
||||
prepared.EnvVars["CLAUDE_CODE_OAUTH_TOKEN"])
|
||||
}
|
||||
// byok must not have been routed through the platform proxy.
|
||||
if _, ok := prepared.EnvVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
|
||||
t.Fatalf("byok provision must NOT inject the platform usage token")
|
||||
}
|
||||
if got := prepared.EnvVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"]; got != LLMBillingModeBYOK {
|
||||
t.Fatalf("MOLECULE_LLM_BILLING_MODE_RESOLVED = %q, want byok", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestPrepareProvisionContext_ByokNoCredentialAtAnyScopeFailsClosed is the
|
||||
// companion: the fail-closed abort is UNCHANGED for a byok workspace with no
|
||||
// LLM credential at ANY scope (no global row, no workspace row). It still
|
||||
// aborts MISSING_BYOK_CREDENTIAL rather than starting credential-less.
|
||||
func TestPrepareProvisionContext_ByokNoCredentialAtAnyScopeFailsClosed(t *testing.T) {
|
||||
const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc"
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
|
||||
|
||||
mock := setupTestDB(t)
|
||||
// No global LLM cred — only the stored MODEL so the resolver derives byok.
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
|
||||
AddRow("MODEL", []byte("opus"), 0))
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
|
||||
|
||||
handler := NewWorkspaceHandler(&captureBroadcaster{}, nil, "http://localhost:8080", t.TempDir())
|
||||
payload := models.CreateWorkspacePayload{
|
||||
Name: "Reno Stars SEO",
|
||||
Runtime: "claude-code",
|
||||
Tier: 1,
|
||||
}
|
||||
prepared, abort := handler.prepareProvisionContext(
|
||||
context.Background(), wsID, "/nonexistent", nil, payload, false)
|
||||
|
||||
if abort == nil {
|
||||
t.Fatalf("expected MISSING_BYOK_CREDENTIAL abort, got success (prepared=%v)", prepared)
|
||||
}
|
||||
if code, _ := abort.Extra["code"].(string); code != "MISSING_BYOK_CREDENTIAL" {
|
||||
t.Fatalf("abort.Extra[code] = %v, want MISSING_BYOK_CREDENTIAL", abort.Extra["code"])
|
||||
}
|
||||
if mode, _ := abort.Extra["billing_mode"].(string); mode != LLMBillingModeBYOK {
|
||||
t.Fatalf("abort.Extra[billing_mode] = %v, want %q", abort.Extra["billing_mode"], LLMBillingModeBYOK)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReadOrLazyHealInboundSecret pins the four branches of the
|
||||
// shared lazy-heal helper directly. Each call site (chat_files,
|
||||
// registry) has its own integration test, but those go through the
|
||||
@@ -595,103 +697,49 @@ func TestReadOrLazyHealInboundSecret(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
// TestDeriveProviderFromModelSlug pins the slug→provider mapping shared
|
||||
// with workspace-configs-templates/hermes/scripts/derive-provider.sh.
|
||||
// Sync-test: when a new prefix is added to the shell script, add it
|
||||
// here too. The two intentional differences from the shell version
|
||||
// (nousresearch/openai both → "openrouter" at provision time;
|
||||
// unknown/no-prefix → "" instead of "auto") are exercised explicitly.
|
||||
func TestDeriveProviderFromModelSlug(t *testing.T) {
|
||||
t.Parallel()
|
||||
cases := []struct {
|
||||
name string
|
||||
model string
|
||||
want string
|
||||
}{
|
||||
{"minimax", "minimax/MiniMax-M2.7-highspeed", "minimax"},
|
||||
{"minimax-cn keeps cn suffix", "minimax-cn/MiniMax-M2.7", "minimax-cn"},
|
||||
{"anthropic", "anthropic/claude-sonnet-4-6", "anthropic"},
|
||||
{"gemini", "gemini/gemini-2.5-pro", "gemini"},
|
||||
{"deepseek", "deepseek/deepseek-v3", "deepseek"},
|
||||
{"zai", "zai/glm-4.6", "zai"},
|
||||
{"kimi-coding", "kimi-coding/kimi-k2", "kimi-coding"},
|
||||
{"kimi-coding-cn keeps cn suffix", "kimi-coding-cn/kimi-k2", "kimi-coding-cn"},
|
||||
{"alibaba via dashscope alias", "dashscope/qwen3", "alibaba"},
|
||||
{"alibaba via qwen alias", "qwen/qwen3-coder", "alibaba"},
|
||||
{"xiaomi via mimo alias", "mimo/mimo-vl", "xiaomi"},
|
||||
{"arcee via arcee-ai alias", "arcee-ai/arcee-blitz", "arcee"},
|
||||
{"nvidia via nim alias", "nim/llama-3.3-nemotron-super", "nvidia"},
|
||||
{"ollama-cloud", "ollama-cloud/qwen3", "ollama-cloud"},
|
||||
{"huggingface via hf alias", "hf/Qwen/Qwen3", "huggingface"},
|
||||
{"ai-gateway", "ai-gateway/anthropic-claude-sonnet-4-6", "ai-gateway"},
|
||||
{"kilocode", "kilocode/kilo-1", "kilocode"},
|
||||
{"opencode-zen", "opencode-zen/zen-1", "opencode-zen"},
|
||||
{"opencode-go", "opencode-go/code-1", "opencode-go"},
|
||||
{"openrouter passthrough", "openrouter/anthropic/claude-sonnet-4-6", "openrouter"},
|
||||
{"custom passthrough", "custom/my-private-endpoint", "custom"},
|
||||
// Runtime-only override candidates default to openrouter at
|
||||
// provision time (derive-provider.sh upgrades to nous/custom at
|
||||
// boot if HERMES_API_KEY/OPENAI_API_KEY are present).
|
||||
{"nousresearch defaults to openrouter at provision time", "nousresearch/hermes-4-70b", "openrouter"},
|
||||
{"openai defaults to openrouter at provision time", "openai/gpt-5", "openrouter"},
|
||||
// hermes-agent v0.12.0 / 2026-04-30 provider list — the drift gate
|
||||
// in derive_provider_drift_test.go pins parity with the shell case
|
||||
// statement.
|
||||
{"xai", "xai/grok-4", "xai"},
|
||||
{"xai via grok alias", "grok/grok-4", "xai"},
|
||||
{"bedrock", "bedrock/anthropic.claude-sonnet-4-6", "bedrock"},
|
||||
{"bedrock via aws alias", "aws/anthropic.claude-sonnet-4-6", "bedrock"},
|
||||
{"tencent", "tencent/hunyuan-coder", "tencent-tokenhub"},
|
||||
{"tencent-tokenhub passthrough", "tencent-tokenhub/hunyuan-coder", "tencent-tokenhub"},
|
||||
{"gmi", "gmi/gmi-coder-1", "gmi"},
|
||||
{"qwen-oauth", "qwen-oauth/qwen3-coder", "qwen-oauth"},
|
||||
{"lmstudio", "lmstudio/qwen3-coder", "lmstudio"},
|
||||
{"lmstudio via lm-studio alias", "lm-studio/qwen3-coder", "lmstudio"},
|
||||
{"minimax-oauth", "minimax-oauth/MiniMax-M2.7", "minimax-oauth"},
|
||||
{"alibaba-coding-plan", "alibaba-coding-plan/qwen3-coder", "alibaba-coding-plan"},
|
||||
{"google-gemini-cli", "google-gemini-cli/gemini-2.5-pro", "google-gemini-cli"},
|
||||
{"openai-codex", "openai-codex/gpt-5-codex", "openai-codex"},
|
||||
{"copilot-acp", "copilot-acp/claude-sonnet-4-6", "copilot-acp"},
|
||||
{"copilot", "copilot/claude-sonnet-4-6", "copilot"},
|
||||
// Unknowns return "" so the caller skips the LLM_PROVIDER write
|
||||
// and lets derive-provider.sh's *=auto branch decide at runtime.
|
||||
{"unknown prefix returns empty", "totally-unknown-model/foo", ""},
|
||||
{"empty input returns empty", "", ""},
|
||||
{"no slash returns empty", "no-slash-here", ""},
|
||||
{"leading slash returns empty", "/leading-slash", ""},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
tc := tc
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
got := deriveProviderFromModelSlug(tc.model)
|
||||
if got != tc.want {
|
||||
t.Errorf("deriveProviderFromModelSlug(%q) = %q, want %q", tc.model, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
// internal#718 P4 closure: TestDeriveProviderFromModelSlug was the
|
||||
// table-driven sync test that pinned deriveProviderFromModelSlug
|
||||
// (retire-list #3) against
|
||||
// workspace-configs-templates/hermes/scripts/derive-provider.sh.
|
||||
//
|
||||
// Both the Go function and this test (with its 35+ slug→provider
|
||||
// cases) are retired. The slug→provider mapping is now covered by
|
||||
// providers.Manifest.DeriveProvider against the registry SSOT
|
||||
// (TestDeriveProvider_RealManifest in
|
||||
// internal/providers/derive_provider_test.go). The shell script
|
||||
// remains the in-container fallback; its byte-identity with the
|
||||
// registry view of hermes is a P4 follow-up gated on registry data
|
||||
// growth (see PR-2 codegen of hermes config.yaml from the registry).
|
||||
//
|
||||
// TestWorkspaceCreate_FirstDeploy_PersistsModelAndProvider, which
|
||||
// asserted that Create writes BOTH MODEL and LLM_PROVIDER rows, is
|
||||
// replaced by TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL
|
||||
// below — the LLM_PROVIDER half of the contract is retired.
|
||||
//
|
||||
// TestWorkspaceCreate_FirstDeploy_UnknownModel_OnlyMintModelProvider
|
||||
// is subsumed by the same: with LLM_PROVIDER never written, the
|
||||
// known-vs-unknown distinction at Create disappears.
|
||||
|
||||
// TestWorkspaceCreate_FirstDeploy_PersistsModelAndProvider pins the
|
||||
// fix for failed-workspace 95ed3ff2 (2026-05-02). Pre-fix: the canvas
|
||||
// POSTed minimax/MiniMax-M2.7 in payload.Model, the workspace row was
|
||||
// created, but neither the model nor the derived provider was ever
|
||||
// written to workspace_secrets. On any subsequent restart, the
|
||||
// applyRuntimeModelEnv fallback found nothing and hermes booted with
|
||||
// the template default (nousresearch/hermes-4-70b) → wrong provider
|
||||
// keys → /health poll failed → never registered.
|
||||
// TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL pins the post-P4
|
||||
// contract: WorkspaceHandler.Create writes the MODEL workspace_secret
|
||||
// (so the canvas-picked model survives restart and applyRuntimeModelEnv
|
||||
// finds it via the fallback chain) and writes NOTHING ELSE in the
|
||||
// secret-mint window. Specifically: NO LLM_PROVIDER row is written,
|
||||
// regardless of payload.LLMProvider or the slug-prefix.
|
||||
//
|
||||
// Post-fix: the create handler writes both rows after committing the
|
||||
// workspace row. This test asserts the SQL writes happen with the
|
||||
// correct keys + values.
|
||||
// Pre-P4 the create handler also wrote LLM_PROVIDER via setProviderSecret
|
||||
// — either from payload.LLMProvider verbatim or from
|
||||
// deriveProviderFromModelSlug(payload.Model). Both code paths were
|
||||
// retired in internal#718 P4 closure together with the LLM_PROVIDER
|
||||
// workspace_secret itself (no consumer remains; the provider is derived
|
||||
// at every decision point from (runtime, model) via the registry).
|
||||
//
|
||||
// 2026-05-19 follow-up: the workspace_secrets row that holds the
|
||||
// picked model id was renamed MODEL_PROVIDER → MODEL (the column name
|
||||
// was misleading and bled into applyRuntimeModelEnv as a slug
|
||||
// fallback). The sqlmock regex below now anchors on 'MODEL' instead
|
||||
// of 'MODEL_PROVIDER'. See fix/workspace-server-rename-
|
||||
// MODEL_PROVIDER-to-MODEL + the 20260519000000 rename migration.
|
||||
func TestWorkspaceCreate_FirstDeploy_PersistsModelAndProvider(t *testing.T) {
|
||||
// sqlmock failure on this expectation set is the canonical regression
|
||||
// signal: if a future PR re-introduces an LLM_PROVIDER write at create,
|
||||
// sqlmock surfaces "ExpectExec was not called" for any added insert.
|
||||
// The "MODEL anchor uses no LLM_PROVIDER" assertion below is the
|
||||
// stronger version of the same gate.
|
||||
func TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
@@ -706,43 +754,35 @@ func TestWorkspaceCreate_FirstDeploy_PersistsModelAndProvider(t *testing.T) {
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectCommit()
|
||||
|
||||
// The fix: MODEL is upserted with the verbatim model slug
|
||||
// (renamed from MODEL_PROVIDER on 2026-05-19 — see file-level
|
||||
// docstring). SQL has 3 placeholders ($1=workspace_id, $2=
|
||||
// encrypted_value reused in the conflict-update, $3=version
|
||||
// reused in the conflict-update), so sqlmock sees 3 args. The
|
||||
// 'MODEL' / 'LLM_PROVIDER' key is a literal in the SQL — we
|
||||
// distinguish the two writes with the regex match below. The
|
||||
// 'MODEL' anchor uses a word boundary (`[^_A-Z]`) so it does
|
||||
// NOT silently match the legacy 'MODEL_PROVIDER' name.
|
||||
// MODEL upsert — the only post-commit workspace_secrets write that
|
||||
// survived the P4 closure. The 'MODEL' key is literal in the SQL.
|
||||
mock.ExpectExec(`INSERT INTO workspace_secrets[\s\S]*'MODEL'`).
|
||||
WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
// The fix: LLM_PROVIDER is upserted with the derived provider name.
|
||||
mock.ExpectExec(`INSERT INTO workspace_secrets[\s\S]*'LLM_PROVIDER'`).
|
||||
WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// Post-mint side effects (canvas layout + structure_events broadcast
|
||||
// + the external-workspace UPDATE/IssueToken chain). Order matches
|
||||
// workspace.go.
|
||||
// workspace.go. CRITICALLY: no second `INSERT INTO workspace_secrets`
|
||||
// is expected — sqlmock fails if Create attempts an LLM_PROVIDER
|
||||
// write.
|
||||
mock.ExpectExec("INSERT INTO canvas_layouts").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
// External branch with no URL: status → awaiting_agent + IssueToken.
|
||||
mock.ExpectExec(`UPDATE workspaces SET status =`).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
// wsauth.IssueToken inserts into workspace_auth_tokens.
|
||||
mock.ExpectExec("INSERT INTO workspace_auth_tokens").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
// awaiting_agent broadcast.
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"name":"External Minimax Agent","runtime":"external","external":true,"model":"minimax/MiniMax-M2.7"}`
|
||||
// Body carries an explicit llm_provider AND a slug-prefixed model — both
|
||||
// of which would have triggered an LLM_PROVIDER write pre-P4. The
|
||||
// payload field is preserved for backward-compat (older canvases
|
||||
// still send it) but the value is intentionally ignored by Create.
|
||||
body := `{"name":"External Minimax Agent","runtime":"external","external":true,"model":"minimax/MiniMax-M2.7","llm_provider":"minimax"}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
@@ -752,7 +792,7 @@ func TestWorkspaceCreate_FirstDeploy_PersistsModelAndProvider(t *testing.T) {
|
||||
t.Fatalf("expected status 201, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations not met — first-deploy did NOT persist MODEL + LLM_PROVIDER (this is the prod bug recurrence): %v", err)
|
||||
t.Errorf("sqlmock expectations not met — Create wrote an unexpected workspace_secrets row (likely a re-introduced LLM_PROVIDER write): %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -808,56 +848,12 @@ func TestWorkspaceCreate_FirstDeploy_NoModel_Returns422(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestWorkspaceCreate_FirstDeploy_UnknownModel_OnlyMintModelProvider
|
||||
// asserts the asymmetric case: an unknown model prefix still gets
|
||||
// MODEL persisted (so the user's exact slug survives restart and
|
||||
// applyRuntimeModelEnv finds it), but LLM_PROVIDER is skipped (so
|
||||
// derive-provider.sh's *=auto branch can decide at runtime instead of
|
||||
// being pre-empted by a guess). The MODEL key was renamed from
|
||||
// MODEL_PROVIDER on 2026-05-19 — see file-level docstring.
|
||||
func TestWorkspaceCreate_FirstDeploy_UnknownModel_OnlyMintModelProvider(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
mock.ExpectBegin()
|
||||
mock.ExpectExec("INSERT INTO workspaces").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectCommit()
|
||||
|
||||
// Only MODEL — LLM_PROVIDER must NOT be written for unknown
|
||||
// prefixes. Same 3-arg shape as above; key is literal in SQL.
|
||||
mock.ExpectExec(`INSERT INTO workspace_secrets[\s\S]*'MODEL'`).
|
||||
WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
mock.ExpectExec("INSERT INTO canvas_layouts").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec(`UPDATE workspaces SET status =`).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("INSERT INTO workspace_auth_tokens").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"name":"Unknown Model Agent","runtime":"external","external":true,"model":"totally-unknown-model/foo"}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Create(c)
|
||||
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Fatalf("expected status 201, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations not met — unknown-prefix model should mint MODEL but skip LLM_PROVIDER: %v", err)
|
||||
}
|
||||
}
|
||||
// internal#718 P4 closure: the asymmetric "known prefix → both
|
||||
// MODEL+LLM_PROVIDER; unknown prefix → MODEL only" contract is moot —
|
||||
// Create never writes LLM_PROVIDER for ANY model now. The equivalent
|
||||
// coverage is TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL above
|
||||
// (uses a slug-prefixed model that pre-P4 WOULD have triggered an
|
||||
// LLM_PROVIDER write; sqlmock fails if Create attempts one).
|
||||
|
||||
// TestApplyRuntimeModelEnv_SetsUniversalMODELForAllRuntimes pins the
|
||||
// fix for Bug B (2026-05-02): canvas-selected model was silently dropped
|
||||
@@ -972,7 +968,7 @@ func TestApplyPlatformManagedLLMEnv_NonClaudeRuntimeDefaultsOpenAIProxyWhenNoWor
|
||||
t.Setenv("MOLECULE_LLM_DEFAULT_MODEL", "moonshot/kimi-k2.6")
|
||||
|
||||
envVars := map[string]string{}
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "codex", "")
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "codex", "", nil)
|
||||
applyRuntimeModelEnv(envVars, "codex", "")
|
||||
|
||||
if got := envVars["OPENAI_BASE_URL"]; got != "https://api.example.test/api/v1/internal/llm/openai/v1" {
|
||||
@@ -1002,7 +998,7 @@ func TestApplyPlatformManagedLLMEnv_StripsWorkspaceOpenAIKeyForClaudeCode(t *tes
|
||||
"OPENAI_BASE_URL": "https://api.openai.com/v1",
|
||||
"MODEL": "openai/gpt-5.5",
|
||||
}
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "")
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "", nil)
|
||||
|
||||
if _, ok := envVars["OPENAI_API_KEY"]; ok {
|
||||
t.Fatalf("OPENAI_API_KEY should be stripped for claude-code platform-managed mode")
|
||||
@@ -1028,7 +1024,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeUsesAnthropicProxyOverOAuth(t *tes
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token",
|
||||
"MODEL": "sonnet",
|
||||
}
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "")
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "", nil)
|
||||
|
||||
if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {
|
||||
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN should be stripped in platform-managed mode")
|
||||
@@ -1051,7 +1047,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeInjectsAnthropicProxyWhenNoWorkspa
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
envVars := map[string]string{}
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "minimax/MiniMax-M2.7")
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "minimax/MiniMax-M2.7", nil)
|
||||
|
||||
if got := envVars["ANTHROPIC_BASE_URL"]; got != "https://api.example.test/api/v1/internal/llm/anthropic/v1" {
|
||||
t.Fatalf("ANTHROPIC_BASE_URL = %q", got)
|
||||
@@ -1074,7 +1070,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeStripsVendorBYOK(t *testing.T) {
|
||||
"MINIMAX_API_KEY": "user-minimax-key",
|
||||
"MODEL": "MiniMax-M2.7",
|
||||
}
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "")
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "", nil)
|
||||
|
||||
if _, ok := envVars["MINIMAX_API_KEY"]; ok {
|
||||
t.Fatalf("MINIMAX_API_KEY should be stripped in platform-managed mode")
|
||||
@@ -1090,20 +1086,38 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeStripsVendorBYOK(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// internal#718 P2-B: byok is now DERIVED, not org-env-driven. A claude-code
|
||||
// workspace with NO explicit override + a non-platform-deriving model
|
||||
// (kimi-for-coding → kimi-coding) resolves byok and must NOT get the CP proxy
|
||||
// creds injected. (Pre-P2 this was driven by the org env MOLECULE_LLM_BILLING_MODE
|
||||
// with an empty workspace id; that mechanism is retired.)
|
||||
func TestApplyPlatformManagedLLMEnv_NoopsOutsidePlatformManaged(t *testing.T) {
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", "byok")
|
||||
const wsID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
|
||||
mock := setupTestDB(t)
|
||||
// No explicit override → derive from (claude-code, kimi-for-coding) → byok.
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
|
||||
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", "platform_managed") // org env ignored now
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
envVars := map[string]string{}
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "")
|
||||
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "kimi-for-coding", nil)
|
||||
|
||||
if res.ResolvedMode != LLMBillingModeBYOK {
|
||||
t.Fatalf("resolved mode = %q, want byok (derived from non-platform model)", res.ResolvedMode)
|
||||
}
|
||||
if _, ok := envVars["OPENAI_API_KEY"]; ok {
|
||||
t.Fatalf("OPENAI_API_KEY should not be set outside platform-managed mode")
|
||||
}
|
||||
if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
|
||||
t.Fatalf("MOLECULE_LLM_USAGE_TOKEN should not be set outside platform-managed mode")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_ClaudeCodeByokKeepsOwnProviderEnv is the
|
||||
@@ -1137,7 +1151,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeByokKeepsOwnProviderEnv(t *testing
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token",
|
||||
"MODEL": "sonnet",
|
||||
}
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "")
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
|
||||
|
||||
// 1. OAuth token intact — not stripped.
|
||||
if got := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; got != "user-oauth-token" {
|
||||
@@ -1168,6 +1182,310 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeByokKeepsOwnProviderEnv(t *testing
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvivesAndRunsDirect is
|
||||
// the molecule-core#1994 (corrected-model) inversion of the former
|
||||
// internal#711 strip test, exercised through applyPlatformManagedLLMEnv. The
|
||||
// live failure this guards: the Reno Stars Marketing/SEO byok agents whose
|
||||
// Claude oauth lives at GLOBAL scope (the tenant's own credential, shared
|
||||
// across the tenant's workspaces) were stripped + failed-closed under the
|
||||
// inverted "global == platform's own" premise → MISSING_BYOK_CREDENTIAL →
|
||||
// dead. Under the corrected model `global_secrets` is the TENANT's store, so
|
||||
// that oauth is exactly what byok runs on: it must SURVIVE and route direct.
|
||||
//
|
||||
// Mutation (load-bearing): re-add stripGlobalOriginLLMCreds on the byok branch
|
||||
// → the oauth disappears → this test RED on both survival + HasUsableLLMCred.
|
||||
func TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvivesAndRunsDirect(t *testing.T) {
|
||||
const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc" // Reno Stars SEO agent
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
|
||||
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
// The tenant's own oauth at GLOBAL scope (a global_secrets row). The
|
||||
// workspace set no separate row of its own; it relies on the tenant global.
|
||||
envVars := map[string]string{
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "TENANT-OWN-GLOBAL-OAUTH",
|
||||
"MODEL": "opus",
|
||||
}
|
||||
|
||||
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
|
||||
|
||||
// 1. The tenant's own global-scope oauth SURVIVES — byok runs on it.
|
||||
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
|
||||
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the tenant's own global-scope token preserved for byok", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
|
||||
}
|
||||
// 2. No CP proxy creds forced (byok = workspace talks to its own provider).
|
||||
if got, ok := envVars["ANTHROPIC_API_KEY"]; ok {
|
||||
t.Fatalf("ANTHROPIC_API_KEY must NOT be injected for byok, got %q", got)
|
||||
}
|
||||
if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
|
||||
t.Fatalf("MOLECULE_LLM_USAGE_TOKEN must NOT be injected for byok")
|
||||
}
|
||||
// 3. byok WITH a usable credential → caller does NOT fail closed.
|
||||
if res.ResolvedMode != LLMBillingModeBYOK {
|
||||
t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeBYOK)
|
||||
}
|
||||
if !res.HasUsableLLMCred {
|
||||
t.Fatalf("HasUsableLLMCred = false, want true (tenant's own global-scope oauth is the usable credential)")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// internal#718 P2-B BEHAVIOR DELTA — billing/credential decision DERIVES the
|
||||
// provider (no stored LLM_PROVIDER, no override). These three tests are the
|
||||
// explicit delta the RFC calls out, exercised through the real provision path
|
||||
// (applyPlatformManagedLLMEnv) with the registry derivation driving the mode:
|
||||
// - platform-derived → platform_managed → platform creds (UNCHANGED)
|
||||
// - non-platform-derived → byok → #1963 strip + fail-closed (THE FIX)
|
||||
// - unset model → platform default (CTO-confirmed)
|
||||
// All use NO explicit override (override read returns NULL) so the DERIVATION
|
||||
// is what decides — this is what supersedes #1966's stored-LLM_PROVIDER read.
|
||||
// =========================================================================
|
||||
|
||||
// PLATFORM-DERIVED → UNCHANGED. A claude-code workspace with a platform-
|
||||
// namespaced model (anthropic/claude-opus-4-7) derives to the closed `platform`
|
||||
// provider → platform_managed → CP proxy creds injected, exactly as before.
|
||||
func TestApplyPlatformManagedLLMEnv_DERIVED_PlatformModelKeepsPlatformCreds(t *testing.T) {
|
||||
const wsID = "11111111-2222-3333-4444-555555555555"
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil)) // NO override → derive
|
||||
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModeBYOK) // org env IGNORED now
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
envVars := map[string]string{}
|
||||
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "anthropic/claude-opus-4-7", nil)
|
||||
|
||||
if res.ResolvedMode != LLMBillingModePlatformManaged {
|
||||
t.Fatalf("platform-derived model must resolve platform_managed, got %q (source=%s)", res.ResolvedMode, res.Source)
|
||||
}
|
||||
if res.Source != BillingModeSourceDerivedProvider {
|
||||
t.Errorf("source: got %q want derived_provider", res.Source)
|
||||
}
|
||||
// Platform path injects the CP proxy creds (UNCHANGED behavior).
|
||||
if got := envVars["ANTHROPIC_API_KEY"]; got != "tenant-admin-token" {
|
||||
t.Errorf("platform path must inject the CP proxy token as ANTHROPIC_API_KEY, got %q", got)
|
||||
}
|
||||
if !res.HasUsableLLMCred {
|
||||
t.Errorf("platform path always has a usable cred (the proxy token)")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// NON-PLATFORM-DERIVED + NO CREDENTIAL AT ALL → byok + FAIL-CLOSED. This is
|
||||
// the legitimate remaining fail-closed path under the corrected model
|
||||
// (molecule-core#1994): a claude-code workspace with a non-platform model
|
||||
// (kimi-for-coding → byok) and NO override and NO LLM credential at ANY scope
|
||||
// (no global row, no workspace row) has nothing to run on → HasUsableLLMCred=
|
||||
// false → caller (prepareProvisionContext) aborts MISSING_BYOK_CREDENTIAL. The
|
||||
// fail-closed branch is unchanged by the strip removal; only its trigger
|
||||
// narrowed from "no workspace-scoped cred" to "no cred at any scope".
|
||||
func TestApplyPlatformManagedLLMEnv_DERIVED_ByokNoCredentialFailsClosed(t *testing.T) {
|
||||
const wsID = "99999999-8888-7777-6666-555555555555"
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil)) // NO override → derive
|
||||
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) // org env IGNORED now
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
// No LLM credential at all — neither global nor workspace scope.
|
||||
envVars := map[string]string{}
|
||||
|
||||
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "kimi-for-coding", nil)
|
||||
|
||||
// 1. DERIVED byok (NOT the old platform_managed default).
|
||||
if res.ResolvedMode != LLMBillingModeBYOK {
|
||||
t.Fatalf("non-platform-derived model must resolve byok, got %q (source=%s)", res.ResolvedMode, res.Source)
|
||||
}
|
||||
if res.Source != BillingModeSourceDerivedProvider {
|
||||
t.Errorf("source: got %q want derived_provider", res.Source)
|
||||
}
|
||||
// 2. No CP proxy creds forced.
|
||||
if got, ok := envVars["ANTHROPIC_API_KEY"]; ok {
|
||||
t.Fatalf("ANTHROPIC_API_KEY must NOT be injected for byok, got %q", got)
|
||||
}
|
||||
// 3. No usable cred at any scope → caller fails closed.
|
||||
if res.HasUsableLLMCred {
|
||||
t.Fatalf("HasUsableLLMCred = true, want false (no LLM credential present at any scope)")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// UNSET model → PLATFORM DEFAULT (CTO-confirmed "unset → platform default").
|
||||
// No model means nothing to derive; the workspace defaults closed to
|
||||
// platform_managed and keeps the platform creds (UNCHANGED for the no-model case).
|
||||
func TestApplyPlatformManagedLLMEnv_DERIVED_UnsetModelPlatformDefault(t *testing.T) {
|
||||
const wsID = "00000000-1111-2222-3333-444444444444"
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil)) // NO override
|
||||
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModeBYOK) // org env IGNORED now
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
envVars := map[string]string{}
|
||||
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
|
||||
|
||||
if res.ResolvedMode != LLMBillingModePlatformManaged {
|
||||
t.Fatalf("unset model must default platform_managed, got %q (source=%s)", res.ResolvedMode, res.Source)
|
||||
}
|
||||
if res.Source != BillingModeSourceDerivedDefault {
|
||||
t.Errorf("source: got %q want derived_default", res.Source)
|
||||
}
|
||||
if got := envVars["ANTHROPIC_API_KEY"]; got != "tenant-admin-token" {
|
||||
t.Errorf("unset-model platform default must inject the CP proxy token, got %q", got)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuth is the
|
||||
// workspace-scope companion to the global-scope survival test: a byok
|
||||
// workspace that set its own CLAUDE_CODE_OAUTH_TOKEN via the canvas Secrets
|
||||
// tab (a workspace_secrets row) keeps it and runs direct. Under the corrected
|
||||
// model (molecule-core#1994) the tenant's credential survives at EITHER scope;
|
||||
// this pins the workspace-scope half.
|
||||
func TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuth(t *testing.T) {
|
||||
const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809" // Reno Stars Marketing agent
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
|
||||
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
// Workspace set its OWN OAuth token (a workspace_secrets row).
|
||||
envVars := map[string]string{
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "CUSTOMER-OWN-OAUTH-TOKEN",
|
||||
"MODEL": "opus",
|
||||
}
|
||||
|
||||
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
|
||||
|
||||
if got := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; got != "CUSTOMER-OWN-OAUTH-TOKEN" {
|
||||
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the workspace's own token left intact", got)
|
||||
}
|
||||
if !res.HasUsableLLMCred {
|
||||
t.Fatalf("HasUsableLLMCred = false, want true (workspace brought its own credential)")
|
||||
}
|
||||
if res.ResolvedMode != LLMBillingModeBYOK {
|
||||
t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeBYOK)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_DisabledKeepsTenantGlobalNoProxy proves the
|
||||
// corrected-model behavior for "disabled": the tenant's own global-scope LLM
|
||||
// cred is NOT stripped and the CP proxy is NOT forced. "disabled" means the
|
||||
// workspace runs no platform-billed LLM, but the tenant's own credential is
|
||||
// still the tenant's to keep; the caller's fail-closed abort is byok-only so a
|
||||
// disabled workspace boots regardless. The previous internal#711 behavior
|
||||
// stripped the global cred here on the same inverted premise; that strip is
|
||||
// removed.
|
||||
//
|
||||
// Mutation (load-bearing): re-add stripGlobalOriginLLMCreds on the non-platform
|
||||
// branch → the oauth disappears → this test RED on the survival assertion.
|
||||
func TestApplyPlatformManagedLLMEnv_DisabledKeepsTenantGlobalNoProxy(t *testing.T) {
|
||||
const wsID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeDisabled))
|
||||
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
|
||||
|
||||
envVars := map[string]string{
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "TENANT-OWN-GLOBAL-OAUTH",
|
||||
}
|
||||
|
||||
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
|
||||
|
||||
// The tenant's own global cred survives (not stripped).
|
||||
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
|
||||
t.Fatalf("tenant's own global cred must survive for disabled mode; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
|
||||
}
|
||||
// No proxy forced for disabled.
|
||||
if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
|
||||
t.Fatalf("disabled must not inject the platform usage token")
|
||||
}
|
||||
if res.ResolvedMode != LLMBillingModeDisabled {
|
||||
t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeDisabled)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_PlatformManagedStillReceivesGlobalCreds is
|
||||
// the no-regression guard for the metered platform_managed path
|
||||
// (molecule-core#1994): a platform-managed workspace MUST still strip any
|
||||
// direct oauth and route through the CP proxy. The direct OAuth token is
|
||||
// replaced by the proxy usage token (HasUsableLLMCred=true). This path is
|
||||
// UNCHANGED by the byok strip removal — only the byok/disabled branch changed.
|
||||
func TestApplyPlatformManagedLLMEnv_PlatformManagedStillReceivesGlobalCreds(t *testing.T) {
|
||||
const wsID = "99999999-9999-9999-9999-999999999999"
|
||||
mock := setupTestDB(t)
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModePlatformManaged))
|
||||
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
|
||||
envVars := map[string]string{
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "DIRECT-OAUTH-TOKEN",
|
||||
"MODEL": "opus",
|
||||
}
|
||||
|
||||
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
|
||||
|
||||
// Platform-managed routes through the CP proxy: OAuth stripped, proxy creds forced.
|
||||
if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {
|
||||
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN should be stripped + replaced by the proxy token for platform_managed")
|
||||
}
|
||||
if got := envVars["ANTHROPIC_API_KEY"]; got != "tenant-admin-token" {
|
||||
t.Fatalf("ANTHROPIC_API_KEY = %q, want proxy usage token for platform_managed", got)
|
||||
}
|
||||
if !res.HasUsableLLMCred {
|
||||
t.Fatalf("HasUsableLLMCred = false, want true for platform_managed (proxy token is the credential)")
|
||||
}
|
||||
if res.ResolvedMode != LLMBillingModePlatformManaged {
|
||||
t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModePlatformManaged)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestApplyPlatformManagedLLMEnv_PlatformManagedStillEmitsResolvedMode is the
|
||||
// no-regression companion: a workspace that resolves to platform_managed must
|
||||
// still strip + force the proxy AND emit MOLECULE_LLM_BILLING_MODE=
|
||||
@@ -1189,7 +1507,7 @@ func TestApplyPlatformManagedLLMEnv_PlatformManagedStillEmitsResolvedMode(t *tes
|
||||
"CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token",
|
||||
"MODEL": "sonnet",
|
||||
}
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "")
|
||||
applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
|
||||
|
||||
// OAuth stripped, proxy forced — unchanged platform_managed contract.
|
||||
if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {
|
||||
|
||||
@@ -1399,6 +1399,9 @@ func (s *stubFailingCPProv) Start(_ context.Context, _ provisioner.WorkspaceConf
|
||||
func (s *stubFailingCPProv) Stop(_ context.Context, _ string) error {
|
||||
panic("stubFailingCPProv.Stop not expected on the provisionWorkspaceCP failure path")
|
||||
}
|
||||
func (s *stubFailingCPProv) StopAndPrune(_ context.Context, _ string) error {
|
||||
panic("stubFailingCPProv.StopAndPrune not expected on the provisionWorkspaceCP failure path")
|
||||
}
|
||||
|
||||
func (s *stubFailingCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) {
|
||||
panic("stubFailingCPProv.GetConsoleOutput not expected on the provisionWorkspaceCP failure path")
|
||||
|
||||
@@ -726,7 +726,7 @@ func (h *WorkspaceHandler) cpStopWithRetry(ctx context.Context, workspaceID, sou
|
||||
// terminal error. The delete path needs the error (it must keep the
|
||||
// row recoverable for the orphan-sweeper + emit a durable event), so
|
||||
// the actual retry loop lives in cpStopWithRetryErr below.
|
||||
_ = h.cpStopWithRetryErr(ctx, workspaceID, source)
|
||||
_ = h.cpStopWithRetryErr(ctx, workspaceID, source, false) // restart/hibernate never prunes
|
||||
}
|
||||
|
||||
// cpStopWithRetryErr is the shared bounded-retry core for cpProv.Stop.
|
||||
@@ -743,14 +743,24 @@ func (h *WorkspaceHandler) cpStopWithRetry(ctx context.Context, workspaceID, sou
|
||||
// - all attempts fail → returns the LAST attempt's error and emits the
|
||||
// stable `LEAK-SUSPECT cpProv.Stop ...` log line so the CP-side orphan
|
||||
// reconciler can correlate by workspace_id.
|
||||
func (h *WorkspaceHandler) cpStopWithRetryErr(ctx context.Context, workspaceID, source string) error {
|
||||
//
|
||||
// cpStopWithRetryErr terminates the workspace's CP-managed compute with bounded
|
||||
// retry. prune=true (internal#734) additionally requests CP erase the durable
|
||||
// data volume — set ONLY by the permanent-delete-with-erase path, NEVER by
|
||||
// restart/hibernate (those pass false), so a recreate can never prune.
|
||||
func (h *WorkspaceHandler) cpStopWithRetryErr(ctx context.Context, workspaceID, source string, prune bool) error {
|
||||
if h.cpProv == nil {
|
||||
return nil
|
||||
}
|
||||
var lastErr error
|
||||
delay := cpStopRetryBaseDelay
|
||||
for attempt := 1; attempt <= cpStopRetryAttempts; attempt++ {
|
||||
err := h.cpProv.Stop(ctx, workspaceID)
|
||||
var err error
|
||||
if prune {
|
||||
err = h.cpProv.StopAndPrune(ctx, workspaceID)
|
||||
} else {
|
||||
err = h.cpProv.Stop(ctx, workspaceID)
|
||||
}
|
||||
if err == nil {
|
||||
if attempt > 1 {
|
||||
log.Printf("%s: cpProv.Stop(%s) succeeded on attempt %d", source, workspaceID, attempt)
|
||||
|
||||
@@ -72,6 +72,13 @@ func (s *scriptedCPStop) Stop(ctx context.Context, _ string) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// StopAndPrune delegates to Stop so the retry/error scripting is identical —
|
||||
// the prune flag only changes the URL the real provisioner builds, not the
|
||||
// retry behavior these tests exercise.
|
||||
func (s *scriptedCPStop) StopAndPrune(ctx context.Context, id string) error {
|
||||
return s.Stop(ctx, id)
|
||||
}
|
||||
func (s *scriptedCPStop) Start(_ context.Context, _ provisioner.WorkspaceConfig) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
@@ -501,10 +501,12 @@ func TestWorkspaceCreate_WithSecrets_Persists(t *testing.T) {
|
||||
// while persisting a secret causes the entire transaction to roll back and
|
||||
// the handler to return 500. The workspace row must NOT be committed.
|
||||
func TestWorkspaceCreate_SecretPersistFails_RollsBack(t *testing.T) {
|
||||
// internal#691: see TestExtended_SecretsSet — same default-closed reasoning.
|
||||
// This test is asserting the rollback path on DB failure, not the strip gate;
|
||||
// keep the org in byok so the OPENAI_API_KEY write reaches the INSERT.
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", "byok")
|
||||
// internal#718 P2-B: this test asserts the rollback path on DB failure, not
|
||||
// the strip gate. The create-time secret gate keys off the DERIVED mode now
|
||||
// (org rung retired). An explicit byok override makes the workspace byok in a
|
||||
// single resolver read (precedence-1 short-circuit), so the OPENAI_API_KEY
|
||||
// write is allowed and reaches the INSERT-and-fail path this test exercises.
|
||||
t.Setenv("MOLECULE_LLM_BILLING_MODE", "platform_managed") // org env ignored now
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
@@ -513,14 +515,11 @@ func TestWorkspaceCreate_SecretPersistFails_RollsBack(t *testing.T) {
|
||||
mock.ExpectBegin()
|
||||
mock.ExpectExec("INSERT INTO workspaces").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
// internal#691: Create() now resolves billing mode per-workspace before
|
||||
// the secret-strip gate. The workspace row was just inserted in the same
|
||||
// transaction so it isn't readable from a separate query yet; the
|
||||
// resolver expects the SELECT and the mock returns no row → falls back
|
||||
// to the org default (byok, set above) so the OPENAI_API_KEY write
|
||||
// reaches the INSERT-and-fail path this test exercises.
|
||||
// Create() resolves billing mode per-workspace before the secret-strip gate.
|
||||
// An explicit byok override short-circuits the resolver (precedence 1) so the
|
||||
// OPENAI_API_KEY write is allowed and reaches the INSERT-and-fail path.
|
||||
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
|
||||
mock.ExpectExec("INSERT INTO workspace_secrets").
|
||||
WillReturnError(sql.ErrConnDone) // DB failure while writing secret
|
||||
mock.ExpectRollback() // workspace insert must be rolled back
|
||||
@@ -1787,7 +1786,7 @@ func TestWorkspaceCreate_TemplateDefaultsMissingRuntimeAndModel(t *testing.T) {
|
||||
tier: 2
|
||||
runtime: hermes
|
||||
runtime_config:
|
||||
model: nousresearch/hermes-4-70b
|
||||
model: moonshot/kimi-k2.6
|
||||
`)
|
||||
if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil {
|
||||
t.Fatalf("write cfg: %v", err)
|
||||
@@ -1842,7 +1841,7 @@ func TestWorkspaceCreate_TemplateDefaultsLegacyTopLevelModel(t *testing.T) {
|
||||
cfg := []byte(`name: Legacy Agent
|
||||
tier: 1
|
||||
runtime: hermes
|
||||
model: anthropic:claude-sonnet-4-5
|
||||
model: moonshot/kimi-k2.5
|
||||
`)
|
||||
if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil {
|
||||
t.Fatalf("write cfg: %v", err)
|
||||
@@ -1897,7 +1896,7 @@ func TestWorkspaceCreate_CallerModelOverridesTemplateDefault(t *testing.T) {
|
||||
}
|
||||
cfg := []byte(`runtime: hermes
|
||||
runtime_config:
|
||||
model: nousresearch/hermes-4-70b
|
||||
model: moonshot/kimi-k2.6
|
||||
`)
|
||||
if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil {
|
||||
t.Fatalf("write cfg: %v", err)
|
||||
@@ -1924,7 +1923,11 @@ runtime_config:
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"name":"Custom Hermes","template":"hermes-template","model":"minimax/MiniMax-M2.7"}`
|
||||
// Caller overrides with a different hermes-valid model — registry permits
|
||||
// both moonshot/kimi-k2.5 and moonshot/kimi-k2.6 for hermes (P4 PR-1 native
|
||||
// set). The template default would have been moonshot/kimi-k2.6; caller
|
||||
// picks kimi-k2.5 explicitly to prove the override actually fires.
|
||||
body := `{"name":"Custom Hermes","template":"hermes-template","model":"moonshot/kimi-k2.5"}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
@@ -2048,6 +2051,152 @@ func TestWorkspaceCreate_188_NoTemplateNoRuntime_NowMODEL_REQUIRED(t *testing.T)
|
||||
}
|
||||
}
|
||||
|
||||
// internal#718 P4 PR-2: only-registered validation HARD-REJECT. A known
|
||||
// (registry) runtime with a model NOT in its registered set is rejected at the
|
||||
// create boundary with 422 UNREGISTERED_MODEL_FOR_RUNTIME — no DB rows touched,
|
||||
// no provisioning attempt, no wedged workspace. Replaces P2-B's WARN-mode
|
||||
// header.
|
||||
func TestWorkspaceCreate_718_P4_UnregisteredModelHardReject422(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
// No DB expectations: the 422 fires BEFORE BeginTx, so any unexpected
|
||||
// INSERT will fail the test via ExpectationsWereMet.
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"name":"Bad Model","runtime":"claude-code","model":"totally-made-up-xyz"}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
handler.Create(c)
|
||||
|
||||
if w.Code != http.StatusUnprocessableEntity {
|
||||
t.Fatalf("unregistered-model create: expected 422, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if !bytes.Contains(w.Body.Bytes(), []byte(`"code":"UNREGISTERED_MODEL_FOR_RUNTIME"`)) {
|
||||
t.Errorf("expected code=UNREGISTERED_MODEL_FOR_RUNTIME in 422 body, got %s", w.Body.String())
|
||||
}
|
||||
if !bytes.Contains(w.Body.Bytes(), []byte(`"runtime":"claude-code"`)) {
|
||||
t.Errorf("expected runtime=claude-code echoed in 422 body, got %s", w.Body.String())
|
||||
}
|
||||
if !bytes.Contains(w.Body.Bytes(), []byte(`"model":"totally-made-up-xyz"`)) {
|
||||
t.Errorf("expected model echoed in 422 body, got %s", w.Body.String())
|
||||
}
|
||||
// The legacy WARN header must NOT fire — there is no "proceeded with
|
||||
// warning" path anymore.
|
||||
if w.Header().Get("X-Molecule-Model-Unregistered") != "" {
|
||||
t.Errorf("P4 hard-reject must not emit the legacy WARN header, got %q", w.Header().Get("X-Molecule-Model-Unregistered"))
|
||||
}
|
||||
|
||||
// Strict mock check: no DB ops should have happened.
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unexpected DB activity on hard-reject path: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// A REGISTERED model on a registry runtime proceeds with 201 and no unregistered header.
|
||||
func TestWorkspaceCreate_718_P4_RegisteredModelProceeds(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
mock.ExpectBegin()
|
||||
mock.ExpectExec("INSERT INTO workspaces").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectCommit()
|
||||
mock.ExpectExec("INSERT INTO workspace_secrets").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("INSERT INTO canvas_layouts").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
// claude-opus-4-7 IS a registered claude-code model (anthropic-api).
|
||||
body := `{"name":"Good Model","runtime":"claude-code","model":"claude-opus-4-7"}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
handler.Create(c)
|
||||
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Fatalf("registered-model create: expected 201, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if w.Header().Get("X-Molecule-Model-Unregistered") != "" {
|
||||
t.Errorf("registered model must NOT set the legacy unregistered header, got %q", w.Header().Get("X-Molecule-Model-Unregistered"))
|
||||
}
|
||||
}
|
||||
|
||||
// internal#718 P4 PR-2: the legacy colon-namespaced BYOK vocabulary
|
||||
// 'anthropic:claude-opus-4-7' is now a FIRST-CLASS registered claude-code model
|
||||
// (P4 PR-1 reconciled the colon-vocab into the registry). The hard-reject must
|
||||
// NOT 422 this legitimate live-corpus form — verifying the reconcile + flip work
|
||||
// together. This is the canonical regression guard for the colon-vocab path.
|
||||
func TestWorkspaceCreate_718_P4_LegacyColonVocabAccepted(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
mock.ExpectBegin()
|
||||
mock.ExpectExec("INSERT INTO workspaces").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectCommit()
|
||||
mock.ExpectExec("INSERT INTO workspace_secrets").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("INSERT INTO canvas_layouts").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"name":"Legacy Colon","runtime":"claude-code","model":"anthropic:claude-opus-4-7"}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
handler.Create(c)
|
||||
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Fatalf("legacy colon-form create (P4 PR-1 reconciled): expected 201, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// internal#718 P2-B: a runtime NOT in the registry (mock — a known core runtime
|
||||
// absent from the first-party provider registry) fails OPEN — the
|
||||
// only-registered gate does not block it (federation / non-first-party path
|
||||
// unchanged). It proceeds past the gate to the normal create flow.
|
||||
func TestWorkspaceCreate_718_NonRegistryRuntimeFailsOpen(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
mock.ExpectBegin()
|
||||
mock.ExpectExec("INSERT INTO workspaces").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectCommit()
|
||||
mock.ExpectExec("INSERT INTO canvas_layouts").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
// "mock" is a known core runtime but NOT in the first-party registry;
|
||||
// any model passes the only-registered gate (fail-open).
|
||||
body := `{"name":"Mock Agent","runtime":"mock","model":"canned-replies"}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
handler.Create(c)
|
||||
|
||||
if w.Code != http.StatusCreated {
|
||||
t.Fatalf("non-registry runtime should fail open (201), got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
// Explicit runtime, no template → honored, 201 (no template resolution
|
||||
// needed; runtimeExplicitlyRequested true but already resolved).
|
||||
func TestWorkspaceCreate_188_ExplicitRuntimeNoTemplate_OK(t *testing.T) {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user