From 5be20ac1cfab9fb42cd9841bb0a560436f9f98e1 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 21 Apr 2026 10:18:02 -0700 Subject: [PATCH 1/5] fix(e2e): inject OPENAI_API_KEY into workspace secrets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Workspace runtimes (hermes, langgraph, etc.) crash at boot with 'No provider API key found' when no ANTHROPIC_API_KEY / OPENAI_API_KEY / etc. is set. Harness previously sent no secrets → workspace sat in provisioning for 10 min → harness timed out. Console log from staging run 2026-04-21T17:08Z showed the exact crash: ValueError: No Hermes provider API key found. Set any one of: ANTHROPIC_API_KEY, HERMES_API_KEY, NOUS_API_KEY, OPENROUTER_API_KEY, OPENAI_API_KEY, ... Read E2E_OPENAI_API_KEY from env and inject into both parent and child workspace POST bodies via the secrets field (persists as workspace_secret, materialises into container env). Empty key falls through — dev can still run smoke tests, workspace just won't reach online. For CI, a new repo secret MOLECULE_STAGING_OPENAI_KEY needs to be added and passed as E2E_OPENAI_API_KEY in the workflow env. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/e2e/test_staging_full_saas.sh | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh index 8e66f525..46014e35 100755 --- a/tests/e2e/test_staging_full_saas.sh +++ b/tests/e2e/test_staging_full_saas.sh @@ -229,10 +229,22 @@ tenant_call() { } # ─── 5. Provision parent workspace ───────────────────────────────────── +# Runtimes like hermes crash at boot with "No provider API key found" +# if nothing in the standard env-var list is set. Inject the API key +# from E2E_OPENAI_API_KEY so the runtime can actually start — it's +# per-workspace secret, so it's persisted as a workspace_secret and +# materialized into the container env. 
Missing key falls through to +# an empty secrets map; workspace will still fail but the error is +# expected and actionable. +SECRETS_JSON='{}' +if [ -n "${E2E_OPENAI_API_KEY:-}" ]; then + SECRETS_JSON="{\"OPENAI_API_KEY\":\"$E2E_OPENAI_API_KEY\"}" +fi + log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..." PARENT_RESP=$(tenant_call POST /workspaces \ -H "Content-Type: application/json" \ - -d "{\"name\":\"E2E Parent\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"gpt-4o\"}") + -d "{\"name\":\"E2E Parent\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"gpt-4o\",\"secrets\":$SECRETS_JSON}") PARENT_ID=$(echo "$PARENT_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])") log " PARENT_ID=$PARENT_ID" @@ -242,7 +254,7 @@ if [ "$MODE" = "full" ]; then log "6/11 Provisioning child workspace..." CHILD_RESP=$(tenant_call POST /workspaces \ -H "Content-Type: application/json" \ - -d "{\"name\":\"E2E Child\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"gpt-4o\",\"parent_id\":\"$PARENT_ID\"}") + -d "{\"name\":\"E2E Child\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"gpt-4o\",\"parent_id\":\"$PARENT_ID\",\"secrets\":$SECRETS_JSON}") CHILD_ID=$(echo "$CHILD_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])") log " CHILD_ID=$CHILD_ID" else From 392282c51881dc62766efc3c539b7359e7a520cc Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 21 Apr 2026 10:24:58 -0700 Subject: [PATCH 2/5] fix(e2e): set MODEL_PROVIDER=openai for Hermes runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hermes's provider resolver checks ANTHROPIC_API_KEY first (resolution order puts anthropic before openai). Without MODEL_PROVIDER=openai explicitly set, Hermes defaults to claude-sonnet-4-6 against the OpenAI endpoint and 404s with model_not_found. Staging E2E run 2026-04-21T17:24Z hit this after every earlier fix landed (workspace online, A2A ready) — last remaining blocker for the happy path. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/e2e/test_staging_full_saas.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh index 46014e35..b1d78345 100755 --- a/tests/e2e/test_staging_full_saas.sh +++ b/tests/e2e/test_staging_full_saas.sh @@ -238,7 +238,11 @@ tenant_call() { # expected and actionable. SECRETS_JSON='{}' if [ -n "${E2E_OPENAI_API_KEY:-}" ]; then - SECRETS_JSON="{\"OPENAI_API_KEY\":\"$E2E_OPENAI_API_KEY\"}" + # MODEL_PROVIDER=openai forces Hermes's resolver to pick the OpenAI + # path. Without it Hermes defaults to Claude (resolution order puts + # anthropic before openai) and you get 404 model_not_found because + # the OpenAI endpoint doesn't serve claude-sonnet-* models. + SECRETS_JSON="{\"OPENAI_API_KEY\":\"$E2E_OPENAI_API_KEY\",\"MODEL_PROVIDER\":\"openai\"}" fi log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..." From b8b3d5ce1f89cb968bafbf896a2c577a6e6a46a8 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 21 Apr 2026 10:33:27 -0700 Subject: [PATCH 3/5] fix(e2e): MODEL_PROVIDER is provider:model slug, not just provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit workspace/config.py:258 reads MODEL_PROVIDER as the full model string (format 'provider:model', e.g. 'anthropic:claude-opus-4-7'). My prior 'openai' alone got parsed as the model name → 404 model_not_found. Use 'openai:gpt-4o' and also set OPENAI_BASE_URL to api.openai.com (default was openrouter.ai, which takes a different key format).
Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/e2e/test_staging_full_saas.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh index b1d78345..87a44ce3 100755 --- a/tests/e2e/test_staging_full_saas.sh +++ b/tests/e2e/test_staging_full_saas.sh @@ -238,11 +238,12 @@ tenant_call() { # expected and actionable. SECRETS_JSON='{}' if [ -n "${E2E_OPENAI_API_KEY:-}" ]; then - # MODEL_PROVIDER=openai forces Hermes's resolver to pick the OpenAI - # path. Without it Hermes defaults to Claude (resolution order puts - # anthropic before openai) and you get 404 model_not_found because - # the OpenAI endpoint doesn't serve claude-sonnet-* models. - SECRETS_JSON="{\"OPENAI_API_KEY\":\"$E2E_OPENAI_API_KEY\",\"MODEL_PROVIDER\":\"openai\"}" + # MODEL_PROVIDER is a full model slug in 'provider:model' format per + # workspace/config.py:258. Using just "openai" gets parsed as the + # model name → 404 model_not_found. Also set OPENAI_BASE_URL to + # OpenAI's own endpoint — default is openrouter.ai which would need + # a different key format. + SECRETS_JSON="{\"OPENAI_API_KEY\":\"$E2E_OPENAI_API_KEY\",\"OPENAI_BASE_URL\":\"https://api.openai.com/v1\",\"MODEL_PROVIDER\":\"openai:gpt-4o\"}" fi log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..." From 5e130b7e6ff04cd7397c4b011582c6f5e965d127 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 21 Apr 2026 10:41:17 -0700 Subject: [PATCH 4/5] fix(e2e): delegation raw curl missing X-Molecule-Org-Id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Section 10's delegation call is a raw curl (not tenant_call, because it carries an additional X-Source-Workspace-Id). It was missing X-Molecule-Org-Id, which TenantGuard requires — so the tenant 404'd every delegation probe despite section 8's A2A call (via tenant_call) working correctly. 
Repro: staging run 2026-04-21T17:40Z had section 8 green (PONG) and section 10 red (rc=22) on the same workspace. Only difference was the missing header. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/e2e/test_staging_full_saas.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh index 87a44ce3..1218ae02 100755 --- a/tests/e2e/test_staging_full_saas.sh +++ b/tests/e2e/test_staging_full_saas.sh @@ -376,8 +376,13 @@ print(json.dumps({ })) ") set +e + # Raw curl (not tenant_call) because this call carries an extra + # X-Source-Workspace-Id header. Must still send X-Molecule-Org-Id + # or TenantGuard 404s — previously missing, caused section 10 to + # fail rc=22 despite everything upstream being correct (2026-04-21). DELEG_RESP=$(curl "${CURL_COMMON[@]}" -X POST "$TENANT_URL/workspaces/$CHILD_ID/a2a" \ -H "Authorization: Bearer $EFFECTIVE_TENANT_TOKEN" \ + -H "X-Molecule-Org-Id: $ORG_ID" \ -H "X-Source-Workspace-Id: $PARENT_ID" \ -H "Content-Type: application/json" \ -d "$DELEG_PAYLOAD") From bd020d84be08ca8034ea638aed07eb9409d2c86b Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Tue, 21 Apr 2026 11:24:59 -0700 Subject: [PATCH 5/5] ci(e2e): wire MOLECULE_STAGING_OPENAI_KEY into workflow env MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The harness needs E2E_OPENAI_API_KEY set for Hermes workspaces to boot — without it the runtime crashes with "No provider API key found" and workspaces never hit online. Preflight step fails fast with a clear error if the repo secret is missing, so CI doesn't burn 10 minutes on a foregone conclusion. Repo secret to add: Settings → Secrets → Actions → MOLECULE_STAGING_OPENAI_KEY. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/e2e-staging-saas.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/e2e-staging-saas.yml b/.github/workflows/e2e-staging-saas.yml index c43e1200..c1e2b878 100644 --- a/.github/workflows/e2e-staging-saas.yml +++ b/.github/workflows/e2e-staging-saas.yml @@ -78,6 +78,10 @@ jobs: # retrieval + teardown. Configure in # Settings → Secrets and variables → Actions → Repository secrets. MOLECULE_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }} + # OpenAI key for workspace LLM calls (section 8 A2A). Without it, + # Hermes runtime crashes at boot with "No provider API key found". + # Configure at Settings → Secrets → Actions → MOLECULE_STAGING_OPENAI_KEY. + E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_KEY }} E2E_RUNTIME: ${{ github.event.inputs.runtime || 'hermes' }} E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}" E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }} @@ -93,6 +97,14 @@ jobs: fi echo "Admin token present ✓" + - name: Verify OpenAI key present + run: | + if [ -z "$E2E_OPENAI_API_KEY" ]; then + echo "::error::MOLECULE_STAGING_OPENAI_KEY secret not set — workspaces will fail at boot with 'No provider API key found'" + exit 2 + fi + echo "OpenAI key present ✓ (len=${#E2E_OPENAI_API_KEY})" + - name: CP staging health preflight run: | code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health")