fix(provisioner,e2e): #2851 advertise Docker gateway IP when platform is containerized #2860
@@ -324,13 +324,13 @@ jobs:
|
||||
# even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
|
||||
MOLECULE_ENV: development
|
||||
SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
|
||||
# #2851: the provisioner now makes the workspace advertise http://localhost:<host-port>.
|
||||
# When the platform itself runs inside Docker, the A2A proxy rewrites the
|
||||
# stored http://127.0.0.1:<host-port> URL to ws-<id>:8000, so keep the job
|
||||
# container on molecule-core-net and Docker-aware mode so that Docker-DNS
|
||||
# name resolves. Dev mode already relaxes private-range SSRF checks, so SaaS
|
||||
# deploy mode is no longer required.
|
||||
MOLECULE_IN_DOCKER: true
|
||||
# #2851 fix follow-up: act_runner executes this job container in host-network
|
||||
# mode, so the platform process shares the host's loopback. The A2A proxy
|
||||
# must therefore keep the stored http://127.0.0.1:<host-port> URL as-is;
|
||||
# enabling Docker-aware rewrite would synthesize ws-<id>:8000, which the
|
||||
# host-network platform cannot resolve. Leave MOLECULE_IN_DOCKER=false so
|
||||
# the proxy uses the host-mapped port directly (same as lifecycle-stub).
|
||||
MOLECULE_IN_DOCKER: false
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
@@ -356,14 +356,10 @@ jobs:
|
||||
# uses for token-write helper containers.
|
||||
docker pull alpine:3.20@sha256:c64c687cbea9300178b30c95835354e34c4e4febc4badfe27102879de0483b5e >/dev/null
|
||||
docker network create molecule-core-net >/dev/null 2>&1 || true
|
||||
# #2851: connect the act_runner job container to molecule-core-net so the
|
||||
# platform process can resolve ws-<id>:8000 workspace URLs.
|
||||
JOB_CID="$(hostname)"
|
||||
if docker network connect molecule-core-net "$JOB_CID" >/dev/null 2>&1; then
|
||||
echo "Connected job container ($JOB_CID) to molecule-core-net."
|
||||
else
|
||||
echo "WARN: could not connect job container to molecule-core-net (already connected or fallback)."
|
||||
fi
|
||||
# Workspace containers join molecule-core-net. The act_runner job
|
||||
# container runs host-network, so connecting it to the bridge is not
|
||||
# possible, but the bridge itself must exist for sibling workspace
|
||||
# containers to communicate.
|
||||
|
||||
- name: Start Postgres (docker, ephemeral host port)
|
||||
run: |
|
||||
@@ -424,6 +420,11 @@ jobs:
|
||||
fi
|
||||
echo "PLATFORM_HOST_IP=${PLATFORM_HOST_IP}"
|
||||
echo "PLATFORM_URL=http://${PLATFORM_HOST_IP}:${PORT}" >> "$GITHUB_ENV"
|
||||
# The workspace advertises localhost:<host-port> (the default when
|
||||
# MOLECULE_WORKSPACE_ADVERTISE_HOST is unset). Because this job runs
|
||||
# host-network, the platform reaches the container via 127.0.0.1; no
|
||||
# gateway-IP advertisement is needed here and keeping it unset lets
|
||||
# /registry/register accept the localhost URL.
|
||||
T="lpe2e-real-admin-${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
echo "ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
|
||||
echo "MOLECULE_ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
|
||||
|
||||
@@ -83,11 +83,15 @@ HOSTNAME = os.environ.get("HOSTNAME", "").strip() # docker sets this to the con
|
||||
# address (0.0.0.0:8000, reachable as ws-<id>:8000 on the bridge) is what
|
||||
# the proxy actually hits — independent of the URL string we register.
|
||||
#
|
||||
# Net: register a name-form localhost URL purely to satisfy push-mode's
|
||||
# "url required + must pass SSRF check" and to get our auth_token. Routing is
|
||||
# handled by the provisioner-stored 127.0.0.1 URL + the proxy rewrite.
|
||||
# Net: register a host-reachable localhost URL. The provisioner injects the
|
||||
# correct host-mapped port via MOLECULE_WORKSPACE_URL (#2851); prefer that and
|
||||
# fall back to the legacy STUB_REGISTER_URL or the internal listen port.
|
||||
_short = WORKSPACE_ID[:12] if len(WORKSPACE_ID) > 12 else WORKSPACE_ID
|
||||
SELF_URL = os.environ.get("STUB_REGISTER_URL", f"http://localhost:{PORT}")
|
||||
SELF_URL = (
|
||||
os.environ.get("STUB_REGISTER_URL")
|
||||
or os.environ.get("MOLECULE_WORKSPACE_URL")
|
||||
or f"http://localhost:{PORT}"
|
||||
)
|
||||
|
||||
CONFIG_PATH = (os.environ.get("WORKSPACE_CONFIG_PATH") or "/configs").rstrip("/")
|
||||
AUTH_TOKEN_FILE = f"{CONFIG_PATH}/.auth_token"
|
||||
|
||||
@@ -522,10 +522,12 @@ done
|
||||
if [ -n "$RUN" ]; then pass "container running: $RUN"; else fail "no running ws-${WSID} container within 10s of online" "docker ps shows none"; fi
|
||||
|
||||
# #2851: fail fast if the workspace advertised an unresolvable/unreachable URL.
|
||||
# The provisioner now makes the runtime advertise http://localhost:<host-port>,
|
||||
# which the platform stores as http://127.0.0.1:<host-port>. The A2A proxy
|
||||
# rewrites that to ws-<id>:8000 when the platform runs inside Docker, so the
|
||||
# URL stored in the registry should always be a host-reachable loopback address.
|
||||
# The provisioner now makes the runtime advertise http://localhost:<host-port>
|
||||
# by default, which the platform stores as http://127.0.0.1:<host-port>. The A2A
|
||||
# proxy rewrites that to ws-<id>:8000 when the platform runs inside Docker, so
|
||||
# the URL stored in the registry should always be a host-reachable address.
|
||||
# When the platform itself is containerized, MOLECULE_WORKSPACE_ADVERTISE_HOST
|
||||
# points the runtime at the Docker gateway IP instead; accept that too.
|
||||
# (The end-to-end proxy reach in Step 5 is the real reachability proof; this
|
||||
# assertion just surfaces hostname/DNS misconfiguration early.)
|
||||
WS_URL_AFTER=$(ws_field "$WS" "url")
|
||||
@@ -534,10 +536,13 @@ if [ -n "$WS_URL_AFTER" ]; then
|
||||
else
|
||||
fail "workspace URL is empty after reaching online" "registry row has no url"
|
||||
fi
|
||||
if echo "$WS_URL_AFTER" | grep -qE '^https?://(127\.0\.0\.1|localhost):'; then
|
||||
pass "workspace registered a host-reachable loopback URL"
|
||||
URL_HOST_AFTER="${WS_URL_AFTER#http://}"
|
||||
URL_HOST_AFTER="${URL_HOST_AFTER#https://}"
|
||||
URL_HOST_AFTER="${URL_HOST_AFTER%%:*}"
|
||||
if [ "$URL_HOST_AFTER" = "127.0.0.1" ] || [ "$URL_HOST_AFTER" = "localhost" ] || [ "$URL_HOST_AFTER" = "${PLATFORM_HOST_IP:-}" ]; then
|
||||
pass "workspace registered a host-reachable URL (host=$URL_HOST_AFTER)"
|
||||
else
|
||||
fail "workspace URL is not a host-reachable loopback address" "url=$WS_URL_AFTER"
|
||||
fail "workspace URL is not a host-reachable address" "url=$WS_URL_AFTER expected localhost/127.0.0.1 or PLATFORM_HOST_IP=${PLATFORM_HOST_IP:-<unset>}"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -21,6 +22,30 @@ import (
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// isProvisionerHostPortURL reports whether u is a loopback URL with a port
|
||||
// other than 8000. Such URLs are written by the provisioner (via
|
||||
// MOLECULE_WORKSPACE_URL) and should be preserved across runtime registrations.
|
||||
// The 8000 fallback is the runtime's own default and is allowed to be
|
||||
// overwritten.
|
||||
func isProvisionerHostPortURL(u string) bool {
|
||||
if u == "" {
|
||||
return false
|
||||
}
|
||||
if !strings.HasPrefix(u, "http://127.0.0.1:") && !strings.HasPrefix(u, "http://localhost:") {
|
||||
return false
|
||||
}
|
||||
// Extract the trailing port.
|
||||
port := u[strings.LastIndex(u, ":")+1:]
|
||||
if port == "" {
|
||||
return false
|
||||
}
|
||||
n, err := strconv.Atoi(port)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return n != 8000
|
||||
}
|
||||
|
||||
// blockedRange is a named CIDR block so the conditional blocklist in
|
||||
// validateAgentURL reads as a slice of homogeneous values instead of
|
||||
// repeated anonymous struct literals.
|
||||
@@ -578,8 +603,33 @@ func (h *RegistryHandler) Register(c *gin.Context) {
|
||||
INSERT INTO workspaces (id, name, url, agent_card, status, last_heartbeat_at, delivery_mode, kind)
|
||||
VALUES ($1, $2, $3, $4::jsonb, 'online', now(), $5, COALESCE(NULLIF($6, ''), 'workspace'))
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
-- Preserve the provisioner-set host-port URL. The provisioner
|
||||
-- injects MOLECULE_WORKSPACE_URL=<host-port> into the container
|
||||
-- env (buildStartWorkspaceEnv in workspace-server), so the
|
||||
-- runtime should register that same URL. The runtime's
|
||||
-- resolve_workspace_url honors MOLECULE_WORKSPACE_URL at highest
|
||||
-- precedence, so when the env propagation is correct, the
|
||||
-- runtime's URL == provisioner's URL. When env propagation is
|
||||
-- broken (real-image lifecycle E2E gap that bit 3 rounds
|
||||
-- running), the runtime falls back to http://HOSTNAME:8000
|
||||
-- — the port 8000 makes it distinguishable from the
|
||||
-- provisioner's host-port (typically >30000). Preserve the
|
||||
-- provisioner's URL when its port != 8000.
|
||||
--
|
||||
-- Researcher #11798: round-3 fix changed the provisioner
|
||||
-- from http://127.0.0.1 to http://localhost (the
|
||||
-- workspaceAdvertiseURL default), but the Register handler
|
||||
-- only matched the legacy 127.0.0.1 prefix, so the upsert
|
||||
-- overwrote the provisioner's URL with the runtime's 8000
|
||||
-- fallback. Generalize to match any host-prefixed
|
||||
-- host-port URL whose port != 8000.
|
||||
url = CASE
|
||||
WHEN workspaces.url LIKE 'http://127.0.0.1%' THEN workspaces.url
|
||||
WHEN workspaces.url IS NOT NULL
|
||||
AND workspaces.url != ''
|
||||
AND (workspaces.url LIKE 'http://127.0.0.1:%'
|
||||
OR workspaces.url LIKE 'http://localhost:%')
|
||||
AND CAST(substring(workspaces.url FROM ':([0-9]+)$') AS int) <> 8000
|
||||
THEN workspaces.url
|
||||
ELSE EXCLUDED.url
|
||||
END,
|
||||
agent_card = EXCLUDED.agent_card,
|
||||
@@ -617,7 +667,7 @@ func (h *RegistryHandler) Register(c *gin.Context) {
|
||||
cachedURL := effectiveURL
|
||||
var dbURL string
|
||||
if err := db.DB.QueryRowContext(ctx, `SELECT url FROM workspaces WHERE id = $1`, payload.ID).Scan(&dbURL); err == nil {
|
||||
if strings.HasPrefix(dbURL, "http://127.0.0.1") {
|
||||
if isProvisionerHostPortURL(dbURL) {
|
||||
cachedURL = dbURL
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,6 +157,44 @@ func statusOf(t *testing.T, conn *sql.DB, id string) string {
|
||||
return status
|
||||
}
|
||||
|
||||
// insertWorkspaceWithURL creates a workspace row with an explicit URL
|
||||
// (the provisioner-set host-port URL the CASE-preservation tests need
|
||||
// pre-populated). Returns the DB-generated UUID. Mirrors insertWorkspace
|
||||
// but additionally writes the `url` column on insert.
|
||||
func insertWorkspaceWithURL(t *testing.T, conn *sql.DB, name, status, url string) string {
|
||||
t.Helper()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
var id string
|
||||
err := conn.QueryRowContext(ctx, `
|
||||
INSERT INTO workspaces (name, status, parent_id, delivery_mode, url)
|
||||
VALUES ($1, $2, NULL, 'push', NULLIF($3, ''))
|
||||
RETURNING id
|
||||
`, name, status, url).Scan(&id)
|
||||
if err != nil {
|
||||
t.Fatalf("insertWorkspaceWithURL(%s): %v", name, err)
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
// urlOf reads a workspace row's current url, failing the test if the row
|
||||
// is gone or the url is NULL. Used by the CASE-preservation tests to
|
||||
// verify the upsert preserved (or did not preserve) the expected URL.
|
||||
func urlOf(t *testing.T, conn *sql.DB, id string) string {
|
||||
t.Helper()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
var url sql.NullString
|
||||
err := conn.QueryRowContext(ctx, `SELECT url FROM workspaces WHERE id = $1`, id).Scan(&url)
|
||||
if err != nil {
|
||||
t.Fatalf("urlOf(%s): %v", id, err)
|
||||
}
|
||||
if !url.Valid {
|
||||
t.Fatalf("urlOf(%s): row has NULL url", id)
|
||||
}
|
||||
return url.String
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 1 — registry register/heartbeat row-state: the #73 tombstone guard.
|
||||
//
|
||||
@@ -176,8 +214,25 @@ const registerUpsertSQL = `
|
||||
INSERT INTO workspaces (id, name, url, agent_card, status, last_heartbeat_at, delivery_mode)
|
||||
VALUES ($1, $2, $3, $4::jsonb, 'online', now(), $5)
|
||||
ON CONFLICT (id) DO UPDATE SET
|
||||
-- Preserve the provisioner-set host-port URL. The provisioner
|
||||
-- injects MOLECULE_WORKSPACE_URL=<host-port> into the container
|
||||
-- env (buildStartWorkspaceEnv in workspace-server), so the
|
||||
-- runtime should register that same URL. The runtime's
|
||||
-- resolve_workspace_url honors MOLECULE_WORKSPACE_URL at highest
|
||||
-- precedence, so when the env propagation is correct, the
|
||||
-- runtime's URL == provisioner's URL. When env propagation is
|
||||
-- broken (real-image lifecycle E2E gap that bit 3 rounds
|
||||
-- running), the runtime falls back to http://HOSTNAME:8000
|
||||
-- — the port 8000 makes it distinguishable from the
|
||||
-- provisioner's host-port (typically >30000). Preserve the
|
||||
-- provisioner's URL when its port != 8000.
|
||||
url = CASE
|
||||
WHEN workspaces.url LIKE 'http://127.0.0.1%' THEN workspaces.url
|
||||
WHEN workspaces.url IS NOT NULL
|
||||
AND workspaces.url != ''
|
||||
AND (workspaces.url LIKE 'http://127.0.0.1:%'
|
||||
OR workspaces.url LIKE 'http://localhost:%')
|
||||
AND CAST(substring(workspaces.url FROM ':([0-9]+)$') AS int) <> 8000
|
||||
THEN workspaces.url
|
||||
ELSE EXCLUDED.url
|
||||
END,
|
||||
agent_card = EXCLUDED.agent_card,
|
||||
@@ -237,6 +292,114 @@ func TestIntegration_RegistryRowState_RegisterUpsertsLiveWorkspaceToOnline(t *te
|
||||
}
|
||||
}
|
||||
|
||||
// TestIntegration_RegistryRowState_RegisterPreservesProvisionerHostPort covers
|
||||
// the #2851 round-4 / Researcher #11798 close-out: when the provisioner has
|
||||
// already set a host-port URL on the row (e.g. http://localhost:41751 via
|
||||
// buildStartWorkspaceEnv) and the runtime re-registers with the same
|
||||
// host-port URL (or any non-8000 URL on the localhost/127.0.0.1 prefix),
|
||||
// the upsert must preserve the EXISTING URL (not overwrite with EXCLUDED).
|
||||
// Regression of the round-3 11798 gap.
|
||||
func TestIntegration_RegistryRowState_RegisterPreservesProvisionerHostPort(t *testing.T) {
|
||||
conn := integrationAuthDB(t)
|
||||
ctx := context.Background()
|
||||
|
||||
id := insertWorkspaceWithURL(t, conn, "hostport-ws", "online", "http://localhost:41751")
|
||||
|
||||
// Re-register with the SAME host-port URL. The provisioner would have
|
||||
// injected this same URL via MOLECULE_WORKSPACE_URL, so the runtime's
|
||||
// EXCLUDED.url == existing url. The CASE should preserve either way.
|
||||
if _, err := conn.ExecContext(ctx, registerUpsertSQL,
|
||||
id, id, "http://localhost:41751", `{"name":"x"}`, "push"); err != nil {
|
||||
t.Fatalf("register upsert: %v", err)
|
||||
}
|
||||
if got := urlOf(t, conn, id); got != "http://localhost:41751" {
|
||||
t.Fatalf("host-port URL not preserved: got %q, want http://localhost:41751", got)
|
||||
}
|
||||
|
||||
// Re-register with a DIFFERENT host-port URL on the same prefix. The
|
||||
// provisioner is allowed to update the host-port (e.g. after a restart
|
||||
// allocated a new port). The CASE should still preserve the row's
|
||||
// existing host-port URL — the runtime shouldn't be able to silently
|
||||
// rewrite the port without re-provisioning.
|
||||
if _, err := conn.ExecContext(ctx, registerUpsertSQL,
|
||||
id, id, "http://localhost:50000", `{"name":"x"}`, "push"); err != nil {
|
||||
t.Fatalf("register upsert 2: %v", err)
|
||||
}
|
||||
if got := urlOf(t, conn, id); got != "http://localhost:41751" {
|
||||
t.Fatalf("host-port URL changed on re-register: got %q, want preserved http://localhost:41751", got)
|
||||
}
|
||||
|
||||
// Same with the legacy 127.0.0.1 prefix (back-compat).
|
||||
id2 := insertWorkspaceWithURL(t, conn, "hostport-ws-legacy", "online", "http://127.0.0.1:33605")
|
||||
if _, err := conn.ExecContext(ctx, registerUpsertSQL,
|
||||
id2, "hostport-ws-legacy", "http://127.0.0.1:33605", `{"name":"x"}`, "push"); err != nil {
|
||||
t.Fatalf("register upsert 3 (127.0.0.1): %v", err)
|
||||
}
|
||||
if got := urlOf(t, conn, id2); got != "http://127.0.0.1:33605" {
|
||||
t.Fatalf("legacy 127.0.0.1 host-port URL not preserved: got %q, want http://127.0.0.1:33605", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestIntegration_RegistryRowState_RegisterOverwritesRuntime8000 covers the
|
||||
// "runtime's wrong localhost:8000 fallback overwriting the provisioner's
|
||||
// host-port" case: the CASE excludes port 8000 (which is the runtime's
|
||||
// listen-port fallback, NOT the provisioner's host-port) so the upsert
|
||||
// overwrites with EXCLUDED.url — but EXCLUDED.url in this scenario is
|
||||
// ALSO 8000, so the net result is the row's URL is 8000 (which is
|
||||
// wrong but reflects the runtime's broken env propagation — the
|
||||
// underlying fix is the buildStartWorkspaceEnv injection, not the CASE).
|
||||
func TestIntegration_RegistryRowState_RegisterOverwritesRuntime8000(t *testing.T) {
|
||||
conn := integrationAuthDB(t)
|
||||
ctx := context.Background()
|
||||
|
||||
// Provisioner set the legacy 127.0.0.1:<port> URL (pre-round-3 path).
|
||||
id := insertWorkspaceWithURL(t, conn, "port8000-ws", "online", "http://127.0.0.1:8000")
|
||||
// Runtime re-registers with its localhost:8000 fallback.
|
||||
if _, err := conn.ExecContext(ctx, registerUpsertSQL,
|
||||
id, id, "http://localhost:8000", `{"name":"x"}`, "push"); err != nil {
|
||||
t.Fatalf("register upsert: %v", err)
|
||||
}
|
||||
// CASE excludes port 8000 → falls through to EXCLUDED.url → row = localhost:8000.
|
||||
if got := urlOf(t, conn, id); got != "http://localhost:8000" {
|
||||
t.Fatalf("port-8000 case: got %q, want http://localhost:8000 (CASE excluded, EXCLUDED wins)", got)
|
||||
}
|
||||
|
||||
// And vice-versa (existing localhost:8000 + runtime 127.0.0.1:8000):
|
||||
id2 := insertWorkspaceWithURL(t, conn, "port8000-ws-2", "online", "http://localhost:8000")
|
||||
if _, err := conn.ExecContext(ctx, registerUpsertSQL,
|
||||
id2, "port8000-ws-2", "http://127.0.0.1:8000", `{"name":"x"}`, "push"); err != nil {
|
||||
t.Fatalf("register upsert 2: %v", err)
|
||||
}
|
||||
if got := urlOf(t, conn, id2); got != "http://127.0.0.1:8000" {
|
||||
t.Fatalf("port-8000 reverse: got %q, want http://127.0.0.1:8000", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestIntegration_RegistryRowState_RegisterOverwritesNonLocalhost covers the
|
||||
// "the row's existing URL is a non-localhost URL (e.g. https://example.com/foo
|
||||
// or http://192.168.1.100:8080) — the CASE only matches localhost/127.0.0.1
|
||||
// prefix, so non-localhost URLs are NOT preserved; EXCLUDED.url wins".
|
||||
// This is the defense-in-depth: non-localhost URLs that shouldn't be in
|
||||
// workspaces.url (SSRF defense) get overwritten by the runtime's
|
||||
// validateAgentURL-checked URL.
|
||||
func TestIntegration_RegistryRowState_RegisterOverwritesNonLocalhost(t *testing.T) {
|
||||
conn := integrationAuthDB(t)
|
||||
ctx := context.Background()
|
||||
|
||||
// Legacy state: a non-localhost URL snuck into the row (shouldn't happen
|
||||
// in practice post-#1130, but if it did, the upsert must not preserve
|
||||
// it — EXCLUDED.url is the new URL and the runtime's
|
||||
// validateAgentURL already gated it).
|
||||
id := insertWorkspaceWithURL(t, conn, "nonlocal-ws", "online", "http://192.168.1.100:8080")
|
||||
if _, err := conn.ExecContext(ctx, registerUpsertSQL,
|
||||
id, id, "http://localhost:41751", `{"name":"x"}`, "push"); err != nil {
|
||||
t.Fatalf("register upsert: %v", err)
|
||||
}
|
||||
if got := urlOf(t, conn, id); got != "http://localhost:41751" {
|
||||
t.Fatalf("non-localhost URL not overwritten: got %q, want http://localhost:41751", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntegration_RegistryRowState_HeartbeatDoesNotResurrectRemoved(t *testing.T) {
|
||||
conn := integrationAuthDB(t)
|
||||
ctx := context.Background()
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
@@ -561,8 +562,60 @@ func allocateHostPort() (string, error) {
|
||||
// handler layer then stores the equivalent http://127.0.0.1:<port> URL and
|
||||
// the A2A proxy rewrites it to ws-<id>:8000 when the platform itself runs
|
||||
// inside Docker.
|
||||
//
|
||||
// #2851 follow-up: when the platform also runs inside a container, the
|
||||
// workspace must advertise the Docker host/gateway IP (PLATFORM_HOST_IP) so
|
||||
// the platform container can reach the host-mapped workspace port. The
|
||||
// operator sets MOLECULE_WORKSPACE_ADVERTISE_HOST to override the default
|
||||
// "localhost".
|
||||
func workspaceAdvertiseURL(hostPort string) string {
|
||||
return fmt.Sprintf("http://localhost:%s", hostPort)
|
||||
host := os.Getenv("MOLECULE_WORKSPACE_ADVERTISE_HOST")
|
||||
if host == "" {
|
||||
host = "localhost"
|
||||
}
|
||||
return fmt.Sprintf("http://%s:%s", host, hostPort)
|
||||
}
|
||||
|
||||
// buildStartWorkspaceEnv assembles the full env for the workspace container,
|
||||
// including the platform-injected MOLECULE_WORKSPACE_URL. Extracted from
|
||||
// Start() so the production-path env injection is unit-testable without a
|
||||
// Docker daemon (Researcher #11798 / #11787 close-out — the gap that bit
|
||||
// 3 rounds running: in real-image lifecycle E2E, the runtime's resolve_workspace_url
|
||||
// fell back to http://localhost:8000 because the provisioner's
|
||||
// MOLECULE_WORKSPACE_URL injection was not exercised in tests, and the
|
||||
// Register handler's URL-preservation CASE only matched the legacy
|
||||
// 127.0.0.1 prefix — so the upsert overwrote the host-port URL with the
|
||||
// runtime's 8000 fallback. Both gaps closed: this helper ensures the
|
||||
// injection happens, and the Register handler's CASE now also matches
|
||||
// the localhost prefix the provisioner uses after round-3).
|
||||
func buildStartWorkspaceEnv(cfg WorkspaceConfig, hostPort string) []string {
|
||||
return append(buildContainerEnv(cfg),
|
||||
fmt.Sprintf("MOLECULE_WORKSPACE_URL=%s", workspaceAdvertiseURL(hostPort)))
|
||||
}
|
||||
|
||||
|
||||
// resolveStartWorkspaceHostURL computes the hostURL that StartWorkspace
|
||||
// should return to the platform. The host comes from workspaceAdvertiseURL
|
||||
// (env override → localhost); the port starts at hostPort and is swapped
|
||||
// to boundPort if Docker bound a different one.
|
||||
//
|
||||
// #2851 (registration-path fix): the persisted hostURL must be the
|
||||
// host-reachable advertise URL (not 127.0.0.1), so ProxyA2A's
|
||||
// resolveAgentURL doesn't rewrite it to the internal Docker hostname
|
||||
// (ws-<id>:8000) and isSafeURL then reject it. The env override lets
|
||||
// the operator point the runtime at the Docker host/gateway IP in
|
||||
// containerized-platform mode; the default "localhost" preserves the
|
||||
// pre-#2851 behavior when the platform runs on the host directly.
|
||||
func resolveStartWorkspaceHostURL(hostPort, boundPort string) string {
|
||||
hostURL := workspaceAdvertiseURL(hostPort)
|
||||
if boundPort == "" || boundPort == hostPort {
|
||||
return hostURL
|
||||
}
|
||||
u, err := url.Parse(hostURL)
|
||||
if err != nil || u.Hostname() == "" {
|
||||
return hostURL
|
||||
}
|
||||
return fmt.Sprintf("http://%s:%s", u.Hostname(), boundPort)
|
||||
}
|
||||
|
||||
// Start provisions and starts a workspace container.
|
||||
@@ -595,12 +648,17 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
|
||||
}
|
||||
log.Printf("Provisioner: config volume %s ready", configVolume)
|
||||
|
||||
// #2851: tell the runtime exactly which URL to advertise. localhost is
|
||||
// accepted by registry validateAgentURL; the handler layer then stores the
|
||||
// equivalent http://127.0.0.1:<port> URL and the A2A proxy rewrites it to
|
||||
// ws-<id>:8000 when the platform itself runs inside Docker.
|
||||
// #2851 (round-4 production-injection fix): tell the runtime exactly which
|
||||
// URL to advertise. The helper injects MOLECULE_WORKSPACE_URL=<host-port>
|
||||
// into the container env (host-port URL, not the runtime's listen-port
|
||||
// 8000 fallback). The runtime's resolve_workspace_url honors
|
||||
// MOLECULE_WORKSPACE_URL at highest precedence, so the registered URL
|
||||
// matches the host-port the provisioner allocated — preventing the
|
||||
// "registered as localhost:8000 but the host-port is 41751" gap
|
||||
// that bit 3 rounds running in production (the real-image lifecycle
|
||||
// E2E ProxyA2A path).
|
||||
env := buildStartWorkspaceEnv(cfg, hostPort)
|
||||
advertiseURL := workspaceAdvertiseURL(hostPort)
|
||||
env := append(buildContainerEnv(cfg), fmt.Sprintf("MOLECULE_WORKSPACE_URL=%s", advertiseURL))
|
||||
|
||||
image, imgErr := selectImage(cfg)
|
||||
if imgErr != nil {
|
||||
@@ -821,10 +879,17 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
|
||||
// /configs and /workspace, then drops to agent via gosu). No per-start
|
||||
// chown needed here.
|
||||
|
||||
// #2851: use the pre-allocated host port directly. The inspect loop below
|
||||
// is kept as a verification that Docker bound the expected port, but the
|
||||
// stable value comes from allocateHostPort above.
|
||||
hostURL := fmt.Sprintf("http://127.0.0.1:%s", hostPort)
|
||||
// #2851 (registration-path fix): use the host-reachable advertise URL
|
||||
// (not just 127.0.0.1) as the hostURL persisted in the DB. When the
|
||||
// platform itself runs inside an act_runner container, 127.0.0.1 in
|
||||
// the workspace URL would force ProxyA2A's resolveAgentURL to rewrite
|
||||
// to the internal Docker hostname (ws-<id>:8000), which isSafeURL then
|
||||
// rejects as "workspace URL is not publicly routable". By persisting
|
||||
// the advertise URL (172.18.0.1:<port> in containerized dev, or the
|
||||
// operator's host IP in prod), the platform-facing URL is host-
|
||||
// reachable, the resolveAgentURL rewrite doesn't kick in, and the
|
||||
// dev-mode SSRF relaxation allows the 172.18/16 private range.
|
||||
hostURL := resolveStartWorkspaceHostURL(hostPort, "")
|
||||
for attempt := 0; attempt < 3; attempt++ {
|
||||
info, inspectErr := p.cli.ContainerInspect(ctx, resp.ID)
|
||||
if inspectErr != nil {
|
||||
@@ -837,13 +902,9 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
|
||||
if attempt < 2 {
|
||||
time.Sleep(500 * time.Millisecond) // wait for Docker to bind the port
|
||||
} else {
|
||||
log.Printf("Provisioner: container %s did not bind expected host port %s; falling back to inspect value", name, hostPort)
|
||||
log.Printf("Provisioner: container %s did not bind expected host port %s; falling back to bound port (keeping advertise host)", name, hostPort)
|
||||
if len(portBindings) > 0 {
|
||||
boundIP := portBindings[0].HostIP
|
||||
if boundIP == "" {
|
||||
boundIP = "127.0.0.1"
|
||||
}
|
||||
hostURL = fmt.Sprintf("http://%s:%s", boundIP, portBindings[0].HostPort)
|
||||
hostURL = resolveStartWorkspaceHostURL(hostPort, portBindings[0].HostPort)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1669,9 +1669,154 @@ func TestAllocateHostPort(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestWorkspaceAdvertiseURL(t *testing.T) {
|
||||
got := workspaceAdvertiseURL("12345")
|
||||
want := "http://localhost:12345"
|
||||
if got != want {
|
||||
t.Errorf("workspaceAdvertiseURL = %q, want %q", got, want)
|
||||
}
|
||||
t.Run("default localhost", func(t *testing.T) {
|
||||
got := workspaceAdvertiseURL("12345")
|
||||
want := "http://localhost:12345"
|
||||
if got != want {
|
||||
t.Errorf("workspaceAdvertiseURL = %q, want %q", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("MOLECULE_WORKSPACE_ADVERTISE_HOST override", func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "192.168.1.100")
|
||||
got := workspaceAdvertiseURL("12345")
|
||||
want := "http://192.168.1.100:12345"
|
||||
if got != want {
|
||||
t.Errorf("workspaceAdvertiseURL = %q, want %q", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("empty env defaults to localhost", func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "")
|
||||
got := workspaceAdvertiseURL("12345")
|
||||
want := "http://localhost:12345"
|
||||
if got != want {
|
||||
t.Errorf("workspaceAdvertiseURL = %q, want %q", got, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestResolveStartWorkspaceHostURL covers the #2851 registration-path fix:
|
||||
// the hostURL StartWorkspace persists in the DB must be the host-reachable
|
||||
// advertise URL (not 127.0.0.1) so ProxyA2A's resolveAgentURL doesn't
|
||||
// rewrite it to the internal Docker hostname. Each subtest pins the env
|
||||
// var then asserts the hostURL for both the initial and inspect-fallback
|
||||
// paths.
|
||||
func TestResolveStartWorkspaceHostURL(t *testing.T) {
|
||||
t.Run("default localhost (no env override)", func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "")
|
||||
got := resolveStartWorkspaceHostURL("12345", "")
|
||||
want := "http://localhost:12345"
|
||||
if got != want {
|
||||
t.Errorf("resolveStartWorkspaceHostURL = %q, want %q", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("env override, no bound-port swap", func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "172.18.0.1")
|
||||
got := resolveStartWorkspaceHostURL("33605", "")
|
||||
want := "http://172.18.0.1:33605"
|
||||
if got != want {
|
||||
t.Errorf("resolveStartWorkspaceHostURL = %q, want %q", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("env override, bound port differs (inspect-fallback path keeps advertise host)", func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "172.18.0.1")
|
||||
// pre-allocated hostPort was 33605 but Docker bound 33606.
|
||||
// Final URL must use the advertise host + the bound port.
|
||||
got := resolveStartWorkspaceHostURL("33605", "33606")
|
||||
want := "http://172.18.0.1:33606"
|
||||
if got != want {
|
||||
t.Errorf("resolveStartWorkspaceHostURL = %q, want %q", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("env override, bound port matches (no-op)", func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "192.168.65.2")
|
||||
got := resolveStartWorkspaceHostURL("8080", "8080")
|
||||
want := "http://192.168.65.2:8080"
|
||||
if got != want {
|
||||
t.Errorf("resolveStartWorkspaceHostURL = %q, want %q", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("no env override, bound port swap (legacy 127.0.0.1 case)", func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "")
|
||||
// default "localhost" but bound port differs
|
||||
got := resolveStartWorkspaceHostURL("8080", "9090")
|
||||
want := "http://localhost:9090"
|
||||
if got != want {
|
||||
t.Errorf("resolveStartWorkspaceHostURL = %q, want %q", got, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
// TestBuildStartWorkspaceEnv covers the #2851 production-path env injection
|
||||
// gap that bit 3 rounds running (Researcher #11798 / #11787 close-out).
|
||||
// The provisioner's Start() must inject MOLECULE_WORKSPACE_URL=<host-port>
|
||||
// into the container env so the runtime's resolve_workspace_url
|
||||
// (highest precedence for the env var) returns the same host-port URL
|
||||
// the provisioner persisted. When env propagation is broken
|
||||
// (the real-image lifecycle E2E gap), the runtime falls back to
|
||||
// http://HOSTNAME:8000 — the Register handler's upsert must then
|
||||
// preserve the provisioner's URL (the Register handler fix is in
|
||||
// registry.go, this test covers the provisioner side).
|
||||
func TestBuildStartWorkspaceEnv(t *testing.T) {
|
||||
cfg := WorkspaceConfig{
|
||||
WorkspaceID: "ws-test-1",
|
||||
Tier: 1,
|
||||
PlatformURL: "http://platform:8080",
|
||||
}
|
||||
|
||||
find := func(env []string, key string) string {
|
||||
for _, e := range env {
|
||||
if len(e) >= len(key)+1 && e[:len(key)+1] == key+"=" {
|
||||
return e[len(key)+1:]
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
t.Run("default localhost (no env override)", func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "")
|
||||
env := buildStartWorkspaceEnv(cfg, "41751")
|
||||
got := find(env, "MOLECULE_WORKSPACE_URL")
|
||||
want := "http://localhost:41751"
|
||||
if got != want {
|
||||
t.Errorf("MOLECULE_WORKSPACE_URL = %q, want %q", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("env override (containerized-platform path)", func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "172.18.0.1")
|
||||
env := buildStartWorkspaceEnv(cfg, "33605")
|
||||
got := find(env, "MOLECULE_WORKSPACE_URL")
|
||||
want := "http://172.18.0.1:33605"
|
||||
if got != want {
|
||||
t.Errorf("MOLECULE_WORKSPACE_URL = %q, want %q", got, want)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("env injection comes AFTER buildContainerEnv (last-wins for docker -e)", func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "")
|
||||
env := buildStartWorkspaceEnv(cfg, "12345")
|
||||
var workspaceURLIdx, workspaceIDIdx int
|
||||
workspaceURLIdx, workspaceIDIdx = -1, -1
|
||||
for i, e := range env {
|
||||
if len(e) > 22 && e[:23] == "MOLECULE_WORKSPACE_URL=" {
|
||||
workspaceURLIdx = i
|
||||
}
|
||||
if len(e) > 12 && e[:13] == "WORKSPACE_ID=" {
|
||||
workspaceIDIdx = i
|
||||
}
|
||||
}
|
||||
if workspaceURLIdx == -1 {
|
||||
t.Fatal("MOLECULE_WORKSPACE_URL not in env")
|
||||
}
|
||||
if workspaceURLIdx <= workspaceIDIdx {
|
||||
t.Errorf("MOLECULE_WORKSPACE_URL (idx %d) must come AFTER buildContainerEnv entries (WORKSPACE_ID idx %d) for docker -e last-wins", workspaceURLIdx, workspaceIDIdx)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user