fix(provisioner,e2e): #2851 advertise Docker gateway IP when platform is containerized #2860

Merged
devops-engineer merged 8 commits from fix/2851-containerized-platform-advertise-host into main 2026-06-14 17:57:03 +00:00
7 changed files with 479 additions and 50 deletions
+16 -15
View File
@@ -324,13 +324,13 @@ jobs:
# even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
MOLECULE_ENV: development
SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
# #2851: the provisioner now makes the workspace advertise http://localhost:<host-port>.
# When the platform itself runs inside Docker, the A2A proxy rewrites the
# stored http://127.0.0.1:<host-port> URL to ws-<id>:8000, so keep the job
# container on molecule-core-net and Docker-aware mode so that Docker-DNS
# name resolves. Dev mode already relaxes private-range SSRF checks, so SaaS
# deploy mode is no longer required.
MOLECULE_IN_DOCKER: true
# #2851 fix follow-up: act_runner executes this job container in host-network
# mode, so the platform process shares the host's loopback. The A2A proxy
# must therefore keep the stored http://127.0.0.1:<host-port> URL as-is;
# enabling Docker-aware rewrite would synthesize ws-<id>:8000, which the
# host-network platform cannot resolve. Leave MOLECULE_IN_DOCKER=false so
# the proxy uses the host-mapped port directly (same as lifecycle-stub).
MOLECULE_IN_DOCKER: false
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
@@ -356,14 +356,10 @@ jobs:
# uses for token-write helper containers.
docker pull alpine:3.20@sha256:c64c687cbea9300178b30c95835354e34c4e4febc4badfe27102879de0483b5e >/dev/null
docker network create molecule-core-net >/dev/null 2>&1 || true
# #2851: connect the act_runner job container to molecule-core-net so the
# platform process can resolve ws-<id>:8000 workspace URLs.
JOB_CID="$(hostname)"
if docker network connect molecule-core-net "$JOB_CID" >/dev/null 2>&1; then
echo "Connected job container ($JOB_CID) to molecule-core-net."
else
echo "WARN: could not connect job container to molecule-core-net (already connected or fallback)."
fi
# Workspace containers join molecule-core-net. The act_runner job
# container runs host-network, so connecting it to the bridge is not
# possible, but the bridge itself must exist for sibling workspace
# containers to communicate.
- name: Start Postgres (docker, ephemeral host port)
run: |
@@ -424,6 +420,11 @@ jobs:
fi
echo "PLATFORM_HOST_IP=${PLATFORM_HOST_IP}"
echo "PLATFORM_URL=http://${PLATFORM_HOST_IP}:${PORT}" >> "$GITHUB_ENV"
# The workspace advertises localhost:<host-port> (the default when
# MOLECULE_WORKSPACE_ADVERTISE_HOST is unset). Because this job runs
# host-network, the platform reaches the container via 127.0.0.1; no
# gateway-IP advertisement is needed here and keeping it unset lets
# /registry/register accept the localhost URL.
T="lpe2e-real-admin-${{ github.run_id }}-${{ github.run_attempt }}"
echo "ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
echo "MOLECULE_ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
+8 -4
View File
@@ -83,11 +83,15 @@ HOSTNAME = os.environ.get("HOSTNAME", "").strip() # docker sets this to the con
# address (0.0.0.0:8000, reachable as ws-<id>:8000 on the bridge) is what
# the proxy actually hits — independent of the URL string we register.
#
# Net: register a name-form localhost URL purely to satisfy push-mode's
# "url required + must pass SSRF check" and to get our auth_token. Routing is
# handled by the provisioner-stored 127.0.0.1 URL + the proxy rewrite.
# Net: register a host-reachable localhost URL. The provisioner injects the
# correct host-mapped port via MOLECULE_WORKSPACE_URL (#2851); prefer that and
# fall back to the legacy STUB_REGISTER_URL or the internal listen port.
_short = WORKSPACE_ID[:12] if len(WORKSPACE_ID) > 12 else WORKSPACE_ID
SELF_URL = os.environ.get("STUB_REGISTER_URL", f"http://localhost:{PORT}")
SELF_URL = (
os.environ.get("STUB_REGISTER_URL")
or os.environ.get("MOLECULE_WORKSPACE_URL")
or f"http://localhost:{PORT}"
)
CONFIG_PATH = (os.environ.get("WORKSPACE_CONFIG_PATH") or "/configs").rstrip("/")
AUTH_TOKEN_FILE = f"{CONFIG_PATH}/.auth_token"
@@ -522,10 +522,12 @@ done
if [ -n "$RUN" ]; then pass "container running: $RUN"; else fail "no running ws-${WSID} container within 10s of online" "docker ps shows none"; fi
# #2851: fail fast if the workspace advertised an unresolvable/unreachable URL.
# The provisioner now makes the runtime advertise http://localhost:<host-port>,
# which the platform stores as http://127.0.0.1:<host-port>. The A2A proxy
# rewrites that to ws-<id>:8000 when the platform runs inside Docker, so the
# URL stored in the registry should always be a host-reachable loopback address.
# The provisioner now makes the runtime advertise http://localhost:<host-port>
# by default, which the platform stores as http://127.0.0.1:<host-port>. The A2A
# proxy rewrites that to ws-<id>:8000 when the platform runs inside Docker, so
# the URL stored in the registry should always be a host-reachable address.
# When the platform itself is containerized, MOLECULE_WORKSPACE_ADVERTISE_HOST
# points the runtime at the Docker gateway IP instead; accept that too.
# (The end-to-end proxy reach in Step 5 is the real reachability proof; this
# assertion just surfaces hostname/DNS misconfiguration early.)
WS_URL_AFTER=$(ws_field "$WS" "url")
@@ -534,10 +536,13 @@ if [ -n "$WS_URL_AFTER" ]; then
else
fail "workspace URL is empty after reaching online" "registry row has no url"
fi
if echo "$WS_URL_AFTER" | grep -qE '^https?://(127\.0\.0\.1|localhost):'; then
pass "workspace registered a host-reachable loopback URL"
URL_HOST_AFTER="${WS_URL_AFTER#http://}"
URL_HOST_AFTER="${URL_HOST_AFTER#https://}"
URL_HOST_AFTER="${URL_HOST_AFTER%%:*}"
if [ "$URL_HOST_AFTER" = "127.0.0.1" ] || [ "$URL_HOST_AFTER" = "localhost" ] || [ "$URL_HOST_AFTER" = "${PLATFORM_HOST_IP:-}" ]; then
pass "workspace registered a host-reachable URL (host=$URL_HOST_AFTER)"
else
fail "workspace URL is not a host-reachable loopback address" "url=$WS_URL_AFTER"
fail "workspace URL is not a host-reachable address" "url=$WS_URL_AFTER expected localhost/127.0.0.1 or PLATFORM_HOST_IP=${PLATFORM_HOST_IP:-<unset>}"
fi
echo ""
+52 -2
View File
@@ -10,6 +10,7 @@ import (
"net/http"
"net/url"
"os"
"strconv"
"strings"
"sync"
"time"
@@ -21,6 +22,30 @@ import (
"github.com/gin-gonic/gin"
)
// isProvisionerHostPortURL reports whether u is a loopback URL with a port
// other than 8000. Such URLs are written by the provisioner (via
// MOLECULE_WORKSPACE_URL) and should be preserved across runtime registrations.
// The 8000 fallback is the runtime's own default and is allowed to be
// overwritten.
func isProvisionerHostPortURL(u string) bool {
if u == "" {
return false
}
if !strings.HasPrefix(u, "http://127.0.0.1:") && !strings.HasPrefix(u, "http://localhost:") {
return false
}
// Extract the trailing port.
port := u[strings.LastIndex(u, ":")+1:]
if port == "" {
return false
}
n, err := strconv.Atoi(port)
if err != nil {
return false
}
return n != 8000
}
// blockedRange is a named CIDR block so the conditional blocklist in
// validateAgentURL reads as a slice of homogeneous values instead of
// repeated anonymous struct literals.
@@ -578,8 +603,33 @@ func (h *RegistryHandler) Register(c *gin.Context) {
INSERT INTO workspaces (id, name, url, agent_card, status, last_heartbeat_at, delivery_mode, kind)
VALUES ($1, $2, $3, $4::jsonb, 'online', now(), $5, COALESCE(NULLIF($6, ''), 'workspace'))
ON CONFLICT (id) DO UPDATE SET
-- Preserve the provisioner-set host-port URL. The provisioner
-- injects MOLECULE_WORKSPACE_URL=<host-port> into the container
-- env (buildStartWorkspaceEnv in workspace-server), so the
-- runtime should register that same URL. The runtime's
-- resolve_workspace_url honors MOLECULE_WORKSPACE_URL at highest
-- precedence, so when the env propagation is correct, the
-- runtime's URL == provisioner's URL. When env propagation is
-- broken (real-image lifecycle E2E gap that bit 3 rounds
-- running), the runtime falls back to http://HOSTNAME:8000
-- — the port 8000 makes it distinguishable from the
-- provisioner's host-port (typically >30000). Preserve the
-- provisioner's URL when its port != 8000.
--
-- Researcher #11798: round-3 fix changed the provisioner
-- from http://127.0.0.1 to http://localhost (the
-- workspaceAdvertiseURL default), but the Register handler
-- only matched the legacy 127.0.0.1 prefix, so the upsert
-- overwrote the provisioner's URL with the runtime's 8000
-- fallback. Generalize to match any host-prefixed
-- host-port URL whose port != 8000.
url = CASE
WHEN workspaces.url LIKE 'http://127.0.0.1%' THEN workspaces.url
WHEN workspaces.url IS NOT NULL
AND workspaces.url != ''
AND (workspaces.url LIKE 'http://127.0.0.1:%'
OR workspaces.url LIKE 'http://localhost:%')
AND CAST(substring(workspaces.url FROM ':([0-9]+)$') AS int) <> 8000
THEN workspaces.url
ELSE EXCLUDED.url
END,
agent_card = EXCLUDED.agent_card,
@@ -617,7 +667,7 @@ func (h *RegistryHandler) Register(c *gin.Context) {
cachedURL := effectiveURL
var dbURL string
if err := db.DB.QueryRowContext(ctx, `SELECT url FROM workspaces WHERE id = $1`, payload.ID).Scan(&dbURL); err == nil {
if strings.HasPrefix(dbURL, "http://127.0.0.1") {
if isProvisionerHostPortURL(dbURL) {
cachedURL = dbURL
}
}
@@ -157,6 +157,44 @@ func statusOf(t *testing.T, conn *sql.DB, id string) string {
return status
}
// insertWorkspaceWithURL creates a workspace row with an explicit URL
// (the provisioner-set host-port URL the CASE-preservation tests need
// pre-populated). Returns the DB-generated UUID. Mirrors insertWorkspace
// but additionally writes the `url` column on insert.
func insertWorkspaceWithURL(t *testing.T, conn *sql.DB, name, status, url string) string {
t.Helper()
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
var id string
err := conn.QueryRowContext(ctx, `
INSERT INTO workspaces (name, status, parent_id, delivery_mode, url)
VALUES ($1, $2, NULL, 'push', NULLIF($3, ''))
RETURNING id
`, name, status, url).Scan(&id)
if err != nil {
t.Fatalf("insertWorkspaceWithURL(%s): %v", name, err)
}
return id
}
// urlOf reads a workspace row's current url, failing the test if the row
// is gone or the url is NULL. Used by the CASE-preservation tests to
// verify the upsert preserved (or did not preserve) the expected URL.
func urlOf(t *testing.T, conn *sql.DB, id string) string {
t.Helper()
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
var url sql.NullString
err := conn.QueryRowContext(ctx, `SELECT url FROM workspaces WHERE id = $1`, id).Scan(&url)
if err != nil {
t.Fatalf("urlOf(%s): %v", id, err)
}
if !url.Valid {
t.Fatalf("urlOf(%s): row has NULL url", id)
}
return url.String
}
// ---------------------------------------------------------------------------
// 1 — registry register/heartbeat row-state: the #73 tombstone guard.
//
@@ -176,8 +214,25 @@ const registerUpsertSQL = `
INSERT INTO workspaces (id, name, url, agent_card, status, last_heartbeat_at, delivery_mode)
VALUES ($1, $2, $3, $4::jsonb, 'online', now(), $5)
ON CONFLICT (id) DO UPDATE SET
-- Preserve the provisioner-set host-port URL. The provisioner
-- injects MOLECULE_WORKSPACE_URL=<host-port> into the container
-- env (buildStartWorkspaceEnv in workspace-server), so the
-- runtime should register that same URL. The runtime's
-- resolve_workspace_url honors MOLECULE_WORKSPACE_URL at highest
-- precedence, so when the env propagation is correct, the
-- runtime's URL == provisioner's URL. When env propagation is
-- broken (real-image lifecycle E2E gap that bit 3 rounds
-- running), the runtime falls back to http://HOSTNAME:8000
-- — the port 8000 makes it distinguishable from the
-- provisioner's host-port (typically >30000). Preserve the
-- provisioner's URL when its port != 8000.
url = CASE
WHEN workspaces.url LIKE 'http://127.0.0.1%' THEN workspaces.url
WHEN workspaces.url IS NOT NULL
AND workspaces.url != ''
AND (workspaces.url LIKE 'http://127.0.0.1:%'
OR workspaces.url LIKE 'http://localhost:%')
AND CAST(substring(workspaces.url FROM ':([0-9]+)$') AS int) <> 8000
THEN workspaces.url
ELSE EXCLUDED.url
END,
agent_card = EXCLUDED.agent_card,
@@ -237,6 +292,114 @@ func TestIntegration_RegistryRowState_RegisterUpsertsLiveWorkspaceToOnline(t *te
}
}
// TestIntegration_RegistryRowState_RegisterPreservesProvisionerHostPort covers
// the #2851 round-4 / Researcher #11798 close-out: when the provisioner has
// already set a host-port URL on the row (e.g. http://localhost:41751 via
// buildStartWorkspaceEnv) and the runtime re-registers with the same
// host-port URL (or any non-8000 URL on the localhost/127.0.0.1 prefix),
// the upsert must preserve the EXISTING URL (not overwrite with EXCLUDED).
// Regression of the round-3 11798 gap.
func TestIntegration_RegistryRowState_RegisterPreservesProvisionerHostPort(t *testing.T) {
conn := integrationAuthDB(t)
ctx := context.Background()
id := insertWorkspaceWithURL(t, conn, "hostport-ws", "online", "http://localhost:41751")
// Re-register with the SAME host-port URL. The provisioner would have
// injected this same URL via MOLECULE_WORKSPACE_URL, so the runtime's
// EXCLUDED.url == existing url. The CASE should preserve either way.
if _, err := conn.ExecContext(ctx, registerUpsertSQL,
id, id, "http://localhost:41751", `{"name":"x"}`, "push"); err != nil {
t.Fatalf("register upsert: %v", err)
}
if got := urlOf(t, conn, id); got != "http://localhost:41751" {
t.Fatalf("host-port URL not preserved: got %q, want http://localhost:41751", got)
}
// Re-register with a DIFFERENT host-port URL on the same prefix. The
// provisioner is allowed to update the host-port (e.g. after a restart
// allocated a new port). The CASE should still preserve the row's
// existing host-port URL — the runtime shouldn't be able to silently
// rewrite the port without re-provisioning.
if _, err := conn.ExecContext(ctx, registerUpsertSQL,
id, id, "http://localhost:50000", `{"name":"x"}`, "push"); err != nil {
t.Fatalf("register upsert 2: %v", err)
}
if got := urlOf(t, conn, id); got != "http://localhost:41751" {
t.Fatalf("host-port URL changed on re-register: got %q, want preserved http://localhost:41751", got)
}
// Same with the legacy 127.0.0.1 prefix (back-compat).
id2 := insertWorkspaceWithURL(t, conn, "hostport-ws-legacy", "online", "http://127.0.0.1:33605")
if _, err := conn.ExecContext(ctx, registerUpsertSQL,
id2, "hostport-ws-legacy", "http://127.0.0.1:33605", `{"name":"x"}`, "push"); err != nil {
t.Fatalf("register upsert 3 (127.0.0.1): %v", err)
}
if got := urlOf(t, conn, id2); got != "http://127.0.0.1:33605" {
t.Fatalf("legacy 127.0.0.1 host-port URL not preserved: got %q, want http://127.0.0.1:33605", got)
}
}
// TestIntegration_RegistryRowState_RegisterOverwritesRuntime8000 covers the
// "runtime's wrong localhost:8000 fallback overwriting the provisioner's
// host-port" case: the CASE excludes port 8000 (which is the runtime's
// listen-port fallback, NOT the provisioner's host-port) so the upsert
// overwrites with EXCLUDED.url — but EXCLUDED.url in this scenario is
// ALSO 8000, so the net result is the row's URL is 8000 (which is
// wrong but reflects the runtime's broken env propagation — the
// underlying fix is the buildStartWorkspaceEnv injection, not the CASE).
func TestIntegration_RegistryRowState_RegisterOverwritesRuntime8000(t *testing.T) {
conn := integrationAuthDB(t)
ctx := context.Background()
// Provisioner set the legacy 127.0.0.1:<port> URL (pre-round-3 path).
id := insertWorkspaceWithURL(t, conn, "port8000-ws", "online", "http://127.0.0.1:8000")
// Runtime re-registers with its localhost:8000 fallback.
if _, err := conn.ExecContext(ctx, registerUpsertSQL,
id, id, "http://localhost:8000", `{"name":"x"}`, "push"); err != nil {
t.Fatalf("register upsert: %v", err)
}
// CASE excludes port 8000 → falls through to EXCLUDED.url → row = localhost:8000.
if got := urlOf(t, conn, id); got != "http://localhost:8000" {
t.Fatalf("port-8000 case: got %q, want http://localhost:8000 (CASE excluded, EXCLUDED wins)", got)
}
// And vice-versa (existing localhost:8000 + runtime 127.0.0.1:8000):
id2 := insertWorkspaceWithURL(t, conn, "port8000-ws-2", "online", "http://localhost:8000")
if _, err := conn.ExecContext(ctx, registerUpsertSQL,
id2, "port8000-ws-2", "http://127.0.0.1:8000", `{"name":"x"}`, "push"); err != nil {
t.Fatalf("register upsert 2: %v", err)
}
if got := urlOf(t, conn, id2); got != "http://127.0.0.1:8000" {
t.Fatalf("port-8000 reverse: got %q, want http://127.0.0.1:8000", got)
}
}
// TestIntegration_RegistryRowState_RegisterOverwritesNonLocalhost covers the
// "the row's existing URL is a non-localhost URL (e.g. https://example.com/foo
// or http://192.168.1.100:8080) — the CASE only matches localhost/127.0.0.1
// prefix, so non-localhost URLs are NOT preserved; EXCLUDED.url wins".
// This is the defense-in-depth: non-localhost URLs that shouldn't be in
// workspaces.url (SSRF defense) get overwritten by the runtime's
// validateAgentURL-checked URL.
func TestIntegration_RegistryRowState_RegisterOverwritesNonLocalhost(t *testing.T) {
conn := integrationAuthDB(t)
ctx := context.Background()
// Legacy state: a non-localhost URL snuck into the row (shouldn't happen
// in practice post-#1130, but if it did, the upsert must not preserve
// it — EXCLUDED.url is the new URL and the runtime's
// validateAgentURL already gated it).
id := insertWorkspaceWithURL(t, conn, "nonlocal-ws", "online", "http://192.168.1.100:8080")
if _, err := conn.ExecContext(ctx, registerUpsertSQL,
id, id, "http://localhost:41751", `{"name":"x"}`, "push"); err != nil {
t.Fatalf("register upsert: %v", err)
}
if got := urlOf(t, conn, id); got != "http://localhost:41751" {
t.Fatalf("non-localhost URL not overwritten: got %q, want http://localhost:41751", got)
}
}
func TestIntegration_RegistryRowState_HeartbeatDoesNotResurrectRemoved(t *testing.T) {
conn := integrationAuthDB(t)
ctx := context.Background()
@@ -12,6 +12,7 @@ import (
"io"
"log"
"net"
"net/url"
"os"
"path/filepath"
"runtime"
@@ -561,8 +562,60 @@ func allocateHostPort() (string, error) {
// handler layer then stores the equivalent http://127.0.0.1:<port> URL and
// the A2A proxy rewrites it to ws-<id>:8000 when the platform itself runs
// inside Docker.
//
// #2851 follow-up: when the platform also runs inside a container, the
// workspace must advertise the Docker host/gateway IP (PLATFORM_HOST_IP) so
// the platform container can reach the host-mapped workspace port. The
// operator sets MOLECULE_WORKSPACE_ADVERTISE_HOST to override the default
// "localhost".
func workspaceAdvertiseURL(hostPort string) string {
return fmt.Sprintf("http://localhost:%s", hostPort)
host := os.Getenv("MOLECULE_WORKSPACE_ADVERTISE_HOST")
if host == "" {
host = "localhost"
}
return fmt.Sprintf("http://%s:%s", host, hostPort)
}
// buildStartWorkspaceEnv assembles the full env for the workspace container,
// including the platform-injected MOLECULE_WORKSPACE_URL. Extracted from
// Start() so the production-path env injection is unit-testable without a
// Docker daemon (Researcher #11798 / #11787 close-out — the gap that bit
// 3 rounds running: in real-image lifecycle E2E, the runtime's resolve_workspace_url
// fell back to http://localhost:8000 because the provisioner's
// MOLECULE_WORKSPACE_URL injection was not exercised in tests, and the
// Register handler's URL-preservation CASE only matched the legacy
// 127.0.0.1 prefix — so the upsert overwrote the host-port URL with the
// runtime's 8000 fallback. Both gaps closed: this helper ensures the
// injection happens, and the Register handler's CASE now also matches
// the localhost prefix the provisioner uses after round-3).
func buildStartWorkspaceEnv(cfg WorkspaceConfig, hostPort string) []string {
return append(buildContainerEnv(cfg),
fmt.Sprintf("MOLECULE_WORKSPACE_URL=%s", workspaceAdvertiseURL(hostPort)))
}
// resolveStartWorkspaceHostURL computes the hostURL that StartWorkspace
// should return to the platform. The host comes from workspaceAdvertiseURL
// (env override → localhost); the port starts at hostPort and is swapped
// to boundPort if Docker bound a different one.
//
// #2851 (registration-path fix): the persisted hostURL must be the
// host-reachable advertise URL (not 127.0.0.1), so ProxyA2A's
// resolveAgentURL doesn't rewrite it to the internal Docker hostname
// (ws-<id>:8000) and isSafeURL then reject it. The env override lets
// the operator point the runtime at the Docker host/gateway IP in
// containerized-platform mode; the default "localhost" preserves the
// pre-#2851 behavior when the platform runs on the host directly.
func resolveStartWorkspaceHostURL(hostPort, boundPort string) string {
hostURL := workspaceAdvertiseURL(hostPort)
if boundPort == "" || boundPort == hostPort {
return hostURL
}
u, err := url.Parse(hostURL)
if err != nil || u.Hostname() == "" {
return hostURL
}
return fmt.Sprintf("http://%s:%s", u.Hostname(), boundPort)
}
// Start provisions and starts a workspace container.
@@ -595,12 +648,17 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
}
log.Printf("Provisioner: config volume %s ready", configVolume)
// #2851: tell the runtime exactly which URL to advertise. localhost is
// accepted by registry validateAgentURL; the handler layer then stores the
// equivalent http://127.0.0.1:<port> URL and the A2A proxy rewrites it to
// ws-<id>:8000 when the platform itself runs inside Docker.
// #2851 (round-4 production-injection fix): tell the runtime exactly which
// URL to advertise. The helper injects MOLECULE_WORKSPACE_URL=<host-port>
// into the container env (host-port URL, not the runtime's listen-port
// 8000 fallback). The runtime's resolve_workspace_url honors
// MOLECULE_WORKSPACE_URL at highest precedence, so the registered URL
// matches the host-port the provisioner allocated — preventing the
// "registered as localhost:8000 but the host-port is 41751" gap
// that bit 3 rounds running in production (the real-image lifecycle
// E2E ProxyA2A path).
env := buildStartWorkspaceEnv(cfg, hostPort)
advertiseURL := workspaceAdvertiseURL(hostPort)
env := append(buildContainerEnv(cfg), fmt.Sprintf("MOLECULE_WORKSPACE_URL=%s", advertiseURL))
image, imgErr := selectImage(cfg)
if imgErr != nil {
@@ -821,10 +879,17 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
// /configs and /workspace, then drops to agent via gosu). No per-start
// chown needed here.
// #2851: use the pre-allocated host port directly. The inspect loop below
// is kept as a verification that Docker bound the expected port, but the
// stable value comes from allocateHostPort above.
hostURL := fmt.Sprintf("http://127.0.0.1:%s", hostPort)
// #2851 (registration-path fix): use the host-reachable advertise URL
// (not just 127.0.0.1) as the hostURL persisted in the DB. When the
// platform itself runs inside an act_runner container, 127.0.0.1 in
// the workspace URL would force ProxyA2A's resolveAgentURL to rewrite
// to the internal Docker hostname (ws-<id>:8000), which isSafeURL then
// rejects as "workspace URL is not publicly routable". By persisting
// the advertise URL (172.18.0.1:<port> in containerized dev, or the
// operator's host IP in prod), the platform-facing URL is host-
// reachable, the resolveAgentURL rewrite doesn't kick in, and the
// dev-mode SSRF relaxation allows the 172.18/16 private range.
hostURL := resolveStartWorkspaceHostURL(hostPort, "")
for attempt := 0; attempt < 3; attempt++ {
info, inspectErr := p.cli.ContainerInspect(ctx, resp.ID)
if inspectErr != nil {
@@ -837,13 +902,9 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
if attempt < 2 {
time.Sleep(500 * time.Millisecond) // wait for Docker to bind the port
} else {
log.Printf("Provisioner: container %s did not bind expected host port %s; falling back to inspect value", name, hostPort)
log.Printf("Provisioner: container %s did not bind expected host port %s; falling back to bound port (keeping advertise host)", name, hostPort)
if len(portBindings) > 0 {
boundIP := portBindings[0].HostIP
if boundIP == "" {
boundIP = "127.0.0.1"
}
hostURL = fmt.Sprintf("http://%s:%s", boundIP, portBindings[0].HostPort)
hostURL = resolveStartWorkspaceHostURL(hostPort, portBindings[0].HostPort)
}
}
}
@@ -1669,9 +1669,154 @@ func TestAllocateHostPort(t *testing.T) {
}
func TestWorkspaceAdvertiseURL(t *testing.T) {
got := workspaceAdvertiseURL("12345")
want := "http://localhost:12345"
if got != want {
t.Errorf("workspaceAdvertiseURL = %q, want %q", got, want)
}
t.Run("default localhost", func(t *testing.T) {
got := workspaceAdvertiseURL("12345")
want := "http://localhost:12345"
if got != want {
t.Errorf("workspaceAdvertiseURL = %q, want %q", got, want)
}
})
t.Run("MOLECULE_WORKSPACE_ADVERTISE_HOST override", func(t *testing.T) {
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "192.168.1.100")
got := workspaceAdvertiseURL("12345")
want := "http://192.168.1.100:12345"
if got != want {
t.Errorf("workspaceAdvertiseURL = %q, want %q", got, want)
}
})
t.Run("empty env defaults to localhost", func(t *testing.T) {
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "")
got := workspaceAdvertiseURL("12345")
want := "http://localhost:12345"
if got != want {
t.Errorf("workspaceAdvertiseURL = %q, want %q", got, want)
}
})
}
// TestResolveStartWorkspaceHostURL covers the #2851 registration-path fix:
// the hostURL StartWorkspace persists in the DB must be the host-reachable
// advertise URL (not 127.0.0.1) so ProxyA2A's resolveAgentURL doesn't
// rewrite it to the internal Docker hostname. Each subtest pins the env
// var then asserts the hostURL for both the initial and inspect-fallback
// paths.
func TestResolveStartWorkspaceHostURL(t *testing.T) {
t.Run("default localhost (no env override)", func(t *testing.T) {
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "")
got := resolveStartWorkspaceHostURL("12345", "")
want := "http://localhost:12345"
if got != want {
t.Errorf("resolveStartWorkspaceHostURL = %q, want %q", got, want)
}
})
t.Run("env override, no bound-port swap", func(t *testing.T) {
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "172.18.0.1")
got := resolveStartWorkspaceHostURL("33605", "")
want := "http://172.18.0.1:33605"
if got != want {
t.Errorf("resolveStartWorkspaceHostURL = %q, want %q", got, want)
}
})
t.Run("env override, bound port differs (inspect-fallback path keeps advertise host)", func(t *testing.T) {
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "172.18.0.1")
// pre-allocated hostPort was 33605 but Docker bound 33606.
// Final URL must use the advertise host + the bound port.
got := resolveStartWorkspaceHostURL("33605", "33606")
want := "http://172.18.0.1:33606"
if got != want {
t.Errorf("resolveStartWorkspaceHostURL = %q, want %q", got, want)
}
})
t.Run("env override, bound port matches (no-op)", func(t *testing.T) {
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "192.168.65.2")
got := resolveStartWorkspaceHostURL("8080", "8080")
want := "http://192.168.65.2:8080"
if got != want {
t.Errorf("resolveStartWorkspaceHostURL = %q, want %q", got, want)
}
})
t.Run("no env override, bound port swap (legacy 127.0.0.1 case)", func(t *testing.T) {
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "")
// default "localhost" but bound port differs
got := resolveStartWorkspaceHostURL("8080", "9090")
want := "http://localhost:9090"
if got != want {
t.Errorf("resolveStartWorkspaceHostURL = %q, want %q", got, want)
}
})
}
// TestBuildStartWorkspaceEnv covers the #2851 production-path env injection
// gap that bit 3 rounds running (Researcher #11798 / #11787 close-out).
// The provisioner's Start() must inject MOLECULE_WORKSPACE_URL=<host-port>
// into the container env so the runtime's resolve_workspace_url
// (highest precedence for the env var) returns the same host-port URL
// the provisioner persisted. When env propagation is broken
// (the real-image lifecycle E2E gap), the runtime falls back to
// http://HOSTNAME:8000 — the Register handler's upsert must then
// preserve the provisioner's URL (the Register handler fix is in
// registry.go, this test covers the provisioner side).
func TestBuildStartWorkspaceEnv(t *testing.T) {
cfg := WorkspaceConfig{
WorkspaceID: "ws-test-1",
Tier: 1,
PlatformURL: "http://platform:8080",
}
find := func(env []string, key string) string {
for _, e := range env {
if len(e) >= len(key)+1 && e[:len(key)+1] == key+"=" {
return e[len(key)+1:]
}
}
return ""
}
t.Run("default localhost (no env override)", func(t *testing.T) {
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "")
env := buildStartWorkspaceEnv(cfg, "41751")
got := find(env, "MOLECULE_WORKSPACE_URL")
want := "http://localhost:41751"
if got != want {
t.Errorf("MOLECULE_WORKSPACE_URL = %q, want %q", got, want)
}
})
t.Run("env override (containerized-platform path)", func(t *testing.T) {
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "172.18.0.1")
env := buildStartWorkspaceEnv(cfg, "33605")
got := find(env, "MOLECULE_WORKSPACE_URL")
want := "http://172.18.0.1:33605"
if got != want {
t.Errorf("MOLECULE_WORKSPACE_URL = %q, want %q", got, want)
}
})
t.Run("env injection comes AFTER buildContainerEnv (last-wins for docker -e)", func(t *testing.T) {
t.Setenv("MOLECULE_WORKSPACE_ADVERTISE_HOST", "")
env := buildStartWorkspaceEnv(cfg, "12345")
var workspaceURLIdx, workspaceIDIdx int
workspaceURLIdx, workspaceIDIdx = -1, -1
for i, e := range env {
if len(e) > 22 && e[:23] == "MOLECULE_WORKSPACE_URL=" {
workspaceURLIdx = i
}
if len(e) > 12 && e[:13] == "WORKSPACE_ID=" {
workspaceIDIdx = i
}
}
if workspaceURLIdx == -1 {
t.Fatal("MOLECULE_WORKSPACE_URL not in env")
}
if workspaceURLIdx <= workspaceIDIdx {
t.Errorf("MOLECULE_WORKSPACE_URL (idx %d) must come AFTER buildContainerEnv entries (WORKSPACE_ID idx %d) for docker -e last-wins", workspaceURLIdx, workspaceIDIdx)
}
})
}