fix(provisioner): #2851 inject MOLECULE_WORKSPACE_URL for local-provision real-image #2854
@@ -320,12 +320,13 @@ jobs:
|
||||
# even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
|
||||
MOLECULE_ENV: development
|
||||
SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
|
||||
# act_runner runs the job inside a Docker container, so /.dockerenv exists
|
||||
# and the platform auto-detects platformInDocker=true. But the job container
|
||||
# is NOT on molecule-core-net, so it cannot resolve workspace container
|
||||
# hostnames (ws-<id>:8000). Force false so the proxy keeps using the
|
||||
# host-mapped 127.0.0.1:<ephemeral_port> URL, which IS reachable.
|
||||
MOLECULE_IN_DOCKER: false
|
||||
# #2851: the provisioner now makes the workspace advertise http://localhost:<host-port>.
|
||||
# When the platform itself runs inside Docker, the A2A proxy rewrites the
|
||||
# stored http://127.0.0.1:<host-port> URL to ws-<id>:8000, so keep the job
|
||||
# container on molecule-core-net and Docker-aware mode so that Docker-DNS
|
||||
# name resolves. Dev mode already relaxes private-range SSRF checks, so SaaS
|
||||
# deploy mode is no longer required.
|
||||
MOLECULE_IN_DOCKER: true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
@@ -351,6 +352,14 @@ jobs:
|
||||
# uses for token-write helper containers.
|
||||
docker pull alpine:3.20@sha256:c64c687cbea9300178b30c95835354e34c4e4febc4badfe27102879de0483b5e >/dev/null
|
||||
docker network create molecule-core-net >/dev/null 2>&1 || true
|
||||
# #2851: connect the act_runner job container to molecule-core-net so the
|
||||
# platform process can resolve ws-<id>:8000 workspace URLs.
|
||||
JOB_CID="$(hostname)"
|
||||
if docker network connect molecule-core-net "$JOB_CID" >/dev/null 2>&1; then
|
||||
echo "Connected job container ($JOB_CID) to molecule-core-net."
|
||||
else
|
||||
echo "WARN: could not connect job container to molecule-core-net (already connected or fallback)."
|
||||
fi
|
||||
|
||||
- name: Start Postgres (docker, ephemeral host port)
|
||||
run: |
|
||||
|
||||
@@ -441,6 +441,11 @@ if [ "$LIFECYCLE_LLM" = "minimax" ]; then
|
||||
echo " secret write (MODEL_PROVIDER): $(echo "$SECP" | head -c 120)"
|
||||
fi
|
||||
|
||||
# #2851: the provisioner now injects MOLECULE_WORKSPACE_URL automatically so the
|
||||
# runtime advertises a host-reachable URL (http://localhost:<host-port>), which
|
||||
# the A2A proxy rewrites to ws-<id>:8000 when the platform runs inside Docker.
|
||||
# No manual workspace-secret injection is needed here.
|
||||
|
||||
# Seed config.yaml directly into the named config volume so the provision (and
|
||||
# every later restart) has a config source. Create's byok-no-cred abort never
|
||||
# wrote it, and this dev stack ships no claude-code template in the platform's
|
||||
@@ -515,6 +520,25 @@ for _ in $(seq 1 10); do
|
||||
sleep 1
|
||||
done
|
||||
if [ -n "$RUN" ]; then pass "container running: $RUN"; else fail "no running ws-${WSID} container within 10s of online" "docker ps shows none"; fi
|
||||
|
||||
# #2851: fail fast if the workspace advertised an unresolvable/unreachable URL.
|
||||
# The provisioner now makes the runtime advertise http://localhost:<host-port>,
|
||||
# which the platform stores as http://127.0.0.1:<host-port>. The A2A proxy
|
||||
# rewrites that to ws-<id>:8000 when the platform runs inside Docker, so the
|
||||
# URL stored in the registry should always be a host-reachable loopback address.
|
||||
# (The end-to-end proxy reach in Step 5 is the real reachability proof; this
|
||||
# assertion just surfaces hostname/DNS misconfiguration early.)
|
||||
WS_URL_AFTER=$(ws_field "$WS" "url")
|
||||
if [ -n "$WS_URL_AFTER" ]; then
|
||||
pass "workspace registered a non-empty URL: $WS_URL_AFTER"
|
||||
else
|
||||
fail "workspace URL is empty after reaching online" "registry row has no url"
|
||||
fi
|
||||
if echo "$WS_URL_AFTER" | grep -qE '^https?://(127\.0\.0\.1|localhost):'; then
|
||||
pass "workspace registered a host-reachable loopback URL"
|
||||
else
|
||||
fail "workspace URL is not a host-reachable loopback address" "url=$WS_URL_AFTER"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
@@ -520,6 +521,34 @@ func InternalURL(workspaceID string) string {
|
||||
return fmt.Sprintf("http://%s:%s", ContainerName(workspaceID), DefaultPort)
|
||||
}
|
||||
|
||||
// allocateHostPort binds a temporary TCP listener on 127.0.0.1:0 and returns
|
||||
// the allocated ephemeral port. The listener is closed before returning, so
|
||||
// the port is free for Docker to bind. This lets the provisioner advertise a
|
||||
// stable, host-reachable workspace URL (http://localhost:<port>) before the
|
||||
// container exists, eliminating the race where the runtime registers its
|
||||
// unresolvable Docker container-id hostname.
|
||||
func allocateHostPort() (string, error) {
|
||||
l, err := net.Listen("tcp", "127.0.0.1:0")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to allocate ephemeral host port: %w", err)
|
||||
}
|
||||
defer l.Close()
|
||||
addr, ok := l.Addr().(*net.TCPAddr)
|
||||
if !ok {
|
||||
return "", fmt.Errorf("unexpected listener address type %T", l.Addr())
|
||||
}
|
||||
return strconv.Itoa(addr.Port), nil
|
||||
}
|
||||
|
||||
// workspaceAdvertiseURL returns the URL the runtime should advertise at
|
||||
// register time. localhost is accepted by registry validateAgentURL; the
|
||||
// handler layer then stores the equivalent http://127.0.0.1:<port> URL and
|
||||
// the A2A proxy rewrites it to ws-<id>:8000 when the platform itself runs
|
||||
// inside Docker.
|
||||
func workspaceAdvertiseURL(hostPort string) string {
|
||||
return fmt.Sprintf("http://localhost:%s", hostPort)
|
||||
}
|
||||
|
||||
// Start provisions and starts a workspace container.
|
||||
func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, error) {
|
||||
if p == nil || p.cli == nil {
|
||||
@@ -530,8 +559,18 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
|
||||
// already exists, so pre-deploy workspace data is not orphaned.
|
||||
configVolume := p.resolveConfigVolumeName(ctx, cfg.WorkspaceID)
|
||||
|
||||
// #2851: allocate a stable host port BEFORE building container env so the
|
||||
// runtime can advertise a host-reachable URL. The alternative — letting
|
||||
// Docker pick an ephemeral port and inspecting after start — leaves the
|
||||
// runtime guessing its own address and registering an unresolvable
|
||||
// container-id hostname.
|
||||
hostPort, err := allocateHostPort()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Create named volume for configs (idempotent — no-op if already exists)
|
||||
_, err := p.cli.VolumeCreate(ctx, volume.CreateOptions{
|
||||
_, err = p.cli.VolumeCreate(ctx, volume.CreateOptions{
|
||||
Name: configVolume,
|
||||
Labels: managedLabels(),
|
||||
})
|
||||
@@ -540,7 +579,12 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
|
||||
}
|
||||
log.Printf("Provisioner: config volume %s ready", configVolume)
|
||||
|
||||
env := buildContainerEnv(cfg)
|
||||
// #2851: tell the runtime exactly which URL to advertise. localhost is
|
||||
// accepted by registry validateAgentURL; the handler layer then stores the
|
||||
// equivalent http://127.0.0.1:<port> URL and the A2A proxy rewrites it to
|
||||
// ws-<id>:8000 when the platform itself runs inside Docker.
|
||||
advertiseURL := workspaceAdvertiseURL(hostPort)
|
||||
env := append(buildContainerEnv(cfg), fmt.Sprintf("MOLECULE_WORKSPACE_URL=%s", advertiseURL))
|
||||
|
||||
image, imgErr := selectImage(cfg)
|
||||
if imgErr != nil {
|
||||
@@ -646,7 +690,7 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
|
||||
RestartPolicy: container.RestartPolicy{Name: "unless-stopped"},
|
||||
PortBindings: nat.PortMap{
|
||||
nat.Port(DefaultPort + "/tcp"): []nat.PortBinding{
|
||||
{HostIP: "127.0.0.1", HostPort: ""}, // Ephemeral host port
|
||||
{HostIP: "127.0.0.1", HostPort: hostPort}, // Pre-allocated stable host port (#2851)
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -761,30 +805,34 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
|
||||
// /configs and /workspace, then drops to agent via gosu). No per-start
|
||||
// chown needed here.
|
||||
|
||||
// Resolve the host-mapped port. Retry inspect up to 3 times if Docker hasn't
|
||||
// bound the ephemeral port yet (rare race under heavy load).
|
||||
hostURL := InternalURL(cfg.WorkspaceID) // fallback to Docker-internal
|
||||
// #2851: use the pre-allocated host port directly. The inspect loop below
|
||||
// is kept as a verification that Docker bound the expected port, but the
|
||||
// stable value comes from allocateHostPort above.
|
||||
hostURL := fmt.Sprintf("http://127.0.0.1:%s", hostPort)
|
||||
for attempt := 0; attempt < 3; attempt++ {
|
||||
info, inspectErr := p.cli.ContainerInspect(ctx, resp.ID)
|
||||
if inspectErr != nil {
|
||||
break
|
||||
}
|
||||
portBindings := info.NetworkSettings.Ports[nat.Port(DefaultPort+"/tcp")]
|
||||
if len(portBindings) > 0 {
|
||||
hostPort := portBindings[0].HostPort
|
||||
hostIP := portBindings[0].HostIP
|
||||
if hostIP == "" {
|
||||
hostIP = "127.0.0.1"
|
||||
}
|
||||
hostURL = fmt.Sprintf("http://%s:%s", hostIP, hostPort)
|
||||
if len(portBindings) > 0 && portBindings[0].HostPort == hostPort {
|
||||
break
|
||||
}
|
||||
if attempt < 2 {
|
||||
time.Sleep(500 * time.Millisecond) // wait for Docker to bind the port
|
||||
} else {
|
||||
log.Printf("Provisioner: container %s did not bind expected host port %s; falling back to inspect value", name, hostPort)
|
||||
if len(portBindings) > 0 {
|
||||
boundIP := portBindings[0].HostIP
|
||||
if boundIP == "" {
|
||||
boundIP = "127.0.0.1"
|
||||
}
|
||||
hostURL = fmt.Sprintf("http://%s:%s", boundIP, portBindings[0].HostPort)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.Printf("Provisioner: started container %s for workspace %s at %s (internal: %s)", name, cfg.WorkspaceID, hostURL, InternalURL(cfg.WorkspaceID))
|
||||
log.Printf("Provisioner: started container %s for workspace %s at %s (advertise: %s, internal: %s)", name, cfg.WorkspaceID, hostURL, advertiseURL, InternalURL(cfg.WorkspaceID))
|
||||
return hostURL, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
@@ -1633,3 +1634,44 @@ func TestApplyTierResources(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- #2851 host-port advertisement ----------
|
||||
|
||||
func TestAllocateHostPort(t *testing.T) {
|
||||
port, err := allocateHostPort()
|
||||
if err != nil {
|
||||
t.Fatalf("allocateHostPort failed: %v", err)
|
||||
}
|
||||
if port == "" {
|
||||
t.Fatal("allocateHostPort returned empty port")
|
||||
}
|
||||
if port == "0" {
|
||||
t.Fatalf("allocateHostPort returned port 0")
|
||||
}
|
||||
|
||||
// Verify the port is actually numeric and in the ephemeral range.
|
||||
n, err := strconv.Atoi(port)
|
||||
if err != nil {
|
||||
t.Fatalf("allocateHostPort returned non-numeric port %q: %v", port, err)
|
||||
}
|
||||
if n < 1024 || n > 65535 {
|
||||
t.Fatalf("allocateHostPort returned out-of-range port %d", n)
|
||||
}
|
||||
|
||||
// Verify the port is genuinely free for Docker to bind. (We don't assert
|
||||
// uniqueness across two calls — the OS may immediately reuse a just-closed
|
||||
// ephemeral port under heavy load.)
|
||||
l, err := net.Listen("tcp", "127.0.0.1:"+port)
|
||||
if err != nil {
|
||||
t.Fatalf("allocateHostPort returned port %s that is not bindable: %v", port, err)
|
||||
}
|
||||
l.Close()
|
||||
}
|
||||
|
||||
func TestWorkspaceAdvertiseURL(t *testing.T) {
|
||||
got := workspaceAdvertiseURL("12345")
|
||||
want := "http://localhost:12345"
|
||||
if got != want {
|
||||
t.Errorf("workspaceAdvertiseURL = %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user