Merge pull request #2817 from Molecule-AI/staging
staging → main: auto-promote 856c967
This commit is contained in:
commit
9a835ef631
2
.github/workflows/canary-staging.yml
vendored
2
.github/workflows/canary-staging.yml
vendored
@ -302,7 +302,7 @@ jobs:
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/canary-cleanup.code 2>/dev/null
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/canary-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/canary-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
|
||||
2
.github/workflows/e2e-staging-canvas.yml
vendored
2
.github/workflows/e2e-staging-canvas.yml
vendored
@ -199,7 +199,7 @@ jobs:
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/canvas-cleanup.code 2>/dev/null
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/canvas-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/canvas-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
|
||||
2
.github/workflows/e2e-staging-external.yml
vendored
2
.github/workflows/e2e-staging-external.yml
vendored
@ -166,7 +166,7 @@ jobs:
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/external-cleanup.code 2>/dev/null
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/external-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/external-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
|
||||
2
.github/workflows/e2e-staging-saas.yml
vendored
2
.github/workflows/e2e-staging-saas.yml
vendored
@ -231,7 +231,7 @@ jobs:
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/saas-cleanup.code 2>/dev/null
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/saas-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/saas-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
|
||||
2
.github/workflows/e2e-staging-sanity.yml
vendored
2
.github/workflows/e2e-staging-sanity.yml
vendored
@ -155,7 +155,7 @@ jobs:
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/sanity-cleanup.code 2>/dev/null
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/sanity-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/sanity-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
|
||||
@ -200,8 +200,11 @@ jobs:
|
||||
-H "Authorization: Bearer $CP_ADMIN_API_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-X POST "$CP_URL/cp/admin/tenants/redeploy-fleet" \
|
||||
-d "$BODY" >"$HTTP_CODE_FILE" 2>/dev/null
|
||||
-d "$BODY" >"$HTTP_CODE_FILE"
|
||||
set -e
|
||||
# Stderr from curl (e.g. dial errors with -sS) goes to the runner
|
||||
# log so operators can see WHY a connection failed. Stdout is
|
||||
# captured to $HTTP_CODE_FILE because that's where -w writes.
|
||||
HTTP_CODE=$(cat "$HTTP_CODE_FILE" 2>/dev/null || echo "000")
|
||||
[ -z "$HTTP_CODE" ] && HTTP_CODE="000"
|
||||
|
||||
|
||||
@ -160,8 +160,10 @@ jobs:
|
||||
-H "Authorization: Bearer $CP_STAGING_ADMIN_API_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-X POST "$CP_URL/cp/admin/tenants/redeploy-fleet" \
|
||||
-d "$BODY" >"$HTTP_CODE_FILE" 2>/dev/null
|
||||
-d "$BODY" >"$HTTP_CODE_FILE"
|
||||
set -e
|
||||
# Stderr from curl (-sS shows dial errors etc.) goes to the
|
||||
# runner log so operators can see WHY a connection failed.
|
||||
HTTP_CODE=$(cat "$HTTP_CODE_FILE" 2>/dev/null || echo "000")
|
||||
[ -z "$HTTP_CODE" ] && HTTP_CODE="000"
|
||||
|
||||
|
||||
3
.github/workflows/sweep-stale-e2e-orgs.yml
vendored
3
.github/workflows/sweep-stale-e2e-orgs.yml
vendored
@ -167,8 +167,9 @@ jobs:
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/del_code 2>/dev/null
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/del_code
|
||||
set -e
|
||||
# Stderr from curl (-sS shows dial errors etc.) goes to runner log.
|
||||
http_code=$(cat /tmp/del_code 2>/dev/null || echo "000")
|
||||
if [ "$http_code" = "200" ] || [ "$http_code" = "204" ]; then
|
||||
deleted=$((deleted+1))
|
||||
|
||||
@ -163,7 +163,7 @@ func (h *WorkspaceHandler) maybeMarkContainerDead(ctx context.Context, workspace
|
||||
if wsRuntime == "external" {
|
||||
return false
|
||||
}
|
||||
if h.provisioner == nil && h.cpProv == nil {
|
||||
if !h.HasProvisioner() {
|
||||
return false
|
||||
}
|
||||
|
||||
|
||||
@ -175,8 +175,18 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_ONLINE", id, map[string]interface{}{
|
||||
"name": ws.Name, "external": true,
|
||||
})
|
||||
} else if h.provisioner != nil {
|
||||
// Provision container
|
||||
} else if h.workspace.HasProvisioner() {
|
||||
// Provision container — either backend (CP for SaaS, local Docker
|
||||
// for self-hosted) is fine. Pre-2026-05-05 this gate was
|
||||
// `h.provisioner != nil`, which only checked the Docker pointer
|
||||
// and silently dropped every workspace on a SaaS tenant: the prep
|
||||
// block was skipped, no Auto call ever fired, and the row sat in
|
||||
// 'provisioning' until the 600s sweeper marked it failed with the
|
||||
// misleading "container started but never called /registry/register"
|
||||
// (incident: hongming tenant org-import 2026-05-05 01:14, 7-of-7
|
||||
// claude-code workspaces stuck). Routing to the right backend
|
||||
// happens inside provisionWorkspaceAuto — this gate just decides
|
||||
// whether to do prep at all.
|
||||
payload := models.CreateWorkspacePayload{
|
||||
Name: ws.Name, Tier: tier, Runtime: runtime, Model: model,
|
||||
WorkspaceDir: ws.WorkspaceDir,
|
||||
|
||||
@ -112,21 +112,43 @@ func (h *WorkspaceHandler) SetCPProvisioner(cp provisioner.CPProvisionerAPI) {
|
||||
h.cpProv = cp
|
||||
}
|
||||
|
||||
// HasProvisioner reports whether either backend (CP or local Docker) is
|
||||
// wired. Callers that gate prep-work on "do we have something that can
|
||||
// provision a container?" should use this rather than direct field access
|
||||
// to either provisioner — those individual checks miss the SaaS path
|
||||
// (cpProv set, provisioner nil) or the self-hosted path (provisioner set,
|
||||
// cpProv nil) symmetrically. Org-import + future bulk paths gate their
|
||||
// template/config/secret prep on this so the work isn't wasted on
|
||||
// deployments where no backend is available.
|
||||
func (h *WorkspaceHandler) HasProvisioner() bool {
|
||||
return h.cpProv != nil || h.provisioner != nil
|
||||
}
|
||||
|
||||
// provisionWorkspaceAuto picks the backend (CP for SaaS, local Docker
|
||||
// for self-hosted) and starts provisioning in a goroutine. Returns true
|
||||
// when a backend was kicked off, false when neither is wired (caller
|
||||
// owns the persist-config + mark-failed surface in that case).
|
||||
// when a backend was kicked off, false when neither is wired.
|
||||
//
|
||||
// Centralized so every caller — Create, TeamHandler.Expand, future
|
||||
// paths — gets the same routing. Pre-2026-05-04 TeamHandler.Expand
|
||||
// hardcoded provisionWorkspace (Docker) and silently broke the
|
||||
// "deploy a team on SaaS" flow: child workspace rows were created with
|
||||
// no EC2 instance, the runtime never ran, and the 600s sweeper logged
|
||||
// the misleading "container started but never called /registry/register".
|
||||
// Single source of truth for "start provisioning a workspace" across
|
||||
// every caller (Create, OrgHandler.createWorkspaceTree, TeamHandler.Expand,
|
||||
// future paths). Centralized routing here means callers don't repeat
|
||||
// the "Docker vs CP" decision and can't drift on it.
|
||||
//
|
||||
// Self-marks-failed on the no-backend path: pre-2026-05-05 the false
|
||||
// return was silent, and any caller that forgot to handle it (TeamHandler
|
||||
// pre-#2367, OrgHandler.createWorkspaceTree pre-this-fix) silently
|
||||
// dropped workspaces — they sat in 'provisioning' for 10 min until the
|
||||
// sweeper marked them failed with the misleading "container started but
|
||||
// never called /registry/register" message. Marking failed inside Auto
|
||||
// closes that class: even if a future caller bypasses HasProvisioner
|
||||
// gating or ignores the bool return, the workspace ends in a clean
|
||||
// failed state with an actionable error message.
|
||||
//
|
||||
// Architectural principle: templates own runtime/config/prompts/files/
|
||||
// plugins; the platform owns where it runs. Anything that picks
|
||||
// between CP and local Docker belongs in this one helper.
|
||||
// between CP and local Docker belongs in this one helper. Anything
|
||||
// post-routing-but-pre-Start (mint secrets, render template, etc.)
|
||||
// lives in prepareProvisionContext (shared by both per-backend
|
||||
// goroutines).
|
||||
func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath string, configFiles map[string][]byte, payload models.CreateWorkspacePayload) bool {
|
||||
if h.cpProv != nil {
|
||||
go h.provisionWorkspaceCP(workspaceID, templatePath, configFiles, payload)
|
||||
@ -136,6 +158,15 @@ func (h *WorkspaceHandler) provisionWorkspaceAuto(workspaceID, templatePath stri
|
||||
go h.provisionWorkspace(workspaceID, templatePath, configFiles, payload)
|
||||
return true
|
||||
}
|
||||
// No backend wired — mark failed so the workspace doesn't linger in
|
||||
// 'provisioning' for the full 10-minute sweep window. 10s is enough
|
||||
// for the broadcast + single UPDATE inside markProvisionFailed.
|
||||
log.Printf("provisionWorkspaceAuto: no provisioning backend wired for %s — marking failed (cpProv=nil, provisioner=nil)", workspaceID)
|
||||
failCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
h.markProvisionFailed(failCtx, workspaceID,
|
||||
"no provisioning backend available — workspace requires either a Docker daemon (self-hosted) or control-plane provisioner (SaaS)",
|
||||
nil)
|
||||
return false
|
||||
}
|
||||
|
||||
@ -565,29 +596,22 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
|
||||
}
|
||||
|
||||
// Auto-provision — pick backend: control plane (SaaS) or Docker (self-hosted).
|
||||
// Routing is centralized in provisionWorkspaceAuto so every caller
|
||||
// (Create, TeamHandler.Expand, future paths) gets the same backend
|
||||
// selection. Pre-2026-05-04 the team-deploy path hardcoded the
|
||||
// Docker route, so on a SaaS tenant 7-of-7 sub-agents were created
|
||||
// as DB rows but had no EC2 — symptom: "container started but never
|
||||
// called /registry/register" + diagnose returns "docker client not
|
||||
// configured". Centralizing here closes that drift class.
|
||||
// Routing AND the no-backend mark-failed path are both inside
|
||||
// provisionWorkspaceAuto (single source of truth). The Create-specific
|
||||
// extra is the workspace_config UPSERT below: when no backend is
|
||||
// wired, Auto marks the row failed but doesn't persist the bare
|
||||
// runtime/model/tier as JSON — the Config tab needs that to render
|
||||
// even on failed workspaces, so Create owns this Create-only side
|
||||
// effect rather than coupling Auto to a UI concern.
|
||||
if !h.provisionWorkspaceAuto(id, templatePath, configFiles, payload) {
|
||||
// No Docker available (SaaS tenant). Persist basic config as JSON
|
||||
// so the Config tab shows the correct runtime/model/name. Then mark
|
||||
// the workspace as failed with a clear message.
|
||||
cfgJSON := fmt.Sprintf(`{"name":%q,"runtime":%q,"tier":%d,"template":%q}`,
|
||||
payload.Name, payload.Runtime, payload.Tier, payload.Template)
|
||||
db.DB.ExecContext(ctx, `
|
||||
if _, err := db.DB.ExecContext(ctx, `
|
||||
INSERT INTO workspace_config (workspace_id, data) VALUES ($1, $2::jsonb)
|
||||
ON CONFLICT (workspace_id) DO UPDATE SET data = $2::jsonb
|
||||
`, id, cfgJSON)
|
||||
db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET status = $1, last_sample_error = 'Docker not available — workspace containers require a Docker daemon or external provisioning.', updated_at = now() WHERE id = $2`, models.StatusFailed, id)
|
||||
h.broadcaster.RecordAndBroadcast(ctx, "WORKSPACE_PROVISION_FAILED", id, map[string]interface{}{
|
||||
"error": "Docker not available on this platform instance",
|
||||
})
|
||||
log.Printf("Create: no Docker daemon — workspace %s config persisted, marked failed", id)
|
||||
`, id, cfgJSON); err != nil {
|
||||
log.Printf("Create: workspace_config persist failed for %s: %v", id, err)
|
||||
}
|
||||
}
|
||||
|
||||
c.JSON(http.StatusCreated, gin.H{
|
||||
|
||||
@ -67,12 +67,27 @@ func (r *trackingCPProv) startedSnapshot() []string {
|
||||
return out
|
||||
}
|
||||
|
||||
// TestProvisionWorkspaceAuto_NoBackendReturnsFalse — when neither
|
||||
// cpProv nor provisioner is wired, the dispatcher returns false so the
|
||||
// caller knows it must own the persist + mark-failed path. Pre-fix,
|
||||
// TeamHandler had no equivalent fallback at all and silently dropped
|
||||
// children on the floor.
|
||||
func TestProvisionWorkspaceAuto_NoBackendReturnsFalse(t *testing.T) {
|
||||
// TestProvisionWorkspaceAuto_NoBackendMarksFailed — when neither cpProv
|
||||
// nor provisioner is wired, the dispatcher must:
|
||||
// 1. Return false (so the caller can do its own extra cleanup if
|
||||
// needed — Create persists workspace_config for the Config tab).
|
||||
// 2. Mark the workspace failed via markProvisionFailed (defense in
|
||||
// depth: if a future caller bypasses the bool return, the workspace
|
||||
// still doesn't sit stuck in 'provisioning' for 10 min until the
|
||||
// sweeper fires).
|
||||
//
|
||||
// Pre-2026-05-05 the false return was silent and TeamHandler /
|
||||
// OrgHandler.createWorkspaceTree dropped workspaces on the floor when
|
||||
// they ignored it. This test pins the new contract that Auto owns the
|
||||
// failed-mark on no-backend.
|
||||
func TestProvisionWorkspaceAuto_NoBackendMarksFailed(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
mock.MatchExpectationsInOrder(false)
|
||||
// markProvisionFailed does a single UPDATE workspaces ... SET status='failed'.
|
||||
mock.ExpectExec(`UPDATE workspaces SET status =`).
|
||||
WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
bcast := &concurrentSafeBroadcaster{}
|
||||
h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir())
|
||||
// Do NOT call SetCPProvisioner — both backends nil.
|
||||
@ -83,6 +98,9 @@ func TestProvisionWorkspaceAuto_NoBackendReturnsFalse(t *testing.T) {
|
||||
if ok {
|
||||
t.Fatalf("expected provisionWorkspaceAuto to return false with no backend wired")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("expected markProvisionFailed UPDATE to fire on no-backend path: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestProvisionWorkspaceAuto_RoutesToCPWhenSet — when cpProv is set
|
||||
@ -285,3 +303,132 @@ func TestOrgImport_UsesAutoNotDirectDockerPath(t *testing.T) {
|
||||
t.Errorf("org_import.go must call h.workspace.provisionWorkspaceAuto for child provisioning — current code does not")
|
||||
}
|
||||
}
|
||||
|
||||
// TestHasProvisioner_TrueOnCPOnly — SaaS tenants run with cpProv set and
|
||||
// the local Docker provisioner nil. HasProvisioner must report true so
|
||||
// gate-y callers (org-import prep block) don't skip provisioning.
|
||||
//
|
||||
// Pre-2026-05-05 the org-import gate checked `h.provisioner != nil`
|
||||
// directly — false on SaaS — and the entire provisioning prep block was
|
||||
// skipped. The Auto call inside the block was unreachable; PR #2798's
|
||||
// "route through Auto" fix didn't help because the gate fired earlier.
|
||||
// Symptom: 7-workspace org-import on hongming sat in 'provisioning' for
|
||||
// the full 10-minute sweep window.
|
||||
func TestHasProvisioner_TrueOnCPOnly(t *testing.T) {
|
||||
bcast := &concurrentSafeBroadcaster{}
|
||||
h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir())
|
||||
h.SetCPProvisioner(&trackingCPProv{})
|
||||
if !h.HasProvisioner() {
|
||||
t.Errorf("HasProvisioner() == false with cpProv wired (Docker nil) — every gate that uses this would skip provisioning on SaaS, reproducing the hongming 7-workspace stuck-in-provisioning incident from 2026-05-05")
|
||||
}
|
||||
}
|
||||
|
||||
// TestHasProvisioner_TrueOnDockerOnly — self-hosted operators run with
|
||||
// the local Docker provisioner wired and cpProv nil. HasProvisioner must
|
||||
// report true.
|
||||
func TestHasProvisioner_TrueOnDockerOnly(t *testing.T) {
|
||||
bcast := &concurrentSafeBroadcaster{}
|
||||
// NewWorkspaceHandler guards the typed-nil-interface trap (workspace.go
|
||||
// docstring) — pass a real *Provisioner stub via the test fixture
|
||||
// rather than a nil pointer cast to the interface.
|
||||
h := NewWorkspaceHandler(bcast, &provisioner.Provisioner{}, "http://localhost:8080", t.TempDir())
|
||||
if !h.HasProvisioner() {
|
||||
t.Errorf("HasProvisioner() == false with Docker wired (cpProv nil) — would break self-hosted operators")
|
||||
}
|
||||
}
|
||||
|
||||
// TestHasProvisioner_FalseWhenNeitherWired — misconfigured deployment
|
||||
// with neither backend reachable. HasProvisioner must report false so
|
||||
// the org-import prep block is skipped (no point doing template/secret
|
||||
// prep work when nothing can run the resulting container).
|
||||
func TestHasProvisioner_FalseWhenNeitherWired(t *testing.T) {
|
||||
bcast := &concurrentSafeBroadcaster{}
|
||||
h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir())
|
||||
if h.HasProvisioner() {
|
||||
t.Errorf("HasProvisioner() == true with no backend wired — gate should short-circuit and not waste prep cycles")
|
||||
}
|
||||
}
|
||||
|
||||
// TestNoBareBothNilCheck — source-level pin: any code that wants to ask
|
||||
// "is no backend wired?" must use !HasProvisioner(), not the verbose
|
||||
// `h.provisioner == nil && h.cpProv == nil` shape. Two reasons:
|
||||
//
|
||||
// 1. Single source of truth — when a third backend lands (k8s,
|
||||
// containerd, whatever), HasProvisioner gets the new field added in
|
||||
// one place. Bare both-nil checks each need to be hunted down.
|
||||
// 2. Symmetry — easier to read `!h.HasProvisioner()` and know the
|
||||
// intent than to mentally evaluate `nil && nil`.
|
||||
//
|
||||
// Allowed exception: workspace.go's HasProvisioner() definition itself.
|
||||
// Test files are also exempt — assertions on internal field state are
|
||||
// fine.
|
||||
func TestNoBareBothNilCheck(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
entries, err := os.ReadDir(wd)
|
||||
if err != nil {
|
||||
t.Fatalf("readdir: %v", err)
|
||||
}
|
||||
bareShapes := []string{
|
||||
"h.provisioner == nil && h.cpProv == nil",
|
||||
"h.cpProv == nil && h.provisioner == nil",
|
||||
}
|
||||
for _, entry := range entries {
|
||||
name := entry.Name()
|
||||
if filepath.Ext(name) != ".go" {
|
||||
continue
|
||||
}
|
||||
// Allow tests (legitimate field-state assertions).
|
||||
if len(name) > len("_test.go") &&
|
||||
name[len(name)-len("_test.go"):] == "_test.go" {
|
||||
continue
|
||||
}
|
||||
// workspace.go houses HasProvisioner's definition + can reference
|
||||
// the fields directly — but with the !HasProvisioner() refactor
|
||||
// it shouldn't contain the bare both-nil shape any more.
|
||||
src, err := os.ReadFile(filepath.Join(wd, name))
|
||||
if err != nil {
|
||||
t.Fatalf("read %s: %v", name, err)
|
||||
}
|
||||
for _, needle := range bareShapes {
|
||||
if bytes.Contains(src, []byte(needle)) {
|
||||
t.Errorf("%s contains bare `%s` — must use `!h.HasProvisioner()` for SSOT.", name, needle)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestOrgImportGate_UsesHasProvisionerNotBareField — source-level pin
|
||||
// for the org-import gate. Pre-fix the gate read `h.provisioner != nil`,
|
||||
// which checked only the Docker pointer and silently dropped every
|
||||
// SaaS workspace. The fix routes through HasProvisioner so both
|
||||
// backends count.
|
||||
//
|
||||
// Substring match because the failure shape is "wrong field" — a plain
|
||||
// text gate suffices, same rationale as TestTeamExpand_UsesAutoNotDirectDockerPath
|
||||
// above.
|
||||
func TestOrgImportGate_UsesHasProvisionerNotBareField(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
t.Fatalf("getwd: %v", err)
|
||||
}
|
||||
src, err := os.ReadFile(filepath.Join(wd, "org_import.go"))
|
||||
if err != nil {
|
||||
t.Fatalf("read org_import.go: %v", err)
|
||||
}
|
||||
// The provisioning gate is the `else if ...` clause that follows the
|
||||
// `if ws.External {` external-workspace branch. If org_import.go
|
||||
// reintroduces a bare `h.provisioner` check there, every SaaS tenant
|
||||
// silently drops org-imported workspaces again. Auto's nil check is
|
||||
// the right routing layer; the gate just decides whether to do prep
|
||||
// work at all, and HasProvisioner is the symmetric question.
|
||||
if bytes.Contains(src, []byte("} else if h.provisioner != nil {")) {
|
||||
t.Errorf("org_import.go gates the provisioning prep block on `h.provisioner != nil` (bare Docker check) — must use `h.workspace.HasProvisioner()` so SaaS tenants (cpProv set, provisioner nil) reach the Auto call. " +
|
||||
"Repro: 2026-05-05 hongming org-import incident — 7 claude-code workspaces stuck in 'provisioning' for 10 min because the gate skipped the entire block on SaaS, hiding the Auto call PR #2798 introduced.")
|
||||
}
|
||||
if !bytes.Contains(src, []byte("h.workspace.HasProvisioner()")) {
|
||||
t.Errorf("org_import.go must call h.workspace.HasProvisioner() in the provisioning gate — current code does not")
|
||||
}
|
||||
}
|
||||
|
||||
@ -115,7 +115,7 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) {
|
||||
// available — previously only `provisioner` was checked, which broke
|
||||
// restart entirely on every SaaS tenant (the workspace EC2 couldn't
|
||||
// be terminated + relaunched, the endpoint 503'd on every try).
|
||||
if h.provisioner == nil && h.cpProv == nil {
|
||||
if !h.HasProvisioner() {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "provisioner not available"})
|
||||
return
|
||||
}
|
||||
@ -360,7 +360,7 @@ func (h *WorkspaceHandler) RestartByID(workspaceID string) {
|
||||
// reactive auto-restart on every SaaS tenant (where the local Docker
|
||||
// provisioner is intentionally nil). The runRestartCycle below now
|
||||
// branches on which one is set for the Stop call.
|
||||
if h.provisioner == nil && h.cpProv == nil {
|
||||
if !h.HasProvisioner() {
|
||||
return
|
||||
}
|
||||
coalesceRestart(workspaceID, func() { h.runRestartCycle(workspaceID) })
|
||||
@ -657,7 +657,7 @@ func (h *WorkspaceHandler) Resume(c *gin.Context) {
|
||||
// Accept either provisioner (Docker self-hosted OR CP SaaS). See the
|
||||
// same guard in Restart above for context — Resume previously 503'd
|
||||
// on every SaaS tenant.
|
||||
if h.provisioner == nil && h.cpProv == nil {
|
||||
if !h.HasProvisioner() {
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{"error": "provisioner not available"})
|
||||
return
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user