feat(provisioner): #2843 #24 PR-B — fetcher selection (real Gitea for SaaS, no-op for self-host) + public-fetch wire #2903

Merged
devops-engineer merged 3 commits from feat/pr-b-template-asset-channel into main 2026-06-15 07:31:23 +00:00
5 changed files with 348 additions and 35 deletions
+53 -6
View File
@@ -63,6 +63,32 @@ import (
"github.com/gin-gonic/gin"
)
// isSaaSDeployment reports whether the process should behave as a
// SaaS deployment, based purely on two environment variables.
//
// The original note on this helper says it mirrors handlers.saasMode
// and is duplicated in main.go to avoid an import cycle; that helper
// is not visible here, so keep the two in sync by hand.
//
// Decision order:
//  1. MOLECULE_DEPLOY_MODE, trimmed and compared case-insensitively,
//     wins whenever it is non-empty:
//     "saas" -> true;
//     "self-hosted" / "selfhosted" / "standalone" -> false;
//     anything else -> a warning is logged and the result is false.
//  2. When MOLECULE_DEPLOY_MODE is empty (or only whitespace), the
//     result is true iff MOLECULE_ORG_ID is non-empty after trimming.
func isSaaSDeployment() bool {
	mode := strings.TrimSpace(os.Getenv("MOLECULE_DEPLOY_MODE"))

	// No explicit flag: infer the mode from the presence of an org ID.
	if mode == "" {
		orgID := strings.TrimSpace(os.Getenv("MOLECULE_ORG_ID"))
		return orgID != ""
	}

	normalized := strings.ToLower(mode)
	if normalized == "saas" {
		return true
	}

	// Every remaining value resolves to non-SaaS; only the values we
	// do not recognise deserve a warning.
	recognised := normalized == "self-hosted" ||
		normalized == "selfhosted" ||
		normalized == "standalone"
	if !recognised {
		log.Printf("isSaaSDeployment: MOLECULE_DEPLOY_MODE=%q not recognised; falling back to strict (non-SaaS) mode. Valid values: saas | self-hosted.", mode)
	}
	return false
}
func main() {
// .env auto-load: in dev, the operator keeps MOLECULE_ENV /
// DATABASE_URL / etc. in the monorepo's .env file. Loading it here
@@ -257,12 +283,33 @@ func main() {
// unconfigured tenants. baseURL has a production default
// (https://git.moleculesai.app) but is overridable for staging
// or per-deployment Gitea mirrors.
if token := templateRepoToken(); token != "" {
baseURL := envOr("MOLECULE_GITEA_BASE_URL", "https://git.moleculesai.app")
wh.SetGiteaTemplateFetcher(provisioner.NewGiteaTemplateAssetFetcher(baseURL, token, nil))
log.Printf("template repo fetcher: wired (baseURL=%q, token set)", baseURL)
} else {
log.Printf("template repo fetcher: MOLECULE_TEMPLATE_REPO_TOKEN unset; fetcher disabled (self-host default / SCAFFOLD-gate skip)")
// PR-B keystone (RFC #2843 #24): wire the template-asset fetcher
// via the selection helper. SaaS deployments get the real
// Gitea fetcher (public-fetch when MOLECULE_TEMPLATE_REPO_TOKEN
// is empty per the CTO public-fetch GO; authenticated when set
// for the future private-template / rate-limit CTO-grant item).
// Self-host deployments get the no-op fetcher (self-host uses
// the local TemplatePath + ConfigFiles path for /configs and
// does not need an external asset channel). The token is
// OPTIONAL for SaaS (the molecule-ai/* template repos are
// PUBLIC — verified: GET /repos/.../archive/main.tar.gz returns
// 200 with no Authorization header). baseURL has a production
// default (https://git.moleculesai.app) but is overridable via
// MOLECULE_GITEA_BASE_URL for staging or per-deployment Gitea
// mirrors.
token := templateRepoToken()
baseURL := envOr("MOLECULE_GITEA_BASE_URL", "https://git.moleculesai.app")
sel := provisioner.SelectTemplateAssetFetcher(isSaaSDeployment, baseURL, token)
wh.SetGiteaTemplateFetcher(sel.Fetcher)
switch sel.Mode {
case "self-host-noop":
log.Printf("template repo fetcher: wired (no-op — self-host default, no external asset channel)")
default:
if sel.Authenticated {
log.Printf("template repo fetcher: wired (baseURL=%q, SaaS, token set — authenticated)", baseURL)
} else {
log.Printf("template repo fetcher: wired (baseURL=%q, SaaS, no token — public unauthenticated fetch)", baseURL)
}
}
// Self-hosted platform-agent boot-provision (Change 1). The line-128 seed
@@ -35,12 +35,24 @@ package provisioner
//
// Auth: per-identity READ-ONLY Gitea token. The token is threaded
// from Infisical SSOT (per #2676 program) into the fetcher via the
// Token field. The token MUST be a per-identity PAT scoped to the
// template repo with read-only access — NOT a founder PAT, NOT a
// workspace-admin PAT. Workspace-server never logs or echoes the
// token (the httpClient logs are scrubbed via the standard net/http
// strip-header path; the token is in the Authorization header,
// which net/http does not log by default).
// Token field. When the token is set, it MUST be a per-identity PAT
// scoped to the template repo with read-only access — NOT a founder
// PAT, NOT a workspace-admin PAT.
//
// PUBLIC FETCH (empty token): when the token is empty, the fetcher
// performs an UNAUTHENTICATED request — the Authorization header
// is OMITTED ENTIRELY (not sent as "token " with an empty value,
// which Gitea would 401 as a malformed credential). This enables
// the public-fetch activation: SaaS tenants without a configured
// MOLECULE_TEMPLATE_REPO_TOKEN can still fetch molecule-ai/*
// templates, which are PUBLIC repos on the Gitea instance. Self-
// host callers use the no-op fetcher and never reach this code path.
//
// Workspace-server never logs or echoes the token (the httpClient
// logs are scrubbed via the standard net/http strip-header path;
// the token is in the Authorization header, which net/http does
// not log by default). The empty-token path sends NO header at all
// (strictly less information disclosure than a populated header).
//
// NO SIZE CAP: the existing #2845 acbc0da9 added a 16MiB bound
// for TemplateAssets in collectCPConfigFiles (separate from the
@@ -126,13 +138,20 @@ func parseTemplateIdentity(identity string) (owner, repo, ref string, err error)
// Load fetches the template's tarball archive and returns the
// allowlisted asset map. See the package doc-comment for the full
// transport + allowlist contract.
//
// PUBLIC FETCH (empty token): when f.token is empty, the fetcher
// sends an UNAUTHENTICATED request (NO Authorization header). This
// is the public-fetch activation that lets SaaS tenants without a
// configured MOLECULE_TEMPLATE_REPO_TOKEN fetch PUBLIC template
// repos. The earlier code rejected an empty token at Load time
// (forcing SaaS-no-token tenants to fetch ZERO templates — a
// runtime defect caught by the driver in #2903 review). Empty
// token + non-empty token both go through the same code path
// below; the only difference is the optional Authorization header.
func (f *giteaTemplateAssetFetcher) Load(ctx context.Context, templateIdentity string) (map[string][]byte, error) {
if f.baseURL == "" {
return nil, errors.New("giteaTemplateAssetFetcher: baseURL is empty")
}
if f.token == "" {
return nil, errors.New("giteaTemplateAssetFetcher: token is empty (per-identity READ-ONLY Gitea PAT required)")
}
owner, repo, ref, err := parseTemplateIdentity(templateIdentity)
if err != nil {
return nil, fmt.Errorf("giteaTemplateAssetFetcher: %w", err)
@@ -143,7 +162,13 @@ func (f *giteaTemplateAssetFetcher) Load(ctx context.Context, templateIdentity s
if err != nil {
return nil, fmt.Errorf("giteaTemplateAssetFetcher: build request: %w", err)
}
req.Header.Set("Authorization", "token "+f.token)
// Only set Authorization when a token is configured. Sending
// "token " with an empty value would be a malformed credential
// that Gitea 401s on — strictly worse than sending no header at
// all (which is what the public path needs).
if f.token != "" {
req.Header.Set("Authorization", "token "+f.token)
}
req.Header.Set("Accept", "application/gzip, application/octet-stream")
resp, err := f.httpClient.Do(req)
@@ -158,20 +158,101 @@ func TestGiteaTemplateAssetFetcher_FailsClosedOnTransportError(t *testing.T) {
}
}
// TestGiteaTemplateAssetFetcher_RejectsEmptyToken pins the
// security guard: an empty token (which would otherwise be
// sent as "Authorization: token " with no credential) is
// rejected at construction time. A forgotten token init would
// otherwise silently fail-against-anonymous-requests, which
// Gitea would 401 on — better to fail loud at Load time.
func TestGiteaTemplateAssetFetcher_RejectsEmptyToken(t *testing.T) {
f := NewGiteaTemplateAssetFetcher("http://example.com", "", nil)
_, err := f.Load(context.Background(), "owner/repo@main")
if err == nil {
t.Fatal("expected error on empty token, got nil (security guard violated)")
// TestGiteaTemplateAssetFetcher_EmptyToken_OmitsAuthHeader pins
// the public-fetch activation (driver RC 11907 on #2903, the
// runtime defect that the prior code rejected an empty token and
// left SaaS-no-token tenants with ZERO templates). The new
// contract: empty token → UNAUTHENTICATED request (Authorization
// header is OMITTED, NOT sent as "token " with an empty value,
// which Gitea 401s on as a malformed credential). This is the
// load-bearing pin that catches a regression to the buggy
// "always send Authorization" behavior.
//
// The test asserts three things:
// 1. NO "Authorization" header is set on the outgoing request
// (the request map's lookup returns the zero value and the
// "explicit" map presence is false — net/http normalizes
// headers into a map[string][]string where unset keys are
// simply absent, so a missing key and a ""-valued key are
// distinguishable).
// 2. Load returns no error (a request was issued and a 200
// response was processed into assets).
// 3. Assets are returned (the public-fetch path actually
// delivered a payload, not an empty map).
func TestGiteaTemplateAssetFetcher_EmptyToken_OmitsAuthHeader(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Pin: NO Authorization header should reach the server.
if vals, ok := r.Header["Authorization"]; ok {
t.Errorf("expected NO Authorization header, got %q (empty-token public-fetch must omit, not send \"token \" with empty value)", vals)
}
w.Header().Set("Content-Type", "application/gzip")
gz := gzip.NewWriter(w)
tw := tar.NewWriter(gz)
mustWriteTar(t, tw, "repo-sha/config.yaml", []byte("# public-template config\n"))
_ = tw.Close()
_ = gz.Close()
}))
defer srv.Close()
f := NewGiteaTemplateAssetFetcher(srv.URL, "", nil)
assets, err := f.Load(context.Background(), "owner/repo@main")
if err != nil {
t.Fatalf("Load: %v (empty-token public-fetch should succeed, NOT error)", err)
}
if !strings.Contains(err.Error(), "token") {
t.Errorf("error should mention token, got: %v", err)
mustHaveKey(t, assets, "config.yaml")
if len(assets) != 1 {
t.Errorf("expected 1 asset, got %d: %v", len(assets), keysOf(assets))
}
}
// TestGiteaTemplateAssetFetcher_EmptyToken_RealHTTP_NoAuthHeader_Success
// drives a fetcher built with an empty token against an httptest
// server and checks the whole round trip:
//
//   - the server must not see an Authorization key on the request at
//     all (absent, not merely empty-valued);
//   - the server answers 200 with a gzip'd tarball holding three
//     entries under a "repo-sha/" prefix;
//   - Load must succeed and return exactly those three entries, keyed
//     without the "repo-sha/" prefix.
//
// It guards against two regressions: rejecting an empty token at Load
// time, and sending an Authorization header when no token is set.
func TestGiteaTemplateAssetFetcher_EmptyToken_RealHTTP_NoAuthHeader_Success(t *testing.T) {
	serveArchive := func(w http.ResponseWriter, r *http.Request) {
		// Check key presence in the header map rather than Get():
		// Get() returns "" both when the header is absent and when it
		// is present with an empty value, and this test must reject
		// the latter as well.
		if _, present := r.Header["Authorization"]; present {
			t.Errorf("server saw Authorization header (value=%q) — empty-token public-fetch must OMIT the header, not send an empty value", r.Header.Get("Authorization"))
		}

		w.Header().Set("Content-Type", "application/gzip")
		w.WriteHeader(http.StatusOK)

		gzw := gzip.NewWriter(w)
		tarw := tar.NewWriter(gzw)
		// Several entries across the config / prompts / agent-skills
		// trees so more than a single-file happy path is exercised.
		mustWriteTar(t, tarw, "repo-sha/config.yaml", []byte("# public-template config\n"))
		mustWriteTar(t, tarw, "repo-sha/prompts/system.md", []byte("# public-template system prompt\n"))
		mustWriteTar(t, tarw, "repo-sha/agent-skills/skill-x/SKILL.md", []byte("# public-template skill\n"))
		_ = tarw.Close()
		_ = gzw.Close()
	}

	server := httptest.NewServer(http.HandlerFunc(serveArchive))
	defer server.Close()

	fetcher := NewGiteaTemplateAssetFetcher(server.URL, "", server.Client())
	got, loadErr := fetcher.Load(context.Background(), "owner/repo@main")
	if loadErr != nil {
		t.Fatalf("Load: %v (empty-token public-fetch should succeed against a public-template mock)", loadErr)
	}

	mustHaveKey(t, got, "config.yaml")
	mustHaveKey(t, got, "prompts/system.md")
	mustHaveKey(t, got, "agent-skills/skill-x/SKILL.md")
	if n := len(got); n != 3 {
		t.Errorf("expected 3 assets, got %d: %v", n, keysOf(got))
	}
}
@@ -103,3 +103,94 @@ func IsCPTemplateAssetPath(name string) bool {
strings.HasPrefix(name, "prompts/") ||
strings.HasPrefix(name, "agent-skills/")
}
// noopTemplateAssetFetcher is the fetcher used for self-host
// deployments (PR-B selection, RFC #2843 #24). It never contacts an
// external service: Load always reports "no assets, no error".
//
// A concrete no-op value is used instead of a nil interface so that
// the fetcher slot is always populated and "self-host = no-op" is a
// deliberate choice, not a missing value.
//
// The type is an empty struct, so it carries no state; Load ignores
// both arguments, which makes concurrent calls trivially safe.
type noopTemplateAssetFetcher struct{}

// Load ignores its context and template identity and returns a nil
// asset map together with a nil error — the "nothing to add" signal.
func (noopTemplateAssetFetcher) Load(context.Context, string) (map[string][]byte, error) {
	var noAssets map[string][]byte // deliberately left nil
	return noAssets, nil
}
// NoopTemplateAssetFetcher returns a TemplateAssetFetcher whose Load
// always yields (nil, nil). It is exported so code outside this
// package can obtain the self-host default fetcher.
func NoopTemplateAssetFetcher() TemplateAssetFetcher {
	var fetcher noopTemplateAssetFetcher
	return fetcher
}
// FetcherSelection describes the outcome of picking a
// TemplateAssetFetcher for the current deployment: which fetcher was
// chosen, whether it will authenticate, and a short label for the
// chosen mode.
type FetcherSelection struct {
	// Fetcher is the fetcher to wire. The selection helper is
	// documented to always populate it: the Gitea-backed fetcher for
	// SaaS, the no-op fetcher for self-host.
	Fetcher TemplateAssetFetcher

	// Authenticated reports whether the chosen fetcher was given a
	// non-empty token (and so is expected to send an Authorization
	// header). It is always false for the self-host no-op fetcher.
	Authenticated bool

	// Mode is a short human-readable label for the selection
	// (e.g. "saas-gitea", "saas-gitea-public", "self-host-noop").
	// It is meant for boot-time logging; prefer Authenticated and
	// Fetcher for any real decision.
	Mode string
}
// SelectTemplateAssetFetcher picks the TemplateAssetFetcher to wire
// for the current deployment and reports how it was chosen.
//
// Selection matrix:
//
//   - isSaaSTenant() && token != "" -> Gitea fetcher, Authenticated=true,  Mode="saas-gitea"
//   - isSaaSTenant() && token == "" -> Gitea fetcher, Authenticated=false, Mode="saas-gitea-public"
//   - !isSaaSTenant() (or nil func) -> no-op fetcher, Authenticated=false, Mode="self-host-noop"
//
// Parameters:
//   - isSaaSTenant: deployment-mode probe, injected so tests can pin
//     the answer. A nil func is treated as "not SaaS" so a wiring
//     mistake can never route a deployment to the real Gitea fetcher.
//   - baseURL: Gitea base URL handed to the Gitea fetcher (unused for
//     the no-op selection).
//   - token: optional Gitea token. An empty token still selects the
//     Gitea fetcher for SaaS (the public-fetch activation).
//
// The returned FetcherSelection always carries a non-nil Fetcher.
func SelectTemplateAssetFetcher(isSaaSTenant func() bool, baseURL, token string) FetcherSelection {
	if isSaaSTenant == nil || !isSaaSTenant() {
		return FetcherSelection{
			Fetcher:       NoopTemplateAssetFetcher(),
			Authenticated: false,
			Mode:          "self-host-noop",
		}
	}

	// SaaS: always the real Gitea fetcher. The Mode label must follow
	// the token state so it agrees with Authenticated; labelling a
	// token-bearing fetcher "public" would be misleading in boot logs.
	authenticated := token != ""
	mode := "saas-gitea-public"
	if authenticated {
		mode = "saas-gitea"
	}
	return FetcherSelection{
		Fetcher:       NewGiteaTemplateAssetFetcher(baseURL, token, nil),
		Authenticated: authenticated,
		Mode:          mode,
	}
}
@@ -55,8 +55,8 @@ func (f *fakeTemplateAssetFetcher) Load(_ context.Context, templateIdentity stri
func TestCollectCPConfigFiles_MergesFetcherAssets(t *testing.T) {
prov := &fakeTemplateAssetFetcher{
bundle: map[string][]byte{
"config.yaml": []byte("# from template repo"),
"prompts/system.md": []byte("# template system prompt"),
"config.yaml": []byte("# from template repo"),
"prompts/system.md": []byte("# template system prompt"),
"agent-skills/seo-audit/SKILL.md": []byte("# seo skill"),
},
}
@@ -183,7 +183,7 @@ func TestCollectCPConfigFiles_EmptyIdentityNoop(t *testing.T) {
bundle: map[string][]byte{"config.yaml": []byte("# unexpected")},
}
cfg := WorkspaceConfig{
TemplateIdentity: "", // empty
TemplateIdentity: "", // empty
TemplateAssetFetcher: prov, // wired but no identity
}
_, _, err := collectCPConfigFiles(cfg)
@@ -392,7 +392,7 @@ func TestCollectCPConfigFiles_RejectsFetcherAssetOutsideAllowlist(t *testing.T)
func TestCollectCPConfigFiles_FetcherAssetsRawBytes(t *testing.T) {
prov := &fakeTemplateAssetFetcher{
bundle: map[string][]byte{
"config.yaml": []byte("# raw bytes, will be base64 by marshaler"),
"config.yaml": []byte("# raw bytes, will be base64 by marshaler"),
"agent-skills/seo-audit/SKILL.md": []byte("raw-skill"),
},
}
@@ -442,8 +442,8 @@ func TestCollectCPConfigFiles_NoAssetsWhenNoFetcher(t *testing.T) {
func TestCollectCPConfigFiles_PreservesCallerConfigFiles(t *testing.T) {
cfg := WorkspaceConfig{
ConfigFiles: map[string][]byte{
"config.yaml": []byte("# caller"),
"generated.secret": []byte("not really a secret"),
"config.yaml": []byte("# caller"),
"generated.secret": []byte("not really a secret"),
},
}
files, assets, err := collectCPConfigFiles(cfg)
@@ -528,3 +528,72 @@ func TestCollectCPConfigFiles_ConfigFilesStillCappedAt256K(t *testing.T) {
t.Fatal("ConfigFiles over 256 KiB must still be rejected (SM/user-data transport cap unchanged)")
}
}
// TestSelectTemplateAssetFetcher_SaaS_GiteaFetcher checks both SaaS
// rows of the selection matrix. With a token the selection must be
// non-nil, Authenticated, and not labelled as the self-host no-op;
// without a token it must be non-nil, NOT Authenticated (public
// fetch), and still not labelled as the self-host no-op.
func TestSelectTemplateAssetFetcher_SaaS_GiteaFetcher(t *testing.T) {
	alwaysSaaS := func() bool { return true }

	// Token supplied -> authenticated Gitea fetcher.
	withToken := SelectTemplateAssetFetcher(alwaysSaaS, "http://gitea", "the-token")
	if withToken.Fetcher == nil {
		t.Fatal("SaaS + token: expected non-nil fetcher")
	}
	if authenticated := withToken.Authenticated; !authenticated {
		t.Error("SaaS + token: expected Authenticated=true")
	}
	if mode := withToken.Mode; mode == "self-host-noop" {
		t.Errorf("SaaS + token: expected non-noop mode, got %q", mode)
	}

	// No token -> public (unauthenticated) Gitea fetcher.
	public := SelectTemplateAssetFetcher(alwaysSaaS, "http://gitea", "")
	if public.Fetcher == nil {
		t.Fatal("SaaS + no token: expected non-nil fetcher (public-fetch)")
	}
	if authenticated := public.Authenticated; authenticated {
		t.Error("SaaS + no token: expected Authenticated=false (public-fetch)")
	}
	if mode := public.Mode; mode == "self-host-noop" {
		t.Errorf("SaaS + no token: expected non-noop mode, got %q", mode)
	}
}
// TestSelectTemplateAssetFetcher_SelfHost_NoopFetcher checks the
// self-host row of the selection matrix: even when a token is
// supplied, a non-SaaS deployment gets a non-nil fetcher that is not
// Authenticated, is labelled "self-host-noop", and whose Load returns
// no error and no assets.
func TestSelectTemplateAssetFetcher_SelfHost_NoopFetcher(t *testing.T) {
	neverSaaS := func() bool { return false }

	// The token is passed on purpose: self-host must ignore it.
	choice := SelectTemplateAssetFetcher(neverSaaS, "http://gitea", "the-token")
	if choice.Fetcher == nil {
		t.Fatal("self-host: expected non-nil fetcher (no-op default)")
	}
	if authenticated := choice.Authenticated; authenticated {
		t.Error("self-host: expected Authenticated=false (no-op never sends auth headers)")
	}
	if mode := choice.Mode; mode != "self-host-noop" {
		t.Errorf("self-host: expected Mode=self-host-noop, got %q", mode)
	}

	// Exercise the chosen fetcher: it must report "no assets".
	bundle, loadErr := choice.Fetcher.Load(t.Context(), "molecule-ai/workspace-template-seo@main")
	if loadErr != nil {
		t.Errorf("self-host no-op Load: expected nil error, got %v", loadErr)
	}
	if n := len(bundle); n != 0 {
		t.Errorf("self-host no-op Load: expected empty map, got %d entries: %v", n, keysOf(bundle))
	}
}
// TestSelectTemplateAssetFetcher_NilSaaSCheck_FallsBackToNoop checks
// the defensive default: passing a nil isSaaSTenant func must yield a
// non-nil fetcher labelled "self-host-noop", so a wiring mistake
// never selects the real Gitea fetcher.
func TestSelectTemplateAssetFetcher_NilSaaSCheck_FallsBackToNoop(t *testing.T) {
	choice := SelectTemplateAssetFetcher(nil, "http://gitea", "the-token")

	if choice.Fetcher == nil {
		t.Fatal("nil isSaaSTenant closure: expected non-nil fetcher (no-op default)")
	}
	if mode := choice.Mode; mode != "self-host-noop" {
		t.Errorf("nil isSaaSTenant closure: expected Mode=self-host-noop, got %q", mode)
	}
}