feat(provisioner): activate giteaTemplateAssetFetcher for public fetch (no PAT) #2900

Closed
agent-dev-b wants to merge 1 commit from feat/public-fetch-activation into main
3 changed files with 93 additions and 33 deletions
+24 -16
View File
@@ -250,19 +250,24 @@ func main() {
}
// PR-B (RFC #2843 #24): wire the Gitea TemplateAssetFetcher.
// nil-if-empty + WARN: if the token isn't set, log a warning
// and stay in the "no fetcher wired" state. The SCAFFOLD gate
// in collectCPConfigFiles treats nil fetcher as "skip the
// fetcher" — pre-scaffold behavior preserved for self-host /
// unconfigured tenants. baseURL has a production default
// (https://git.moleculesai.app) but is overridable for staging
// or per-deployment Gitea mirrors.
if token := templateRepoToken(); token != "" {
baseURL := envOr("MOLECULE_GITEA_BASE_URL", "https://git.moleculesai.app")
wh.SetGiteaTemplateFetcher(provisioner.NewGiteaTemplateAssetFetcher(baseURL, token, nil))
log.Printf("template repo fetcher: wired (baseURL=%q, token set)", baseURL)
// Activate the Gitea template-asset fetcher on every boot (public
// fetch is the default — the molecule-ai/* template repos are
// PUBLIC, verified: GET /repos/.../archive/main.tar.gz returns
// 200 with no Authorization header). The token is OPTIONAL: if
// MOLECULE_TEMPLATE_REPO_TOKEN is set, the fetcher sends
// `Authorization: token <token>` (per-identity read-only PAT for
// the future private-template / rate-limit CTO-grant item); if
// empty, the fetcher omits the header entirely (public path).
// baseURL has a production default (https://git.moleculesai.app)
// but is overridable via MOLECULE_GITEA_BASE_URL for staging or
// per-deployment Gitea mirrors.
token := templateRepoToken()
baseURL := envOr("MOLECULE_GITEA_BASE_URL", "https://git.moleculesai.app")
wh.SetGiteaTemplateFetcher(provisioner.NewGiteaTemplateAssetFetcher(baseURL, token, nil))
if token != "" {
log.Printf("template repo fetcher: wired (baseURL=%q, token set — authenticated)", baseURL)
} else {
log.Printf("template repo fetcher: MOLECULE_TEMPLATE_REPO_TOKEN unset; fetcher disabled (self-host default / SCAFFOLD-gate skip)")
log.Printf("template repo fetcher: wired (baseURL=%q, no token — public unauthenticated fetch)", baseURL)
}
// Self-hosted platform-agent boot-provision (Change 1). The line-128 seed
@@ -687,10 +692,13 @@ func templateCacheToken() string {
// used by the Gitea TemplateAssetFetcher (RFC #2843 #24 PR-B).
// Distinct from templateCacheToken (which is for the template cache,
// a different feature) so a tenant can rotate the fetcher token
// without touching the cache token. nil-if-empty + WARN: callers
// should treat empty as "fetcher disabled" (self-host default — the
// SCAFFOLD gate in collectCPConfigFiles treats nil fetcher as
// "skip the fetcher", pre-scaffold behavior preserved).
// without touching the cache token. An EMPTY token is valid: the
// fetcher activates on every boot, and an empty token selects the
// public-fetch path (molecule-ai/* template repos are public, so no
// auth is required). When set, the token must be a per-identity
// read-only PAT scoped to the template repo (e.g. for the future
// claude-ci-reader, covering private templates / rate limits — a
// CTO-grant item deferred from this PR).
func templateRepoToken() string {
	// Surrounding whitespace is stripped, so an unset or all-blank
	// variable yields the empty string.
	raw := os.Getenv("MOLECULE_TEMPLATE_REPO_TOKEN")
	return strings.TrimSpace(raw)
}
@@ -126,13 +126,20 @@ func parseTemplateIdentity(identity string) (owner, repo, ref string, err error)
// Load fetches the template's tarball archive and returns the
// allowlisted asset map. See the package doc-comment for the full
// transport + allowlist contract.
//
// Auth is OPTIONAL: when the token is empty/absent, no Authorization
// header is sent (the request is unauthenticated). This is the
// public-fetch path used for molecule-ai's PUBLIC template repos
// (verified: GET /repos/.../archive/main.tar.gz returns 200 with no
// Authorization header). When a token IS set, it is sent as
// `Authorization: token <token>` per the Gitea API spec. The token
// MUST be a per-identity read-only PAT scoped to the template repo
// if/when used (e.g. the future claude-ci-reader for private
// templates / rate-limits — CTO-grant item, deferred from this PR).
func (f *giteaTemplateAssetFetcher) Load(ctx context.Context, templateIdentity string) (map[string][]byte, error) {
if f.baseURL == "" {
return nil, errors.New("giteaTemplateAssetFetcher: baseURL is empty")
}
if f.token == "" {
return nil, errors.New("giteaTemplateAssetFetcher: token is empty (per-identity READ-ONLY Gitea PAT required)")
}
owner, repo, ref, err := parseTemplateIdentity(templateIdentity)
if err != nil {
return nil, fmt.Errorf("giteaTemplateAssetFetcher: %w", err)
@@ -143,7 +150,13 @@ func (f *giteaTemplateAssetFetcher) Load(ctx context.Context, templateIdentity s
if err != nil {
return nil, fmt.Errorf("giteaTemplateAssetFetcher: build request: %w", err)
}
req.Header.Set("Authorization", "token "+f.token)
// OMIT the Authorization header entirely when the token is empty
// — do NOT send "Authorization: token " with an empty value
// (Gitea may 401 on a malformed empty token). The public-fetch
// path is the no-token case.
if f.token != "" {
req.Header.Set("Authorization", "token "+f.token)
}
req.Header.Set("Accept", "application/gzip, application/octet-stream")
resp, err := f.httpClient.Do(req)
@@ -158,23 +158,62 @@ func TestGiteaTemplateAssetFetcher_FailsClosedOnTransportError(t *testing.T) {
}
}
// TestGiteaTemplateAssetFetcher_RejectsEmptyToken pins the
// security guard: an empty token (which would otherwise be
// sent as "Authorization: token " with no credential) is
// rejected at construction time. A forgotten token init would
// otherwise silently fail-against-anonymous-requests, which
// Gitea would 401 on — better to fail loud at Load time.
func TestGiteaTemplateAssetFetcher_RejectsEmptyToken(t *testing.T) {
f := NewGiteaTemplateAssetFetcher("http://example.com", "", nil)
_, err := f.Load(context.Background(), "owner/repo@main")
if err == nil {
t.Fatal("expected error on empty token, got nil (security guard violated)")
// TestGiteaTemplateAssetFetcher_PublicFetchNoToken pins the
// public-fetch activation (no token → no Authorization header, GET
// succeeds, assets extracted). The molecule-ai/* template repos
// are public (verified: GET /repos/.../archive/main.tar.gz returns
// 200 with no Authorization header), so the fetcher must work
// without a token. The previous RejectsEmptyToken test was
// superseded by the public-fetch activation (71b48241): the empty-
// token case is now the unauthenticated public-fetch path, not a
// hard error.
func TestGiteaTemplateAssetFetcher_PublicFetchNoToken(t *testing.T) {
var gotAuthHeader string
var gotAuthHeaderSet bool
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Capture the Authorization header (or lack thereof).
if v := r.Header.Get("Authorization"); v != "" {
gotAuthHeader = v
}
gotAuthHeaderSet = r.Header.Get("Authorization") != ""
// Serve a real .tar.gz with config.yaml + prompts/system.md
// wrapped in the "<repo>-<sha>" top-level dir Gitea uses.
w.Header().Set("Content-Type", "application/gzip")
w.WriteHeader(http.StatusOK)
gz := gzip.NewWriter(w)
tw := tar.NewWriter(gz)
mustWriteTar(t, tw, "workspace-template-foo-abc123/config.yaml", []byte("model: anthropic:claude-opus-4-7\n"))
mustWriteTar(t, tw, "workspace-template-foo-abc123/prompts/system.md", []byte("you are a helpful assistant\n"))
_ = tw.Close()
_ = gz.Close()
}))
defer srv.Close()
f := NewGiteaTemplateAssetFetcher(srv.URL, "", nil) // EMPTY TOKEN = public fetch
assets, err := f.Load(context.Background(), "molecule-ai/workspace-template-foo@main")
if err != nil {
t.Fatalf("public fetch with empty token failed: %v", err)
}
if !strings.Contains(err.Error(), "token") {
t.Errorf("error should mention token, got: %v", err)
if gotAuthHeaderSet {
t.Errorf("public fetch MUST NOT send an Authorization header when token is empty; got %q", gotAuthHeader)
}
if len(assets) < 1 {
t.Errorf("expected at least 1 asset extracted from the public tarball, got %d", len(assets))
}
// Spot-check the allowlist still applies on the public path.
for name := range assets {
if !IsCPTemplateAssetPath(name) {
t.Errorf("public fetch returned a non-allowlisted path: %q (allowlist must hold regardless of auth state)", name)
}
}
}
// TestGiteaTemplateAssetFetcher_RejectsEmptyToken was REMOVED: the
// public-fetch activation (71b48241) reversed the behavior it
// pinned. An empty token now selects the unauthenticated
// public-fetch path instead of producing a hard error. The new
// behavior is pinned by TestGiteaTemplateAssetFetcher_PublicFetchNoToken
// above, including its assertion that no Authorization header is sent.
// TestParseTemplateIdentity pins the identity parser. Format:
// "<owner>/<repo>@<ref>". Malformed identities return errors.
func TestParseTemplateIdentity(t *testing.T) {