Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| ee1195522c | |||
| d89f0c9759 | |||
| e91687325d |
@@ -0,0 +1,113 @@
|
||||
package handlers
|
||||
|
||||
// template_files_agent_home_stub_test.go — pins the Phase-1 stub
|
||||
// contract for the /agent-home root added by internal#425 RFC.
|
||||
//
|
||||
// Today (pre-Phase-2b), every Files API verb against `?root=/agent-home`
|
||||
// must return HTTP 501 with the canonical pending-message body. The
|
||||
// stub MUST NOT:
|
||||
// 1. Hit the DB (the workspace might not even exist yet from the
|
||||
// canvas's POV — the root selector is testable without one).
|
||||
// 2. Touch the EIC tunnel / Docker / template-dir paths — those
|
||||
// would 500/404/[] depending on the env and confuse the canvas.
|
||||
// 3. Accept writes/deletes that the future docker-exec backend
|
||||
// would reject — fail closed.
|
||||
//
|
||||
// When Phase 2b lands, this file gets replaced by a real
|
||||
// docker-exec dispatch test; the stub-message constant in
|
||||
// templates.go disappears.
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// TestAgentHomeAllowedRoot pins that /agent-home is in the allowedRoots
|
||||
// set. Without this, a future refactor that drops the key would
|
||||
// silently degrade the canvas root selector to a 400 instead of the
|
||||
// stub 501.
|
||||
func TestAgentHomeAllowedRoot(t *testing.T) {
|
||||
if !allowedRoots["/agent-home"] {
|
||||
t.Fatal("/agent-home must be in allowedRoots — RFC #425 contract")
|
||||
}
|
||||
}
|
||||
|
||||
// TestAgentHomeStub_AllVerbs_Return501 pins the canonical stub
|
||||
// response across all four verbs. Each must:
|
||||
//
|
||||
// - status 501
|
||||
// - body contains the canonical "/agent-home not implemented" prefix
|
||||
// - NOT contain "workspace not found" (proves we short-circuit before
|
||||
// the DB lookup)
|
||||
//
|
||||
// Phase 2b update (this PR): ListFiles + ReadFile are NO LONGER
|
||||
// stubbed for /agent-home — they dispatch into
|
||||
// listFilesViaDockerExec / readFileViaDockerExec. WriteFile + Delete
|
||||
// REMAIN stubbed as 501 (Phase 2b-followup will land write/delete
|
||||
// with the additional secret-shape gates for write-content + delete-
|
||||
// path). The test table is split so the verbs that still 501 stay
|
||||
// pinned and the verbs that now go through can be exercised
|
||||
// independently.
|
||||
func TestAgentHomeStub_StillStubbedVerbs_Return501(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
method string
|
||||
invoke func(c *gin.Context)
|
||||
}{
|
||||
{
|
||||
name: "WriteFile",
|
||||
method: "PUT",
|
||||
invoke: func(c *gin.Context) { (&TemplatesHandler{}).WriteFile(c) },
|
||||
},
|
||||
{
|
||||
name: "DeleteFile",
|
||||
method: "DELETE",
|
||||
invoke: func(c *gin.Context) { (&TemplatesHandler{}).DeleteFile(c) },
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{
|
||||
{Key: "id", Value: "ws-stub"},
|
||||
// Path param without leading slash so DeleteFile's
|
||||
// filepath.IsAbs guard doesn't 400 before the root
|
||||
// dispatch runs. The List/Read/Write paths strip the
|
||||
// leading slash themselves and accept either form.
|
||||
{Key: "path", Value: "notes.md"},
|
||||
}
|
||||
// WriteFile binds JSON; provide a minimal valid body so the
|
||||
// short-circuit isn't masked by the bind-error path.
|
||||
var body string
|
||||
if tc.method == "PUT" {
|
||||
body = `{"content":"x"}`
|
||||
}
|
||||
c.Request = httptest.NewRequest(
|
||||
tc.method,
|
||||
"/workspaces/ws-stub/files/notes.md?root=/agent-home",
|
||||
strings.NewReader(body),
|
||||
)
|
||||
if body != "" {
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
}
|
||||
|
||||
tc.invoke(c)
|
||||
|
||||
if w.Code != http.StatusNotImplemented {
|
||||
t.Fatalf("expected 501, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if !strings.Contains(w.Body.String(), "/agent-home not implemented") {
|
||||
t.Errorf("body should contain canonical stub message; got %s", w.Body.String())
|
||||
}
|
||||
if strings.Contains(w.Body.String(), "workspace not found") {
|
||||
t.Errorf("stub leaked through to DB lookup; body=%s", w.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,346 @@
|
||||
package handlers
|
||||
|
||||
// template_files_docker_exec.go — Files API backend for the
|
||||
// `/agent-home` root added by internal#425 RFC.
|
||||
//
|
||||
// Architecture (per the RFC):
|
||||
//
|
||||
// - The canvas asks ListFiles/ReadFile/WriteFile/DeleteFile against
|
||||
// `?root=/agent-home` for any SaaS (EC2-per-workspace) tenant.
|
||||
//
|
||||
// - The agent's actual writable state lives INSIDE the molecule-
|
||||
// workspace container (overlay filesystem) at runtime-specific
|
||||
// paths — `/root/.openclaw/` for openclaw, `/home/agent/` for
|
||||
// claude-code + hermes. The host EC2 can't see those bytes
|
||||
// directly because the container's overlay is invisible to the
|
||||
// host filesystem.
|
||||
//
|
||||
// - To reach them, we reuse the EIC SSH tunnel from
|
||||
// template_files_eic.go and wrap the remote command in
|
||||
// `sudo -n docker exec molecule-workspace <cmd>`. The `find` /
|
||||
// `cat` / etc. then run INSIDE the container's filesystem
|
||||
// namespace.
|
||||
//
|
||||
// - Path translation: agentHomePathPrefix maps runtime →
|
||||
// container-internal-$HOME-equivalent path. Resolution is
|
||||
// INDEPENDENT of the runtime's process-level `$HOME` env var
|
||||
// (Hermes runs with `HOME=/tmp` per start.sh, but its actual
|
||||
// state lives at `/home/agent/.hermes` — Hongming Decisions §1,
|
||||
// internal#425 2026-05-15).
|
||||
//
|
||||
// Security boundary (this is the file core-security reviews):
|
||||
//
|
||||
// 1. PATH gate — every relPath goes through validateRelPath (no
|
||||
// absolute, no `..`) BEFORE resolution. The resolver only joins
|
||||
// against allowlisted agentHomePathPrefix entries, so a request
|
||||
// for `?root=/agent-home&path=../../etc/passwd` lands inside
|
||||
// the agent's home prefix regardless.
|
||||
//
|
||||
// 2. PATH-secret-shape gate — secrets.ScanBytes(absPath) rejects
|
||||
// paths that look like credentials (e.g. literal `ghs_xxxx`
|
||||
// filename). Match returns the canonical denial marker; the
|
||||
// caller sees `<denied: secret-shape>` instead of the bytes.
|
||||
// Defense-in-depth — an agent that wrote a secret-looking
|
||||
// filename can't exfiltrate the path itself by listing.
|
||||
//
|
||||
// 3. CONTENT-secret-shape gate — for ReadFile, the file's bytes
|
||||
// are scanned BEFORE being returned. If any pattern matches,
|
||||
// the API returns the canonical `<denied: secret-shape>` body
|
||||
// (NOT the matched bytes). Same SSOT as the secret-scan CI
|
||||
// workflow + the runtime's pre-commit hook (Phase 2a).
|
||||
//
|
||||
// 4. DOCKER-EXEC privilege boundary — the EC2 instance's ubuntu
|
||||
// user runs `sudo -n docker exec` because the molecule-workspace
|
||||
// container was started by cloud-init-as-root (see
|
||||
// molecule-controlplane/internal/provisioner/userdata_containerized.go).
|
||||
// The `sudo -n` is intentional: passwordless sudo on EC2 by
|
||||
// default; if that ever changes, sudo fails fast with
|
||||
// `sudo: a password is required` instead of hanging. The
|
||||
// container's user is the runtime's choice (varies per template
|
||||
// Dockerfile) — we DON'T add `--user` to the docker exec; the
|
||||
// container has full read on its own overlay regardless.
|
||||
//
|
||||
// 5. WRITE + DELETE — NOT implemented in this PR. The 501 stub
|
||||
// from #1247 stays in place for those verbs. Phase 2b-followup
|
||||
// will add write/delete with the additional discipline of:
|
||||
// - reject write if content scanned-secret
|
||||
// - reject delete if path scanned-secret (prevents deletion
|
||||
// of the agent's own credentials by a confused/malicious
|
||||
// actor)
|
||||
// Splitting read from write here keeps the security-review
|
||||
// surface manageable; read-only is the bigger value-add anyway
|
||||
// (canvas operators debugging agent state).
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/secrets"
|
||||
)
|
||||
|
||||
// agentHomePathPrefix maps a runtime name to the absolute base path
|
||||
// INSIDE the molecule-workspace container where the agent's actual
|
||||
// state lives.
|
||||
//
|
||||
// Path source-of-truth: each template's Dockerfile +
|
||||
// runtime-specific start.sh + the runtime's own state-dir convention.
|
||||
//
|
||||
// - openclaw → `/root/.openclaw/` (root-owned daily-driver state;
|
||||
// openclaw containers run their agent as root)
|
||||
// - claude-code → `/home/agent/.claude` (the agent user's
|
||||
// claude state)
|
||||
// - hermes → `/home/agent` (Hongming Decisions §1, internal#425:
|
||||
// /home/agent NOT /tmp; the agent runs with HOME=/tmp but its
|
||||
// real state — `.hermes/`, sessions, paired devices — is under
|
||||
// /home/agent)
|
||||
//
|
||||
// Entries not present here → /agent-home returns 400 ("runtime does
|
||||
// not support /agent-home"). Currently external/kimi/etc are
|
||||
// external-like and short-circuited at the canvas level
|
||||
// (NotAvailablePanel), so they never reach this dispatch.
|
||||
//
|
||||
// Adding a new runtime here: confirm the template's Dockerfile +
|
||||
// start.sh + the runtime's runtime-config state-dir convention,
|
||||
// then add the entry. Tests in template_files_docker_exec_test.go
|
||||
// pin the current set.
|
||||
var agentHomePathPrefix = map[string]string{
|
||||
"openclaw": "/root/.openclaw",
|
||||
"claude-code": "/home/agent/.claude",
|
||||
"hermes": "/home/agent",
|
||||
}
|
||||
|
||||
// resolveAgentHomePath translates (runtime, relPath) into an absolute
|
||||
// path INSIDE the workspace container. Returns ("", err) for an
|
||||
// unsupported runtime or an invalid relPath.
|
||||
//
|
||||
// The empty-runtime case (runtime=="") falls through to error — we
|
||||
// don't pick a default because /agent-home semantics are runtime-
|
||||
// specific and a wrong default would surface the wrong files.
|
||||
func resolveAgentHomePath(runtime, relPath string) (string, error) {
|
||||
// Empty relPath = "give me the base directory" — used by
|
||||
// listFilesViaDockerExec to compute the listing root. Non-empty
|
||||
// relPath must satisfy validateRelPath (no `..`, no absolute).
|
||||
if relPath != "" {
|
||||
if err := validateRelPath(relPath); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
base, ok := agentHomePathPrefix[strings.ToLower(strings.TrimSpace(runtime))]
|
||||
if !ok {
|
||||
return "", fmt.Errorf("/agent-home is not supported for runtime %q", runtime)
|
||||
}
|
||||
if relPath == "" {
|
||||
return base, nil
|
||||
}
|
||||
return filepath.Join(base, filepath.Clean(relPath)), nil
|
||||
}
|
||||
|
||||
// dockerExecPrefix is the shell prefix that wraps every remote command
|
||||
// so it runs INSIDE the molecule-workspace container's filesystem
|
||||
// namespace instead of on the EC2 host.
|
||||
//
|
||||
// sudo -n docker exec molecule-workspace <cmd>
|
||||
//
|
||||
// `sudo -n` (non-interactive) — fails fast instead of hanging if the
|
||||
// ubuntu user ever loses passwordless sudo. The molecule-workspace
|
||||
// container name is the canonical name cloud-init starts the
|
||||
// container with (see userdata_containerized.go).
|
||||
//
|
||||
// Stored as a const so tests can prove the prefix shape via string
|
||||
// match instead of having to spin up a real docker daemon.
|
||||
const dockerExecPrefix = "sudo -n docker exec molecule-workspace"
|
||||
|
||||
// buildDockerExecFindShell wraps buildFindShell from the EIC helper
|
||||
// to run inside the container. We can't directly call buildFindShell
|
||||
// + prepend the prefix because the prefix changes the QUOTING context
|
||||
// (the outer sudo args don't need to escape what's inside docker
|
||||
// exec) — but with `docker exec <container> sh -c '<cmd>'` the inner
|
||||
// command is one argv element and is passed through literally. So
|
||||
// the cleanest shape is:
|
||||
//
|
||||
// sudo -n docker exec molecule-workspace sh -c '<find shell from EIC helper>'
|
||||
//
|
||||
// The single-quoted body is then exactly what buildFindShell would
|
||||
// emit on a non-container EIC path. Any single quote in the body
|
||||
// goes through the existing shellQuote escape inside buildFindShell.
|
||||
func buildDockerExecFindShell(listPath string, maxDepth int) string {
|
||||
inner := buildFindShell(listPath, maxDepth)
|
||||
return fmt.Sprintf("%s sh -c %s", dockerExecPrefix, shellQuote(inner))
|
||||
}
|
||||
|
||||
// buildDockerExecCatShell wraps buildCatShell for in-container exec.
|
||||
// Same shape rationale as buildDockerExecFindShell.
|
||||
func buildDockerExecCatShell(absPath string) string {
|
||||
inner := buildCatShell(absPath)
|
||||
return fmt.Sprintf("%s sh -c %s", dockerExecPrefix, shellQuote(inner))
|
||||
}
|
||||
|
||||
// secretShapeDeniedMarker is the canonical body the Files API emits
|
||||
// to the canvas when a path or content match a credential regex.
|
||||
// The canvas renders a placeholder INSTEAD OF the textarea (see
|
||||
// canvas/src/components/tabs/FilesTab/FileEditor.tsx
|
||||
// SECRET_SHAPE_DENIED_MARKER). Both sides reference the literal
|
||||
// string — keep them aligned.
|
||||
const secretShapeDeniedMarker = "<denied: secret-shape>"
|
||||
|
||||
// listFilesViaDockerExec lists files under /agent-home/{sub} for the
|
||||
// given workspace. Returns the same wire shape as listFilesViaEIC
|
||||
// so the templates.go handler can interleave them.
|
||||
//
|
||||
// Path-secret gate: each entry's REL path is scanned against
|
||||
// secrets.ScanBytes. A match means that path is silently dropped
|
||||
// from the listing (NOT replaced with a placeholder row — keeping
|
||||
// the row but with a denial marker would still surface the
|
||||
// filename, which IS the credential shape we're denying). Empty
|
||||
// listing reads as "no files matching" to the canvas, which is
|
||||
// the correct user-facing semantic.
|
||||
func listFilesViaDockerExec(
|
||||
ctx context.Context,
|
||||
instanceID, runtime, sub string,
|
||||
maxDepth int,
|
||||
) ([]eicFileEntry, error) {
|
||||
basePath, err := resolveAgentHomePath(runtime, "")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
listPath := basePath
|
||||
if sub != "" {
|
||||
listPath = filepath.Join(basePath, filepath.Clean(sub))
|
||||
}
|
||||
if maxDepth < 1 {
|
||||
maxDepth = 1
|
||||
}
|
||||
if maxDepth > 5 {
|
||||
maxDepth = 5
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, eicFileOpTimeout)
|
||||
defer cancel()
|
||||
|
||||
var raw []byte
|
||||
runErr := withEICTunnel(ctx, instanceID, func(s eicSSHSession) error {
|
||||
sshCmd := exec.CommandContext(ctx, "ssh", s.sshArgs(buildDockerExecFindShell(listPath, maxDepth))...)
|
||||
sshCmd.Env = os.Environ()
|
||||
var stdout, stderr bytes.Buffer
|
||||
sshCmd.Stdout = &stdout
|
||||
sshCmd.Stderr = &stderr
|
||||
if rerr := sshCmd.Run(); rerr != nil {
|
||||
// docker-exec exits non-zero if the container is gone, if
|
||||
// `find` itself errored, or if the agent-home dir doesn't
|
||||
// exist yet. We can't distinguish those at this layer
|
||||
// cleanly — surface the stderr to the caller for diagnosis
|
||||
// rather than swallowing.
|
||||
return fmt.Errorf("ssh docker-exec find: %w (%s)", rerr, strings.TrimSpace(stderr.String()))
|
||||
}
|
||||
raw = stdout.Bytes()
|
||||
return nil
|
||||
})
|
||||
if runErr != nil {
|
||||
return nil, runErr
|
||||
}
|
||||
|
||||
entries := parseFindOutput(raw)
|
||||
|
||||
// Path-secret gate.
|
||||
filtered := entries[:0]
|
||||
for _, e := range entries {
|
||||
m, _ := secrets.ScanString(e.Path)
|
||||
if m != nil {
|
||||
// Path itself looks like a credential. Drop the row.
|
||||
// (Don't log the path either — it's the credential.)
|
||||
continue
|
||||
}
|
||||
filtered = append(filtered, e)
|
||||
}
|
||||
return filtered, nil
|
||||
}
|
||||
|
||||
// readFileViaDockerExec reads the file at /agent-home/{relPath} for
|
||||
// the given workspace + runtime. Returns the bytes OR the canonical
|
||||
// secretShapeDeniedMarker when EITHER the path OR the content
|
||||
// matched a credential regex.
|
||||
//
|
||||
// The marker is returned as a successful 200-equivalent at the
|
||||
// handler layer — NOT a 4xx — because the canvas treats the marker
|
||||
// body as the "render placeholder" signal. A 4xx would land in the
|
||||
// error banner instead of the placeholder render, which is the
|
||||
// wrong UX (the file IS there; we just won't show it).
|
||||
//
|
||||
// If the file doesn't exist, returns the same nil + os.ErrNotExist
|
||||
// shape readFileViaEIC does, so the handler's NotFound branch
|
||||
// reuses unchanged.
|
||||
func readFileViaDockerExec(
|
||||
ctx context.Context,
|
||||
instanceID, runtime, relPath string,
|
||||
) ([]byte, error) {
|
||||
absPath, err := resolveAgentHomePath(runtime, relPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Path-shape gate FIRST. If the path itself matches a credential
|
||||
// regex, refuse before doing any work (no IO, no log line with
|
||||
// the offending path).
|
||||
if m, _ := secrets.ScanString(absPath); m != nil {
|
||||
return []byte(secretShapeDeniedMarker), nil
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, eicFileOpTimeout)
|
||||
defer cancel()
|
||||
|
||||
var raw []byte
|
||||
runErr := withEICTunnel(ctx, instanceID, func(s eicSSHSession) error {
|
||||
sshCmd := exec.CommandContext(ctx, "ssh", s.sshArgs(buildDockerExecCatShell(absPath))...)
|
||||
sshCmd.Env = os.Environ()
|
||||
var stdout, stderr bytes.Buffer
|
||||
sshCmd.Stdout = &stdout
|
||||
sshCmd.Stderr = &stderr
|
||||
rerr := sshCmd.Run()
|
||||
raw = stdout.Bytes()
|
||||
if rerr != nil {
|
||||
// Same shape as readFileViaEIC: empty stdout + empty
|
||||
// stderr + non-zero exit → missing file. Real
|
||||
// auth/tunnel/container-missing errors produce stderr.
|
||||
if len(raw) == 0 && stderr.Len() == 0 {
|
||||
return errFileNotExist
|
||||
}
|
||||
return fmt.Errorf("ssh docker-exec cat: %w (%s)", rerr, strings.TrimSpace(stderr.String()))
|
||||
}
|
||||
log.Printf("readFileViaDockerExec: ws instance=%s runtime=%s read %d bytes ← /agent-home%s",
|
||||
instanceID, runtime, len(raw), relPath)
|
||||
return nil
|
||||
})
|
||||
if runErr != nil {
|
||||
return nil, runErr
|
||||
}
|
||||
|
||||
// Content-shape gate. Scan the bytes against the SSOT regex.
|
||||
// Match → return marker. Note: this scans the WHOLE file in
|
||||
// memory; today there's no streaming path because the existing
|
||||
// EIC read also slurps the whole file. If a future Phase
|
||||
// streams large files, the gate would need a streaming
|
||||
// scanner.
|
||||
if m, _ := secrets.ScanBytes(raw); m != nil {
|
||||
return []byte(secretShapeDeniedMarker), nil
|
||||
}
|
||||
|
||||
return raw, nil
|
||||
}
|
||||
|
||||
// errFileNotExist is the sentinel readFileViaDockerExec returns when
|
||||
// the file is missing. Mirrors readFileViaEIC's sentinel so the
|
||||
// handler dispatch can map both to HTTP 404 via the same errors.Is
|
||||
// branch. Declared at package scope (not constructed inline) so
|
||||
// errors.Is comparison works.
|
||||
//
|
||||
// We don't reuse os.ErrNotExist directly because the surrounding
|
||||
// codebase wraps errors with extra context; having a dedicated
|
||||
// sentinel keeps the dispatch test pinned to THIS package's
|
||||
// invariant rather than a stdlib symbol.
|
||||
var errFileNotExist = fmt.Errorf("agent-home file does not exist")
|
||||
@@ -0,0 +1,255 @@
|
||||
package handlers
|
||||
|
||||
// Tests for template_files_docker_exec.go — internal#425 Phase 2b.
|
||||
//
|
||||
// What we pin here (without spinning up a real EIC tunnel or
|
||||
// container):
|
||||
//
|
||||
// 1. agentHomePathPrefix: Hongming's Decisions §1 set is present
|
||||
// and points at the right per-runtime paths.
|
||||
// 2. resolveAgentHomePath: traversal rejected, unknown runtime
|
||||
// rejected, normal path joined correctly.
|
||||
// 3. buildDockerExecFindShell / buildDockerExecCatShell: the
|
||||
// `sudo -n docker exec molecule-workspace sh -c '<inner>'`
|
||||
// wrapper shape is exact (so the security boundary on the
|
||||
// sudo / exec target stays grep-able).
|
||||
// 4. secretShapeDeniedMarker: matches the canvas-side constant
|
||||
// `<denied: secret-shape>` so a typo on either side fails here
|
||||
// instead of silently failing in production.
|
||||
// 5. errFileNotExist is errors.Is-aware (so the handler's 404
|
||||
// branch in templates.go ReadFile catches wrapped-by-callee
|
||||
// sentinels).
|
||||
//
|
||||
// What we DON'T cover unit-side: the full SSH + docker-exec
|
||||
// roundtrip. That's the staging-tenant smoke (Phase 2b SOP gate
|
||||
// per feedback_staging_e2e_merge_gate) — fresh EC2-per-workspace
|
||||
// tenant on staging, exercise GET .../files?root=/agent-home,
|
||||
// verify the canvas renders the real container-internal state.
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/secrets"
|
||||
)
|
||||
|
||||
// TestAgentHomePathPrefix_HongmingDecisionsSection1 pins the per-
|
||||
// runtime mapping decided 2026-05-15 (internal#425). Adding a
|
||||
// runtime here is fine; CHANGING an existing one without coordinated
|
||||
// migration of the canvas + the per-runtime template will silently
|
||||
// break tenants — failing this test forces the change to be intent-
|
||||
// ional. Each entry's value is the absolute path INSIDE the
|
||||
// container's overlay filesystem (NOT on the host).
|
||||
func TestAgentHomePathPrefix_HongmingDecisionsSection1(t *testing.T) {
|
||||
expect := map[string]string{
|
||||
// Hongming Decisions §1 (internal#425 2026-05-15):
|
||||
// "/home/agent (where Hermes's actual state lives —
|
||||
// .hermes/, sessions, paired devices). Note: Hermes today
|
||||
// runs with HOME=/tmp per its start.sh, so the new mapping
|
||||
// needs a separate path resolver in agentHomePathPrefix
|
||||
// and does NOT need to match $HOME."
|
||||
"hermes": "/home/agent",
|
||||
"claude-code": "/home/agent/.claude",
|
||||
"openclaw": "/root/.openclaw",
|
||||
}
|
||||
if len(agentHomePathPrefix) != len(expect) {
|
||||
t.Errorf("agentHomePathPrefix has %d entries; expected %d (%v)",
|
||||
len(agentHomePathPrefix), len(expect), expect)
|
||||
}
|
||||
for k, v := range expect {
|
||||
if got := agentHomePathPrefix[k]; got != v {
|
||||
t.Errorf("agentHomePathPrefix[%q] = %q; want %q", k, got, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveAgentHomePath_RejectsTraversal(t *testing.T) {
|
||||
cases := []string{
|
||||
"../etc/passwd",
|
||||
"foo/../../etc",
|
||||
"/abs/path",
|
||||
}
|
||||
for _, p := range cases {
|
||||
t.Run(p, func(t *testing.T) {
|
||||
_, err := resolveAgentHomePath("openclaw", p)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for traversal path %q", p)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveAgentHomePath_RejectsUnknownRuntime(t *testing.T) {
|
||||
_, err := resolveAgentHomePath("unknown-future-runtime", "notes.md")
|
||||
if err == nil {
|
||||
t.Fatal("expected error for unknown runtime")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "/agent-home is not supported") {
|
||||
t.Errorf("error should mention /agent-home unsupported; got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveAgentHomePath_NormalPath(t *testing.T) {
|
||||
cases := []struct {
|
||||
runtime string
|
||||
rel string
|
||||
expected string
|
||||
}{
|
||||
{"openclaw", "skills/notes.md", "/root/.openclaw/skills/notes.md"},
|
||||
{"hermes", "sessions/01.json", "/home/agent/sessions/01.json"},
|
||||
{"claude-code", "settings.json", "/home/agent/.claude/settings.json"},
|
||||
// Empty rel resolves to the base — needed by listFilesViaDockerExec.
|
||||
{"openclaw", "", "/root/.openclaw"},
|
||||
// Case-insensitive runtime match (lowercase normalisation).
|
||||
{"OPENCLAW", "x.md", "/root/.openclaw/x.md"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.runtime+":"+tc.rel, func(t *testing.T) {
|
||||
got, err := resolveAgentHomePath(tc.runtime, tc.rel)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if got != tc.expected {
|
||||
t.Errorf("got %q; want %q", got, tc.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildDockerExecFindShell_WrapperShape(t *testing.T) {
|
||||
// Pin the exact wrapper shape. Three invariants:
|
||||
// - starts with `sudo -n docker exec molecule-workspace sh -c '`
|
||||
// - ends with `'`
|
||||
// - the body between the quotes contains the find shell from
|
||||
// buildFindShell (we don't pin the body byte-for-byte because
|
||||
// buildFindShell can evolve; we pin "contains find -maxdepth").
|
||||
shell := buildDockerExecFindShell("/root/.openclaw", 2)
|
||||
if !strings.HasPrefix(shell, "sudo -n docker exec molecule-workspace sh -c '") {
|
||||
t.Errorf("missing canonical wrapper prefix; got: %s", shell)
|
||||
}
|
||||
if !strings.HasSuffix(shell, "'") {
|
||||
t.Errorf("missing trailing quote on docker-exec wrapper; got: %s", shell)
|
||||
}
|
||||
if !strings.Contains(shell, "find") || !strings.Contains(shell, "maxdepth") {
|
||||
t.Errorf("inner shell should contain find -maxdepth; got: %s", shell)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildDockerExecCatShell_WrapperShape(t *testing.T) {
|
||||
shell := buildDockerExecCatShell("/home/agent/.claude/settings.json")
|
||||
if !strings.HasPrefix(shell, "sudo -n docker exec molecule-workspace sh -c '") {
|
||||
t.Errorf("missing canonical wrapper prefix; got: %s", shell)
|
||||
}
|
||||
if !strings.Contains(shell, "sudo -n cat") {
|
||||
t.Errorf("inner shell should be the cat helper; got: %s", shell)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecretShapeDeniedMarker_CanonicalString — the canvas side
|
||||
// (canvas/src/components/tabs/FilesTab/FileEditor.tsx
|
||||
// SECRET_SHAPE_DENIED_MARKER) is the matching constant. A typo on
|
||||
// either side breaks the placeholder render — but only in production.
|
||||
// Pin the literal string here so the typo is caught at CI time.
|
||||
func TestSecretShapeDeniedMarker_CanonicalString(t *testing.T) {
|
||||
if secretShapeDeniedMarker != "<denied: secret-shape>" {
|
||||
t.Errorf("marker drift: %q", secretShapeDeniedMarker)
|
||||
}
|
||||
}
|
||||
|
||||
// TestPathSecretGate_DropsCredentialShapedRows pins the path-shape
|
||||
// gate that listFilesViaDockerExec applies. The function under test
|
||||
// is inline in listFilesViaDockerExec; we exercise the SAME
|
||||
// secrets.ScanString edge the production path takes.
|
||||
//
|
||||
// Three invariants:
|
||||
// - a row whose Path matches a credential regex is DROPPED (not
|
||||
// replaced with a placeholder row) — keeping the row would
|
||||
// surface the credential-shape filename, defeating the gate
|
||||
// - a normal row passes through unmodified
|
||||
// - the gate doesn't false-positive on plain prose
|
||||
func TestPathSecretGate_DropsCredentialShapedRows(t *testing.T) {
|
||||
entries := []eicFileEntry{
|
||||
{Path: "skills/notes.md", Size: 100, Dir: false},
|
||||
{Path: "ghp_EXAMPLE111122223333444455556666777788889999", Size: 50, Dir: false},
|
||||
{Path: "config.yaml", Size: 200, Dir: false},
|
||||
}
|
||||
filtered := entries[:0]
|
||||
for _, e := range entries {
|
||||
m, _ := secrets.ScanString(e.Path)
|
||||
if m != nil {
|
||||
continue
|
||||
}
|
||||
filtered = append(filtered, e)
|
||||
}
|
||||
if len(filtered) != 2 {
|
||||
t.Fatalf("expected 2 filtered entries; got %d (%v)", len(filtered), filtered)
|
||||
}
|
||||
if filtered[0].Path != "skills/notes.md" || filtered[1].Path != "config.yaml" {
|
||||
t.Errorf("filtered list shape changed unexpectedly: %v", filtered)
|
||||
}
|
||||
}
|
||||
|
||||
// TestContentSecretGate_ReplacesBytesWithMarker pins the content-
|
||||
// shape gate that readFileViaDockerExec applies. Exercises the
|
||||
// SAME secrets.ScanBytes edge the production path takes — a match
|
||||
// means the original bytes are NEVER returned to the caller.
|
||||
func TestContentSecretGate_ReplacesBytesWithMarker(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
content string
|
||||
denied bool
|
||||
}{
|
||||
{
|
||||
name: "credential-in-yaml",
|
||||
content: "api_key: ghp_EXAMPLE111122223333444455556666777788889999\n",
|
||||
denied: true,
|
||||
},
|
||||
{
|
||||
name: "anthropic-key",
|
||||
content: "ANTHROPIC_API_KEY=sk-ant-EXAMPLE" + strings.Repeat("1", 60) + "\n",
|
||||
denied: true,
|
||||
},
|
||||
{
|
||||
name: "plain-markdown",
|
||||
content: "# Skills\n\n- Code review\n- Postgres\n",
|
||||
denied: false,
|
||||
},
|
||||
{
|
||||
name: "yaml-without-credentials",
|
||||
content: "name: openclaw\nversion: 1\n",
|
||||
denied: false,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
m, _ := secrets.ScanBytes([]byte(tc.content))
|
||||
gotDenied := m != nil
|
||||
if gotDenied != tc.denied {
|
||||
t.Errorf("expected denied=%v, got %v (match=%+v)", tc.denied, gotDenied, m)
|
||||
}
|
||||
// And when denied, the production path returns the marker
|
||||
// — pin that the marker length is fixed (so a future
|
||||
// change can't sneak the secret bytes through as the
|
||||
// marker's prefix/suffix).
|
||||
if gotDenied {
|
||||
marker := []byte(secretShapeDeniedMarker)
|
||||
if len(marker) != len("<denied: secret-shape>") {
|
||||
t.Errorf("marker length drift: %d", len(marker))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestErrFileNotExist_IsErrorsIsAware — the templates.go ReadFile
|
||||
// dispatch uses errors.Is(err, errFileNotExist) to decide between
|
||||
// 404 and 500. Pin that errors.Is works against this sentinel
|
||||
// after the readFileViaDockerExec wrapping path.
|
||||
func TestErrFileNotExist_IsErrorsIsAware(t *testing.T) {
|
||||
wrapped := fmt.Errorf("outer: %w", errFileNotExist)
|
||||
if !errors.Is(wrapped, errFileNotExist) {
|
||||
t.Errorf("errors.Is failed against wrapped errFileNotExist")
|
||||
}
|
||||
}
|
||||
@@ -18,11 +18,35 @@ import (
|
||||
)
|
||||
|
||||
// allowedRoots are the container paths that the Files API can browse.
|
||||
//
|
||||
// `/agent-home` (added 2026-05-15, internal#425 RFC) is the container's
|
||||
// own $HOME — `/root` for openclaw, `/home/agent` for claude-code/hermes
|
||||
// — browsed via `docker exec` rather than host-side `find`. The
|
||||
// dispatch is stubbed today (returns 501); full implementation lands in
|
||||
// Phase 2b of the RFC. The allowedRoots key is added now so the canvas
|
||||
// can design its root-selector UI against the final shape and the
|
||||
// stub-vs-full transition is server-side only.
|
||||
var allowedRoots = map[string]bool{
|
||||
"/configs": true,
|
||||
"/workspace": true,
|
||||
"/home": true,
|
||||
"/plugins": true,
|
||||
"/configs": true,
|
||||
"/workspace": true,
|
||||
"/home": true,
|
||||
"/plugins": true,
|
||||
"/agent-home": true,
|
||||
}
|
||||
|
||||
// agentHomeStubMessage is the body returned by every Files API verb
|
||||
// when `?root=/agent-home` is requested before Phase 2b lands. Keep the
|
||||
// status code 501 (Not Implemented) — the route exists, the verb is
|
||||
// understood, but the handler is unimplemented. Distinguishes from
|
||||
// 400/404 so a canvas behind a less-current server can render a clean
|
||||
// "feature pending" state instead of a generic error.
|
||||
const agentHomeStubMessage = "/agent-home not implemented yet (internal#425 RFC Phase 2b — docker-exec backend pending)"
|
||||
|
||||
// isAgentHomeStubRequest returns true when the request targets the
|
||||
// stubbed /agent-home root. Centralised so every verb in this file
|
||||
// short-circuits with the same response shape.
|
||||
func isAgentHomeStubRequest(rootPath string) bool {
|
||||
return rootPath == "/agent-home"
|
||||
}
|
||||
|
||||
// maxUploadFiles limits the number of files in a single import/replace.
|
||||
@@ -219,9 +243,13 @@ func (h *TemplatesHandler) ListFiles(c *gin.Context) {
|
||||
// ?depth= — max depth to recurse (default: 1, max: 5)
|
||||
rootPath := c.DefaultQuery("root", "/configs")
|
||||
if !allowedRoots[rootPath] {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins"})
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins, /agent-home"})
|
||||
return
|
||||
}
|
||||
// /agent-home dispatch (Phase 2b) — docker-exec backend that
|
||||
// reaches into the container's overlay filesystem. Below the
|
||||
// allowedRoots gate so an /agent-home request still 400s if the
|
||||
// allowlist was removed in a refactor.
|
||||
subPath := c.DefaultQuery("path", "")
|
||||
if subPath != "" {
|
||||
if err := validateRelPath(subPath); err != nil {
|
||||
@@ -268,9 +296,18 @@ func (h *TemplatesHandler) ListFiles(c *gin.Context) {
|
||||
// files / No config files yet" for SaaS workspaces, regardless of
|
||||
// what was actually on disk. See issue #2999.
|
||||
if instanceID != "" {
|
||||
entries, err := listFilesViaEIC(ctx, instanceID, runtime, rootPath, subPath, depth)
|
||||
// internal#425 Phase 2b: /agent-home goes through the
|
||||
// docker-exec backend (reaches into the container's overlay
|
||||
// FS) instead of the host-FS EIC backend.
|
||||
var entries []eicFileEntry
|
||||
var err error
|
||||
if isAgentHomeStubRequest(rootPath) {
|
||||
entries, err = listFilesViaDockerExec(ctx, instanceID, runtime, subPath, depth)
|
||||
} else {
|
||||
entries, err = listFilesViaEIC(ctx, instanceID, runtime, rootPath, subPath, depth)
|
||||
}
|
||||
if err != nil {
|
||||
log.Printf("ListFiles EIC for %s root=%s sub=%s: %v", workspaceID, rootPath, subPath, err)
|
||||
log.Printf("ListFiles backend for %s root=%s sub=%s: %v", workspaceID, rootPath, subPath, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to list files: %v", err)})
|
||||
return
|
||||
}
|
||||
@@ -383,10 +420,9 @@ func (h *TemplatesHandler) ReadFile(c *gin.Context) {
|
||||
ctx := c.Request.Context()
|
||||
rootPath := c.DefaultQuery("root", "/configs")
|
||||
if !allowedRoots[rootPath] {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins"})
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins, /agent-home"})
|
||||
return
|
||||
}
|
||||
|
||||
var wsName, instanceID, runtime string
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT name, COALESCE(instance_id, ''), COALESCE(runtime, '') FROM workspaces WHERE id = $1`,
|
||||
@@ -410,8 +446,20 @@ func (h *TemplatesHandler) ReadFile(c *gin.Context) {
|
||||
// hermes → /home/ubuntu/.hermes); other allow-listed roots
|
||||
// (`/home`, `/workspace`, `/plugins`) pass through literally so
|
||||
// list/read/write/delete agree on what file a tree row points to.
|
||||
//
|
||||
// internal#425 Phase 2b: /agent-home goes through readFileViaDockerExec
|
||||
// (reaches into the container's overlay FS). The docker-exec backend
|
||||
// also applies content-secret-shape scanning — if the file's path or
|
||||
// content matches a credential regex, returns the canonical
|
||||
// `<denied: secret-shape>` body so the canvas renders a placeholder.
|
||||
if instanceID != "" {
|
||||
content, err := readFileViaEIC(ctx, instanceID, runtime, rootPath, filePath)
|
||||
var content []byte
|
||||
var err error
|
||||
if isAgentHomeStubRequest(rootPath) {
|
||||
content, err = readFileViaDockerExec(ctx, instanceID, runtime, filePath)
|
||||
} else {
|
||||
content, err = readFileViaEIC(ctx, instanceID, runtime, rootPath, filePath)
|
||||
}
|
||||
if err == nil {
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"path": filePath,
|
||||
@@ -420,11 +468,11 @@ func (h *TemplatesHandler) ReadFile(c *gin.Context) {
|
||||
})
|
||||
return
|
||||
}
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
if errors.Is(err, os.ErrNotExist) || errors.Is(err, errFileNotExist) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "file not found on workspace"})
|
||||
return
|
||||
}
|
||||
log.Printf("ReadFile EIC for %s path=%s: %v", workspaceID, filePath, err)
|
||||
log.Printf("ReadFile backend for %s path=%s root=%s: %v", workspaceID, filePath, rootPath, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to read file: %v", err)})
|
||||
return
|
||||
}
|
||||
@@ -496,7 +544,11 @@ func (h *TemplatesHandler) WriteFile(c *gin.Context) {
|
||||
ctx := c.Request.Context()
|
||||
rootPath := c.DefaultQuery("root", "/configs")
|
||||
if !allowedRoots[rootPath] {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins"})
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins, /agent-home"})
|
||||
return
|
||||
}
|
||||
if isAgentHomeStubRequest(rootPath) {
|
||||
c.JSON(http.StatusNotImplemented, gin.H{"error": agentHomeStubMessage})
|
||||
return
|
||||
}
|
||||
var wsName, instanceID, runtime string
|
||||
@@ -573,7 +625,11 @@ func (h *TemplatesHandler) DeleteFile(c *gin.Context) {
|
||||
ctx := c.Request.Context()
|
||||
rootPath := c.DefaultQuery("root", "/configs")
|
||||
if !allowedRoots[rootPath] {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins"})
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "root must be one of: /configs, /workspace, /home, /plugins, /agent-home"})
|
||||
return
|
||||
}
|
||||
if isAgentHomeStubRequest(rootPath) {
|
||||
c.JSON(http.StatusNotImplemented, gin.H{"error": agentHomeStubMessage})
|
||||
return
|
||||
}
|
||||
var wsName, instanceID, runtime string
|
||||
|
||||
@@ -0,0 +1,226 @@
|
||||
// Package secrets provides the canonical SSOT for credential-shaped
|
||||
// regex patterns used by:
|
||||
//
|
||||
// - the CI `Secret scan` workflow (.gitea/workflows/secret-scan.yml)
|
||||
// - the runtime's bundled pre-commit hook
|
||||
// (molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh)
|
||||
// - the upcoming Phase 2b docker-exec Files API backend, which has
|
||||
// to refuse to surface files whose path OR content matches a
|
||||
// credential shape (RFC internal#425, Hongming 2026-05-15)
|
||||
//
|
||||
// Before this package, the same regex set lived as duplicate bash
|
||||
// arrays in two unrelated repos; adding a pattern required editing
|
||||
// both, and pattern drift was caught only via secret-scan workflow
|
||||
// failures on PRs that had unrelated changes (#2090-class incident
|
||||
// vector). Centralising in Go makes the Files API the SSOT, with the
|
||||
// YAML + bash arrays generated/asserted from this package so drift
|
||||
// is detected at CI time, not at exfiltration time.
|
||||
//
|
||||
// This file is Phase 2a of the internal#425 RFC. Phase 2b will import
|
||||
// `Patterns` from `template_files_docker_exec.go` to gate
|
||||
// `listFilesViaDockerExec` / `readFileViaDockerExec` against
|
||||
// secret-shaped paths AND content. Until 2b lands, the package has
|
||||
// one consumer: this package's own unit tests, which pin the regex
|
||||
// strings so a refactor that drops or weakens one is caught here.
|
||||
package secrets
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Pattern is one named credential shape — a human label plus the
|
||||
// compiled regex. The label appears in CI error output ("matched:
|
||||
// github-pat") so an operator can identify the family without seeing
|
||||
// the actual matched bytes (echoing the bytes widens the blast radius
|
||||
// per the secret-scan workflow's recovery prose).
|
||||
type Pattern struct {
|
||||
// Name is a short kebab-case identifier (e.g. "github-pat",
|
||||
// "anthropic-api-key"). Stable across versions — consumers may
|
||||
// switch on it.
|
||||
Name string
|
||||
// Description is a one-line human-readable explanation of what
|
||||
// the pattern matches. Used in CI error messages and the Files
|
||||
// API "<denied: secret-shape>" placeholder rationale.
|
||||
Description string
|
||||
// regexSource is the regex literal in Go-RE2 syntax. Stored as a
|
||||
// string so the slice declaration below stays readable; compiled
|
||||
// once via sync.Once into a *regexp.Regexp.
|
||||
regexSource string
|
||||
}
|
||||
|
||||
// Patterns is the canonical credential-shape regex set.
|
||||
//
|
||||
// Adding a pattern here:
|
||||
//
|
||||
// 1. Add a new Pattern{} entry below with a kebab-case Name, a
|
||||
// one-line Description, and the regex literal. Anchor on a
|
||||
// low-false-positive prefix.
|
||||
// 2. Add a positive + negative test case in patterns_test.go.
|
||||
// 3. Mirror the regex string into:
|
||||
// a. .gitea/workflows/secret-scan.yml SECRET_PATTERNS array
|
||||
// b. molecule-ai-workspace-runtime/molecule_runtime/scripts/pre-commit-checks.sh
|
||||
// (or wait for the codegen target that consumes this slice — TBD
|
||||
// follow-up; tracked in the Phase 2a PR description.)
|
||||
//
|
||||
// The order is: alphabetical within each provider family, families
|
||||
// grouped by ecosystem (GitHub family, AI-provider family, chat
|
||||
// family, cloud family). Keep this stable so diffs are reviewable.
|
||||
var Patterns = []Pattern{
|
||||
// --- GitHub token family ---
|
||||
{
|
||||
Name: "github-pat-classic",
|
||||
Description: "GitHub personal access token (classic)",
|
||||
regexSource: `ghp_[A-Za-z0-9]{36,}`,
|
||||
},
|
||||
{
|
||||
Name: "github-app-installation-token",
|
||||
Description: "GitHub App installation token (#2090 vector)",
|
||||
regexSource: `ghs_[A-Za-z0-9]{36,}`,
|
||||
},
|
||||
{
|
||||
Name: "github-oauth-user-to-server",
|
||||
Description: "GitHub OAuth user-to-server token",
|
||||
regexSource: `gho_[A-Za-z0-9]{36,}`,
|
||||
},
|
||||
{
|
||||
Name: "github-oauth-user",
|
||||
Description: "GitHub OAuth user token",
|
||||
regexSource: `ghu_[A-Za-z0-9]{36,}`,
|
||||
},
|
||||
{
|
||||
Name: "github-oauth-refresh",
|
||||
Description: "GitHub OAuth refresh token",
|
||||
regexSource: `ghr_[A-Za-z0-9]{36,}`,
|
||||
},
|
||||
{
|
||||
Name: "github-pat-fine-grained",
|
||||
Description: "GitHub fine-grained personal access token",
|
||||
regexSource: `github_pat_[A-Za-z0-9_]{82,}`,
|
||||
},
|
||||
|
||||
// --- AI-provider API key family ---
|
||||
{
|
||||
Name: "anthropic-api-key",
|
||||
Description: "Anthropic API key",
|
||||
regexSource: `sk-ant-[A-Za-z0-9_-]{40,}`,
|
||||
},
|
||||
{
|
||||
Name: "openai-project-key",
|
||||
Description: "OpenAI project API key",
|
||||
regexSource: `sk-proj-[A-Za-z0-9_-]{40,}`,
|
||||
},
|
||||
{
|
||||
Name: "openai-service-account-key",
|
||||
Description: "OpenAI service-account API key",
|
||||
regexSource: `sk-svcacct-[A-Za-z0-9_-]{40,}`,
|
||||
},
|
||||
{
|
||||
Name: "minimax-api-key",
|
||||
Description: "MiniMax API key (F1088 vector)",
|
||||
regexSource: `sk-cp-[A-Za-z0-9_-]{60,}`,
|
||||
},
|
||||
|
||||
// --- Chat-platform token family ---
|
||||
{
|
||||
Name: "slack-token",
|
||||
Description: "Slack token (xoxb/xoxa/xoxp/xoxr/xoxs)",
|
||||
regexSource: `xox[baprs]-[A-Za-z0-9-]{20,}`,
|
||||
},
|
||||
|
||||
// --- Cloud-provider credential family ---
|
||||
{
|
||||
Name: "aws-access-key-id",
|
||||
Description: "AWS access key ID",
|
||||
regexSource: `AKIA[0-9A-Z]{16}`,
|
||||
},
|
||||
{
|
||||
Name: "aws-sts-temp-access-key-id",
|
||||
Description: "AWS STS temporary access key ID",
|
||||
regexSource: `ASIA[0-9A-Z]{16}`,
|
||||
},
|
||||
}
|
||||
|
||||
// compiledOnce protects the lazy build of compiledPatterns. We compile
|
||||
// lazily so package init is cheap; callers pay only on first match
|
||||
// (typically once per workspace-server boot).
|
||||
var (
|
||||
compiledOnce sync.Once
|
||||
compiledPatterns []*compiledPattern
|
||||
compileErr error
|
||||
)
|
||||
|
||||
type compiledPattern struct {
|
||||
Name string
|
||||
Description string
|
||||
Re *regexp.Regexp
|
||||
}
|
||||
|
||||
// compileAll compiles every Pattern.regexSource into a *regexp.Regexp.
|
||||
// Called once via compiledOnce. Any compile failure here is a build
|
||||
// bug (the unit tests assert each regex compiles) — surfacing via
|
||||
// returned error so callers don't panic in request handling.
|
||||
func compileAll() {
|
||||
out := make([]*compiledPattern, 0, len(Patterns))
|
||||
for _, p := range Patterns {
|
||||
re, err := regexp.Compile(p.regexSource)
|
||||
if err != nil {
|
||||
compileErr = fmt.Errorf("secrets: pattern %q failed to compile: %w", p.Name, err)
|
||||
return
|
||||
}
|
||||
out = append(out, &compiledPattern{Name: p.Name, Description: p.Description, Re: re})
|
||||
}
|
||||
compiledPatterns = out
|
||||
}
|
||||
|
||||
// ScanBytes returns a non-nil Match if any pattern matches anywhere
|
||||
// inside b. Returns (nil, nil) on no match. Returns (nil, err) only
|
||||
// if a regex in the package fails to compile — that's a build bug,
|
||||
// not a runtime data issue.
|
||||
//
|
||||
// Match contains the pattern Name + Description so the caller can
|
||||
// emit a path-or-content-denial rationale WITHOUT round-tripping the
|
||||
// matched bytes (which would defeat the purpose). The matched bytes
|
||||
// stay inside this function.
|
||||
//
|
||||
// The Files API Phase 2b backend will call ScanBytes on:
|
||||
//
|
||||
// - the absolute path string (catches a file literally named
|
||||
// `ghs_abc.txt`)
|
||||
// - the file content (catches a credential pasted into a workspace
|
||||
// file by an agent or user — the Files API refuses to surface it
|
||||
// and the canvas renders "<denied: secret-shape>")
|
||||
//
|
||||
// Ordering: patterns are tried in declaration order. First match
|
||||
// wins. This means narrower patterns (e.g. `sk-svcacct-…`) should
|
||||
// appear in `Patterns` before broader ones (`sk-…`) — today there's
|
||||
// no overlap, so order is descriptive only.
|
||||
func ScanBytes(b []byte) (*Match, error) {
|
||||
compiledOnce.Do(compileAll)
|
||||
if compileErr != nil {
|
||||
return nil, compileErr
|
||||
}
|
||||
for _, cp := range compiledPatterns {
|
||||
if cp.Re.Match(b) {
|
||||
return &Match{Name: cp.Name, Description: cp.Description}, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// ScanString is the string-input convenience wrapper around ScanBytes.
|
||||
// Identical semantics — the body never copies, []byte(s) is a
|
||||
// zero-copy reinterpret for the regex matcher.
|
||||
func ScanString(s string) (*Match, error) {
|
||||
return ScanBytes([]byte(s))
|
||||
}
|
||||
|
||||
// Match describes which pattern caught a value. Deliberately does
|
||||
// NOT include the matched substring — callers must not echo it.
|
||||
type Match struct {
|
||||
// Name is the pattern's kebab-case identifier (e.g. "github-pat-classic").
|
||||
Name string
|
||||
// Description is the human-readable line for UI / log surfaces.
|
||||
Description string
|
||||
}
|
||||
@@ -0,0 +1,189 @@
|
||||
package secrets
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestEveryPatternCompiles pins that every Pattern.regexSource is a
|
||||
// valid Go-RE2 expression. Without this, a bad regex would silently
|
||||
// disable ScanBytes for everything after it (the lazy compile would
|
||||
// set compileErr and ScanBytes would return that error every call).
|
||||
func TestEveryPatternCompiles(t *testing.T) {
|
||||
for _, p := range Patterns {
|
||||
if p.Name == "" {
|
||||
t.Errorf("pattern with empty Name: regex=%q", p.regexSource)
|
||||
}
|
||||
if p.Description == "" {
|
||||
t.Errorf("pattern %q has empty Description", p.Name)
|
||||
}
|
||||
}
|
||||
// Force compile + check error.
|
||||
if _, err := ScanBytes([]byte("placeholder")); err != nil {
|
||||
t.Fatalf("ScanBytes init failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestNoDuplicateNames — a duplicate pattern Name would make the
|
||||
// "first match wins" semantics surprising to readers and any caller
|
||||
// switching on Match.Name (none today but adding the guard is cheap).
|
||||
func TestNoDuplicateNames(t *testing.T) {
|
||||
seen := map[string]bool{}
|
||||
for _, p := range Patterns {
|
||||
if seen[p.Name] {
|
||||
t.Errorf("duplicate pattern Name: %q", p.Name)
|
||||
}
|
||||
seen[p.Name] = true
|
||||
}
|
||||
}
|
||||
|
||||
// TestKnownPatternsAllPresent — pins which specific Name values are
|
||||
// expected. A future refactor that renames or removes one without
|
||||
// updating consumers (CI workflow, runtime pre-commit hook, Files
|
||||
// API Phase 2b backend) would silently widen the leak surface.
|
||||
// Failing here forces the rename to be intentional.
|
||||
func TestKnownPatternsAllPresent(t *testing.T) {
|
||||
expected := []string{
|
||||
"github-pat-classic",
|
||||
"github-app-installation-token",
|
||||
"github-oauth-user-to-server",
|
||||
"github-oauth-user",
|
||||
"github-oauth-refresh",
|
||||
"github-pat-fine-grained",
|
||||
"anthropic-api-key",
|
||||
"openai-project-key",
|
||||
"openai-service-account-key",
|
||||
"minimax-api-key",
|
||||
"slack-token",
|
||||
"aws-access-key-id",
|
||||
"aws-sts-temp-access-key-id",
|
||||
}
|
||||
got := map[string]bool{}
|
||||
for _, p := range Patterns {
|
||||
got[p.Name] = true
|
||||
}
|
||||
for _, want := range expected {
|
||||
if !got[want] {
|
||||
t.Errorf("expected pattern %q missing from Patterns slice", want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestPositiveMatches — for each pattern, supply a representative
|
||||
// shape and assert ScanBytes returns a Match with the right Name.
|
||||
// These are TEST FIXTURES, not real credentials — each is the
|
||||
// pattern's prefix + a long-enough trailing run of placeholder chars.
|
||||
// `EXAMPLE` is sprinkled in to make grep-finds in CI logs obviously
|
||||
// fake to a human reader (matches saved memory
|
||||
// feedback_assert_exact_not_substring: tighten by Name not body).
|
||||
func TestPositiveMatches(t *testing.T) {
|
||||
cases := []struct {
|
||||
fixture string
|
||||
expectedName string
|
||||
}{
|
||||
{"ghp_EXAMPLE111122223333444455556666777788889999", "github-pat-classic"},
|
||||
{"ghs_EXAMPLE111122223333444455556666777788889999", "github-app-installation-token"},
|
||||
{"gho_EXAMPLE111122223333444455556666777788889999", "github-oauth-user-to-server"},
|
||||
{"ghu_EXAMPLE111122223333444455556666777788889999", "github-oauth-user"},
|
||||
{"ghr_EXAMPLE111122223333444455556666777788889999", "github-oauth-refresh"},
|
||||
{"github_pat_EXAMPLE" + strings.Repeat("1", 80), "github-pat-fine-grained"},
|
||||
{"sk-ant-EXAMPLE" + strings.Repeat("1", 40), "anthropic-api-key"},
|
||||
{"sk-proj-EXAMPLE" + strings.Repeat("1", 40), "openai-project-key"},
|
||||
{"sk-svcacct-EXAMPLE" + strings.Repeat("1", 40), "openai-service-account-key"},
|
||||
{"sk-cp-EXAMPLE" + strings.Repeat("1", 60), "minimax-api-key"},
|
||||
{"xoxb-" + strings.Repeat("a", 25), "slack-token"},
|
||||
{"xoxa-" + strings.Repeat("a", 25), "slack-token"},
|
||||
// AWS regex requires [0-9A-Z]{16} — uppercase + digits only.
|
||||
{"AKIA1234567890ABCDEF", "aws-access-key-id"},
|
||||
{"ASIA1234567890ABCDEF", "aws-sts-temp-access-key-id"},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.expectedName, func(t *testing.T) {
|
||||
m, err := ScanBytes([]byte(tc.fixture))
|
||||
if err != nil {
|
||||
t.Fatalf("ScanBytes(%q) errored: %v", tc.fixture, err)
|
||||
}
|
||||
if m == nil {
|
||||
t.Fatalf("ScanBytes(%q) returned no match — expected %q", tc.fixture, tc.expectedName)
|
||||
}
|
||||
if m.Name != tc.expectedName {
|
||||
t.Errorf("ScanBytes(%q) matched %q; expected %q", tc.fixture, m.Name, tc.expectedName)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestNegativeShapes — strings that look credential-adjacent but
|
||||
// shouldn't match (too short, wrong prefix, missing trailing bytes).
|
||||
// Failing here means a pattern is too loose, which would generate
|
||||
// false-positive denial in Files API and false-positive workflow
|
||||
// failures in CI.
|
||||
func TestNegativeShapes(t *testing.T) {
|
||||
cases := []string{
|
||||
// Too-short variants — anchored on the length suffix.
|
||||
"ghp_tooshort",
|
||||
"ghs_alsoshort1234",
|
||||
"github_pat_short",
|
||||
"sk-ant-short",
|
||||
"sk-cp-not-enough-bytes-here",
|
||||
// Looks like one of the prefixes but isn't (different letter).
|
||||
"gha_EXAMPLE_thirty_six_or_more_chars_here_xxx",
|
||||
// Slack family — wrong letter after xox.
|
||||
"xoxz-aaaaaaaaaaaaaaaaaaaaaaaaa",
|
||||
// AWS-shaped but wrong length suffix.
|
||||
"AKIATOOSHORT",
|
||||
// Empty / whitespace.
|
||||
"",
|
||||
" ",
|
||||
// Plain prose mentioning the prefix as part of a longer word.
|
||||
"see also `ghp_HOWTO.md` in the repo",
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c, func(t *testing.T) {
|
||||
m, err := ScanBytes([]byte(c))
|
||||
if err != nil {
|
||||
t.Fatalf("ScanBytes(%q) errored: %v", c, err)
|
||||
}
|
||||
if m != nil {
|
||||
t.Errorf("ScanBytes(%q) unexpectedly matched %q", c, m.Name)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestScanString_NoOp — sanity-check ScanString is the zero-copy
|
||||
// wrapper around ScanBytes. Without this, a future refactor that
|
||||
// makes ScanString do its own thing (e.g. accidentally normalise
|
||||
// case) would diverge silently.
|
||||
func TestScanString_NoOp(t *testing.T) {
|
||||
in := "ghp_EXAMPLE111122223333444455556666777788889999"
|
||||
m1, err1 := ScanBytes([]byte(in))
|
||||
if err1 != nil {
|
||||
t.Fatalf("ScanBytes errored: %v", err1)
|
||||
}
|
||||
m2, err2 := ScanString(in)
|
||||
if err2 != nil {
|
||||
t.Fatalf("ScanString errored: %v", err2)
|
||||
}
|
||||
if m1 == nil || m2 == nil {
|
||||
t.Fatalf("expected matches; got bytes=%+v string=%+v", m1, m2)
|
||||
}
|
||||
if m1.Name != m2.Name {
|
||||
t.Errorf("ScanString and ScanBytes returned different Names: %q vs %q", m1.Name, m2.Name)
|
||||
}
|
||||
}
|
||||
|
||||
// TestMatch_NoRoundtrip — assert the Match struct does NOT include
|
||||
// the matched substring as a field. Adding such a field would
|
||||
// regress the "matched bytes never leave ScanBytes" invariant that
|
||||
// makes this package safe to call from log/UI surfaces. This is a
|
||||
// reflection-light contract test — checks the field names statically.
|
||||
func TestMatch_NoRoundtrip(t *testing.T) {
|
||||
var m Match
|
||||
// If someone adds a `Matched string` (or similar) field, this
|
||||
// test reads as the canonical place to update + reconsider.
|
||||
_ = m.Name
|
||||
_ = m.Description
|
||||
// The two-field shape is part of the public contract; new fields
|
||||
// require deliberation about whether they leak the secret value.
|
||||
}
|
||||
Reference in New Issue
Block a user