From 8a00c338ee25b01dd2d4e9401f02f404162f911c Mon Sep 17 00:00:00 2001 From: "molecule-ai[bot]" <276602405+molecule-ai[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 16:20:39 +0000 Subject: [PATCH] feat(#733): implement AGENTS.md auto-generation --- workspace-template/agents_md.py | 74 +++ workspace-template/config.py | 5 + workspace-template/main.py | 8 + workspace-template/tests/test_agents_md.py | 517 +++++++++++++++++++++ 4 files changed, 604 insertions(+) create mode 100644 workspace-template/agents_md.py create mode 100644 workspace-template/tests/test_agents_md.py diff --git a/workspace-template/agents_md.py b/workspace-template/agents_md.py new file mode 100644 index 00000000..7252eab2 --- /dev/null +++ b/workspace-template/agents_md.py @@ -0,0 +1,74 @@ +"""AGENTS.md auto-generation for Molecule AI workspaces. + +Implements the AAIF / Linux Foundation AGENTS.md standard so that peer agents +and orchestration tools can discover this workspace's identity, role, A2A +endpoint, and available tools without reading the full system prompt. + +Usage:: + + from agents_md import generate_agents_md + + generate_agents_md(config_dir="/configs", output_path="/workspace/AGENTS.md") + +The function is called automatically at container startup (see main.py). +""" + +import logging +import os +from pathlib import Path + +logger = logging.getLogger(__name__) + + +def generate_agents_md(config_dir: str, output_path: str) -> None: + """Generate (or regenerate) AGENTS.md from the workspace config.yaml. + + Always overwrites ``output_path`` — no stale-file guard. Re-calling + after editing config.yaml produces a fresh file reflecting the changes. + + Args: + config_dir: Directory containing config.yaml (same convention as + ``load_config`` in config.py). + output_path: Absolute path where AGENTS.md will be written. + The parent directory is expected to exist. + """ + from config import load_config + + cfg = load_config(config_dir) + + # ── A2A Endpoint ───────────────────────────────────────────────────────── + # AGENT_URL env var takes priority (production deployments behind a proxy). + # Otherwise derive from the configured a2a.port (default 8000). + endpoint = os.environ.get("AGENT_URL") or f"http://localhost:{cfg.a2a.port}/a2a" + + # ── Role ───────────────────────────────────────────────────────────────── + # Fall back to description when the role field is absent so legacy + # config.yaml files (without a role key) still produce meaningful output. + role = cfg.role if cfg.role else cfg.description + + # ── MCP Tools ──────────────────────────────────────────────────────────── + # tools (skill names) + plugins (installed plugin names) form the combined + # capability surface visible to peer agents. + all_tools = list(cfg.tools) + list(cfg.plugins) + if all_tools: + tools_section = "\n".join(f"- {t}" for t in all_tools) + else: + tools_section = "None" + + content = ( + f"# {cfg.name}\n" + f"\n" + f"**Role:** {role}\n" + f"\n" + f"## Description\n" + f"{cfg.description}\n" + f"\n" + f"## A2A Endpoint\n" + f"{endpoint}\n" + f"\n" + f"## MCP Tools\n" + f"{tools_section}\n" + ) + + Path(output_path).write_text(content, encoding="utf-8") + logger.info("Generated AGENTS.md at %s for workspace %r", output_path, cfg.name) diff --git a/workspace-template/config.py b/workspace-template/config.py index 12408524..97840c7a 100644 --- a/workspace-template/config.py +++ b/workspace-template/config.py @@ -195,6 +195,10 @@ class ComplianceConfig: class WorkspaceConfig: name: str = "Workspace" description: str = "" + role: str = "" + """Human-readable role label for this agent (e.g. 'Senior Code Reviewer'). + Surfaced in AGENTS.md so peer agents can understand this workspace's purpose + without reading the full system prompt. Falls back to description when empty.""" version: str = "1.0.0" tier: int = 1 model: str = "anthropic:claude-opus-4-7" @@ -287,6 +291,7 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig: return WorkspaceConfig( name=raw.get("name", "Workspace"), description=raw.get("description", ""), + role=raw.get("role", ""), version=raw.get("version", "1.0.0"), tier=int(raw.get("tier", 1)) if str(raw.get("tier", 1)).isdigit() else 1, model=model, diff --git a/workspace-template/main.py b/workspace-template/main.py index e339ab6c..6c275c6b 100644 --- a/workspace-template/main.py +++ b/workspace-template/main.py @@ -16,6 +16,7 @@ from a2a.server.tasks import InMemoryTaskStore from a2a.types import AgentCard, AgentCapabilities, AgentSkill from adapters import get_adapter, AdapterConfig +from agents_md import generate_agents_md from config import load_config from heartbeat import HeartbeatLoop from preflight import run_preflight, render_preflight_report @@ -64,6 +65,13 @@ async def main(): # pragma: no cover port = config.a2a.port preflight = run_preflight(config, config_path) render_preflight_report(preflight) + + # 1a. Generate AGENTS.md so peer agents and discovery tools can see this + # workspace's identity, role, endpoint, and capabilities immediately. + try: + generate_agents_md(config_path, "/workspace/AGENTS.md") + except Exception as _agents_md_err: # pragma: no cover + print(f"Warning: AGENTS.md generation failed (non-fatal): {_agents_md_err}") if not preflight.ok: raise SystemExit(1) if awareness_config: diff --git a/workspace-template/tests/test_agents_md.py b/workspace-template/tests/test_agents_md.py new file mode 100644 index 00000000..7a9b5ae7 --- /dev/null +++ b/workspace-template/tests/test_agents_md.py @@ -0,0 +1,517 @@ +"""TDD specification for agents_md.py — AGENTS.md auto-generation (#733). + +This file defines the REQUIRED behaviour that the Backend Engineer must +implement. All tests are RED until agents_md.py exists and is correct. + +Contract +-------- +The generator exposes a single public function:: + + from agents_md import generate_agents_md + + generate_agents_md(config_dir: str, output_path: str) -> None + +``config_dir`` — directory that contains config.yaml (same convention as + ``load_config`` in config.py). +``output_path`` — absolute path where AGENTS.md will be written. The + parent directory is guaranteed to exist. + +AGENTS.md format (AAIF / Linux Foundation standard) +---------------------------------------------------- +The generated file must be valid Markdown with at least these sections:: + + # + + **Role:** + + ## Description + + + ## A2A Endpoint + + + ## MCP Tools + + +Any ordering of sections is acceptable; the tests check for presence, not +order. + +Environment variables +--------------------- +``AGENT_URL`` — when set, overrides the derived endpoint URL + (``http://localhost:{a2a.port}/a2a`` by default). +""" + +import os + +import pytest +import yaml + +# --------------------------------------------------------------------------- +# The module under test. This import will fail (ModuleNotFoundError) until +# the implementation is written — that is the expected RED state. +# --------------------------------------------------------------------------- +from agents_md import generate_agents_md # noqa: E402 (module doesn't exist yet) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _write_config(tmp_path, **fields): + """Write a config.yaml into tmp_path and return the directory path.""" + cfg = tmp_path / "config.yaml" + cfg.write_text(yaml.dump(fields), encoding="utf-8") + return str(tmp_path) + + +def _output_path(tmp_path): + """Return the canonical output path for AGENTS.md in tests.""" + return str(tmp_path / "AGENTS.md") + + +# --------------------------------------------------------------------------- +# 1. File existence +# --------------------------------------------------------------------------- + +def test_agents_md_exists_after_startup(tmp_path): + """generate_agents_md() must create AGENTS.md at the given output path. + + This is the most fundamental contract: calling the function must produce + a file. If this test fails, nothing else matters. + """ + config_dir = _write_config( + tmp_path, + name="Existence Bot", + description="Tests that the file is created.", + role="tester", + ) + out = _output_path(tmp_path) + + generate_agents_md(config_dir, out) + + assert os.path.isfile(out), ( + f"AGENTS.md was not created at {out}. " + "generate_agents_md() must write the file before returning." + ) + + +# --------------------------------------------------------------------------- +# 2. Agent name +# --------------------------------------------------------------------------- + +def test_agents_md_contains_name(tmp_path): + """The generated file must include the agent name from config.yaml. + + The name should appear as a top-level Markdown heading so discovery + tools can parse it without understanding the full document structure. + """ + config_dir = _write_config( + tmp_path, + name="Research Analyst", + description="Conducts market research.", + role="analyst", + ) + out = _output_path(tmp_path) + + generate_agents_md(config_dir, out) + content = open(out, encoding="utf-8").read() + + assert "Research Analyst" in content, ( + "AGENTS.md must contain the agent name 'Research Analyst' from config.yaml. " + f"Got:\n{content}" + ) + # Name should appear in a top-level heading for AAIF compliance. + assert "# Research Analyst" in content, ( + "Agent name must appear as a top-level Markdown heading (# Research Analyst). " + f"Got:\n{content}" + ) + + +# --------------------------------------------------------------------------- +# 3. Role +# --------------------------------------------------------------------------- + +def test_agents_md_contains_role(tmp_path): + """The generated file must include the agent's role from config.yaml. + + The ``role`` field describes what the agent is responsible for in the + multi-agent organisation. It must appear in the output so peer agents + and orchestration tools can understand the agent's purpose without + reading the full system prompt. + """ + config_dir = _write_config( + tmp_path, + name="Code Reviewer", + description="Reviews pull requests for quality and security.", + role="Senior Code Reviewer", + ) + out = _output_path(tmp_path) + + generate_agents_md(config_dir, out) + content = open(out, encoding="utf-8").read() + + assert "Senior Code Reviewer" in content, ( + "AGENTS.md must contain the role 'Senior Code Reviewer' from config.yaml. " + f"Got:\n{content}" + ) + + +# --------------------------------------------------------------------------- +# 4. A2A endpoint URL +# --------------------------------------------------------------------------- + +def test_agents_md_contains_a2a_endpoint_default(tmp_path): + """Without AGENT_URL set, the endpoint must default to http://localhost:{port}/a2a. + + The A2A port comes from the ``a2a.port`` field in config.yaml (default 8000). + This URL is what peer agents use to send tasks to this workspace. + """ + config_dir = _write_config( + tmp_path, + name="Default Port Bot", + description="Uses default port.", + role="worker", + a2a={"port": 8000}, + ) + out = _output_path(tmp_path) + + # Ensure AGENT_URL is not set so we exercise the default derivation. + env = os.environ.copy() + env.pop("AGENT_URL", None) + + # Call without AGENT_URL in environment — use monkeypatch-safe approach + orig = os.environ.pop("AGENT_URL", None) + try: + generate_agents_md(config_dir, out) + finally: + if orig is not None: + os.environ["AGENT_URL"] = orig + + content = open(out, encoding="utf-8").read() + assert "http://localhost:8000/a2a" in content, ( + "AGENTS.md must contain 'http://localhost:8000/a2a' when a2a.port=8000 " + f"and AGENT_URL is not set. Got:\n{content}" + ) + + +def test_agents_md_contains_a2a_endpoint_custom_port(tmp_path): + """When a2a.port is set to a non-default value, the endpoint must reflect it.""" + config_dir = _write_config( + tmp_path, + name="Custom Port Bot", + description="Uses a custom port.", + role="worker", + a2a={"port": 9090}, + ) + out = _output_path(tmp_path) + + orig = os.environ.pop("AGENT_URL", None) + try: + generate_agents_md(config_dir, out) + finally: + if orig is not None: + os.environ["AGENT_URL"] = orig + + content = open(out, encoding="utf-8").read() + assert "http://localhost:9090/a2a" in content, ( + "AGENTS.md must derive endpoint from a2a.port — expected " + f"'http://localhost:9090/a2a'. Got:\n{content}" + ) + + +def test_agents_md_contains_a2a_endpoint_from_env(tmp_path, monkeypatch): + """When AGENT_URL env var is set, it must override the derived endpoint. + + This supports production deployments where the agent is behind a proxy + or load balancer and the internal port is not the public-facing URL. + """ + monkeypatch.setenv("AGENT_URL", "https://agent.prod.example.com/a2a") + + config_dir = _write_config( + tmp_path, + name="Prod Agent", + description="Production deployment.", + role="operator", + a2a={"port": 8000}, + ) + out = _output_path(tmp_path) + + generate_agents_md(config_dir, out) + content = open(out, encoding="utf-8").read() + + assert "https://agent.prod.example.com/a2a" in content, ( + "AGENTS.md must use AGENT_URL env var when set. " + f"Got:\n{content}" + ) + # The internal localhost URL must NOT appear when AGENT_URL overrides it. + assert "localhost:8000" not in content, ( + "AGENTS.md must not contain the internal localhost URL when " + f"AGENT_URL is set. Got:\n{content}" + ) + + +# --------------------------------------------------------------------------- +# 5. MCP Tools section +# --------------------------------------------------------------------------- + +def test_agents_md_contains_mcp_tools_section(tmp_path): + """The file must have a dedicated tools section. + + Peer agents need to know what capabilities this agent exposes. + The section heading must be '## MCP Tools' or '## Tools' (case-insensitive + match is acceptable, but the heading level must be ##). + """ + config_dir = _write_config( + tmp_path, + name="Tool Agent", + description="Has some tools.", + role="specialist", + tools=["web_search", "code_runner"], + plugins=["github", "slack"], + ) + out = _output_path(tmp_path) + + generate_agents_md(config_dir, out) + content = open(out, encoding="utf-8").read() + + has_tools_section = ( + "## MCP Tools" in content + or "## Tools" in content + or "## mcp tools" in content.lower() + or "## tools" in content.lower() + ) + assert has_tools_section, ( + "AGENTS.md must contain a '## MCP Tools' or '## Tools' section. " + f"Got:\n{content}" + ) + + +def test_agents_md_tools_section_lists_configured_tools(tmp_path): + """Tools from config.yaml must appear in the tools section of AGENTS.md. + + When tools and plugins are configured, their names must be enumerated + so peer agents know what they can request this agent to do. + """ + config_dir = _write_config( + tmp_path, + name="Multi-Tool Agent", + description="Has multiple tools.", + role="specialist", + tools=["web_search", "code_runner"], + plugins=["github"], + ) + out = _output_path(tmp_path) + + generate_agents_md(config_dir, out) + content = open(out, encoding="utf-8").read() + + for tool in ("web_search", "code_runner", "github"): + assert tool in content, ( + f"AGENTS.md must list tool/plugin '{tool}' from config.yaml. " + f"Got:\n{content}" + ) + + +def test_agents_md_tools_section_no_tools_shows_none(tmp_path): + """When no tools or plugins are configured, the section must say 'None'. + + An empty tools section with no content would be ambiguous — the + implementation must explicitly indicate no tools are available. + """ + config_dir = _write_config( + tmp_path, + name="Bare Agent", + description="No tools at all.", + role="basic", + tools=[], + plugins=[], + ) + out = _output_path(tmp_path) + + generate_agents_md(config_dir, out) + content = open(out, encoding="utf-8").read() + + # "None" (case-insensitive) should appear near/in the tools section + assert "none" in content.lower() or "no tools" in content.lower(), ( + "AGENTS.md must indicate no tools (e.g. 'None') when tools and plugins " + f"are empty. Got:\n{content}" + ) + + +# --------------------------------------------------------------------------- +# 6. Regeneration on config change +# --------------------------------------------------------------------------- + +def test_agents_md_regenerates_on_config_change(tmp_path): + """Calling generate_agents_md() again after updating config.yaml must + overwrite AGENTS.md with the new values. + + This is critical for the hot-reload use case: when an admin updates + config.yaml (e.g., changes the agent's role), the next call to + generate_agents_md() must reflect the change without any manual cleanup. + """ + config_dir = _write_config( + tmp_path, + name="Mutable Agent", + description="First generation.", + role="junior analyst", + ) + out = _output_path(tmp_path) + + generate_agents_md(config_dir, out) + content_v1 = open(out, encoding="utf-8").read() + assert "junior analyst" in content_v1, "First generation must contain initial role." + + # Update config.yaml with a new role. + _write_config( + tmp_path, + name="Mutable Agent", + description="Second generation.", + role="senior analyst", + ) + + generate_agents_md(config_dir, out) + content_v2 = open(out, encoding="utf-8").read() + + assert "senior analyst" in content_v2, ( + "AGENTS.md must reflect the updated role after re-generation. " + f"Got:\n{content_v2}" + ) + assert "junior analyst" not in content_v2, ( + "AGENTS.md must not contain the old role after re-generation. " + f"Got:\n{content_v2}" + ) + + +# --------------------------------------------------------------------------- +# 7. Valid Markdown +# --------------------------------------------------------------------------- + +def test_agents_md_valid_markdown(tmp_path): + """The generated file must be valid Markdown by a structural heuristic. + + Full Markdown parsing is out of scope for unit tests. We apply three + structural checks that catch the most common generation bugs: + + 1. The file is non-empty. + 2. The first non-blank line starts with ``#`` (top-level heading). + 3. The file has at least 3 lines of content (not just a heading). + + These rules match the minimum AAIF AGENTS.md structure. + """ + config_dir = _write_config( + tmp_path, + name="Markdown Agent", + description="Tests Markdown validity.", + role="validator", + tools=["linter"], + ) + out = _output_path(tmp_path) + + generate_agents_md(config_dir, out) + raw = open(out, encoding="utf-8").read() + + # Rule 1: non-empty + assert raw.strip(), "AGENTS.md must not be empty." + + # Rule 2: first non-blank line is a top-level heading + lines = [ln for ln in raw.splitlines() if ln.strip()] + assert lines[0].startswith("#"), ( + f"AGENTS.md must start with a Markdown heading (#). " + f"First non-blank line: {lines[0]!r}" + ) + + # Rule 3: at least 3 non-blank lines (heading + at least 2 content lines) + assert len(lines) >= 3, ( + f"AGENTS.md must have at least 3 non-blank lines (heading + content). " + f"Got {len(lines)} line(s):\n{raw}" + ) + + +def test_agents_md_has_multiple_sections(tmp_path): + """The generated file must contain multiple ## sections. + + A single-section document would not satisfy the AAIF standard which + requires separate sections for at least description, endpoint, and tools. + """ + config_dir = _write_config( + tmp_path, + name="Sectioned Agent", + description="Has multiple sections.", + role="organiser", + tools=["planner"], + ) + out = _output_path(tmp_path) + + generate_agents_md(config_dir, out) + content = open(out, encoding="utf-8").read() + + section_headings = [ + ln for ln in content.splitlines() if ln.startswith("## ") + ] + assert len(section_headings) >= 2, ( + f"AGENTS.md must have at least 2 '## ' section headings. " + f"Found {len(section_headings)}: {section_headings}\nFull content:\n{content}" + ) + + +# --------------------------------------------------------------------------- +# 8. Edge cases +# --------------------------------------------------------------------------- + +def test_agents_md_missing_role_uses_description(tmp_path): + """When ``role`` is absent from config.yaml, fall back to description. + + Not all existing config.yaml files will have a ``role`` field. The + generator must degrade gracefully and use ``description`` as the + capability summary rather than writing an empty role field. + """ + config_dir = _write_config( + tmp_path, + name="Legacy Agent", + description="Does legacy things.", + # no 'role' key + ) + out = _output_path(tmp_path) + + generate_agents_md(config_dir, out) + content = open(out, encoding="utf-8").read() + + # Either the description or some non-empty capability summary must appear. + assert "Does legacy things." in content or "Legacy Agent" in content, ( + "AGENTS.md must still contain meaningful content when 'role' is absent. " + f"Got:\n{content}" + ) + + +def test_agents_md_special_characters_in_name(tmp_path): + """Agent names with special Markdown characters must not break the file. + + Names like 'R&D Agent' or 'Agent [Alpha]' contain characters that have + special meaning in Markdown. The generator must handle them safely. + """ + config_dir = _write_config( + tmp_path, + name="R&D Agent [Alpha]", + description="Research and development.", + role="researcher", + ) + out = _output_path(tmp_path) + + # Must not raise an exception. + generate_agents_md(config_dir, out) + content = open(out, encoding="utf-8").read() + + # The name text must appear (exact escaping strategy is implementation's choice). + assert "R&D Agent" in content or "R&#" in content, ( + "Agent name with special characters must appear in AGENTS.md. " + f"Got:\n{content}" + ) + + # File must still start with a heading. + first_nonempty = next(ln for ln in content.splitlines() if ln.strip()) + assert first_nonempty.startswith("#"), ( + "AGENTS.md must still start with a heading when name has special chars. " + f"First line: {first_nonempty!r}" + )