2026-05-12 07:18:51 +00:00
3 changed files with 996 additions and 0 deletions
--- a/.gitea/scripts/lint_continue_on_error_tracking.py
+++ b/.gitea/scripts/lint_continue_on_error_tracking.py
@ -0,0 +1,436 @@
+#!/usr/bin/env python3
+"""lint_continue_on_error_tracking — Tier 2e per internal#350.
+
+Rule
+----
+Every `continue-on-error: true` directive in `.gitea/workflows/*.yml`
+must be accompanied by a tracker reference comment within 2 lines
+(above OR below the directive's line). The reference is one of:
+
+  * `# mc#NNNN`          — molecule-core issue
+  * `# internal#NNNN`    — molecule-ai/internal issue
+
+The referenced issue must satisfy ALL of:
+
+  1. Exists (HTTP 200 on `/repos/{owner}/{name}/issues/{num}`)
+  2. `state == "open"`
+  3. `created_at` is ≤ MAX_AGE_DAYS days ago (default 14)
+
+A passing reference establishes an audit trail and a forced renewal
+cadence — after 14 days the issue must either be CLOSED (the masked
+defect was fixed) or the comment must point at a NEW tracker
+(deliberate decision to keep masking, requires a paper-trail).
+
+The class this prevents
+-----------------------
+Phase-3-masked failures. `continue-on-error: true` on `platform-build`
+had been hiding mc#664-class regressions for ~3 weeks before #656
+surfaced them on 2026-05-12. A 14-day cap forces a tracker review
+cycle and surfaces mask-drift within at most 14 days of the original
+defect.
+
+Behaviour-based gate
+--------------------
+We parse via PyYAML AST (per `feedback_behavior_based_ast_gates`) to
+detect `continue-on-error: <truthy>` at job-key level, then map each
+location back to its source line via PyYAML's line-tracking loader.
+Comments are scanned from the raw text within a 2-line window of
+that source line. Reformatting (block-scalar vs flow-style) does not
+break the rule because the source-line anchor is the directive's
+own line.
+
+Exit codes
+----------
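+  0 — every `continue-on-error: true` has a passing tracker, OR
+      the issue lookup was denied (HTTP 401/403, token-scope) or
+      errored in transit (graceful degrade per Tier 2a contract —
+      surface via ::error:: on stderr but don't red-X every PR over
+      auth). A 404 is NOT degraded: it counts as a violation.
+  1 — at least one violation (missing/closed/too-old/non-existent
+      tracker).
+  2 — env contract violation, missing PyYAML, or workflows-dir
+      missing. A workflow file that fails to parse is reported via
+      ::error:: and skipped; it does not by itself change the exit
+      code.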
+
+Env
+---
+  GITEA_TOKEN     — read scope on the configured repos.
+                    Auto-injected `GITHUB_TOKEN` works for same-repo
+                    issue reads; for `internal#NNN` we need a token
+                    with `molecule-ai/internal` read scope. Use
+                    DRIFT_BOT_TOKEN (same persona as other Tier 2
+                    lints).
+  GITEA_HOST      — e.g. git.moleculesai.app
+  REPO            — `owner/name` for `mc#NNNN` lookups
+  INTERNAL_REPO   — `owner/name` for `internal#NNNN` lookups
+                    (defaults to derived `molecule-ai/internal`)
+  WORKFLOWS_DIR   — defaults to `.gitea/workflows`
+  MAX_AGE_DAYS    — defaults to 14
+
+Memory cross-links
+------------------
+  - internal#350 (the RFC that specs this lint)
+  - mc#664 (the masked-3-weeks empirical case)
+  - feedback_chained_defects_in_never_tested_workflows
+  - feedback_behavior_based_ast_gates
+  - feedback_strict_root_only_after_class_a
+"""
+from __future__ import annotations
+
+import json
+import os
+import re
+import sys
+import urllib.error
+import urllib.parse
+import urllib.request
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Any
+
+try:
+    import yaml
+except ImportError:
+    sys.stderr.write(
+        "::error::PyYAML is required. Install with: pip install PyYAML\n"
+    )
+    sys.exit(2)
+
+
+# ---------------------------------------------------------------------------
+# Tracker comment regex.
+# Matches: `# mc#1234`, `# internal#42`, `# mc#1234 - description`
+# Does NOT match: `# mc1234` (missing inner #), `mc#1234` (no leading
+# `#` comment marker), `# MC#1234` (case-sensitive — `mc` and `internal`
+# are conventional lower-case repo slugs).
+# Also does NOT match a reference buried mid-comment, e.g.
+# `# see internal#42`: only whitespace may sit between the comment
+# marker `#` and the slug.
+TRACKER_RE = re.compile(
+    r"#\s*(?P<slug>mc|internal)#(?P<num>\d+)\b"
+)
+
+# Truthy continue-on-error values we treat as "true". PyYAML decodes
+# `continue-on-error: true` to Python `True`. `continue-on-error: "true"`
+# decodes to the string "true" — Gitea's evaluator coerces strings,
+# so we treat string-`"true"` (case-insensitive) as truthy too.
+def _is_truthy_coe(v: Any) -> bool:
+    """Report whether a parsed `continue-on-error` value counts as enabled.
+
+    Enabled means the boolean `True` itself, or a string equal to
+    "true" once surrounding whitespace is stripped and case is folded.
+    Every other value is treated as not enabled.
+    """
+    if isinstance(v, str):
+        return v.strip().lower() == "true"
+    return v is True
+
+
+# ---------------------------------------------------------------------------
+# Env contract
+# ---------------------------------------------------------------------------
+def _env(key: str, default: str | None = None) -> str:
+    """Read env var `key`, falling back to `default`; never returns None.
+
+    When the variable is unset and no default is given, the empty
+    string is returned.
+    """
+    value = os.environ.get(key, default)
+    if value is None:
+        return ""
+    return value
+
+
+def _require_env(key: str) -> str:
+    """Return env var `key`, or exit the process when it is unset/empty.
+
+    On a missing or empty value an `::error::` line is written to
+    stderr and the process exits with status 2.
+    """
+    value = os.environ.get(key)
+    if value:
+        return value
+    sys.stderr.write(f"::error::missing required env var: {key}\n")
+    sys.exit(2)
+
+
+# ---------------------------------------------------------------------------
+# PyYAML line-tracking loader. yaml.SafeLoader nodes carry
+# `start_mark.line` (0-based); using construct_mapping with `deep=True`
+# preserves that on every node. We need the line of each
+# `continue-on-error` key so we can scan the source for comments
+# near it.
+# ---------------------------------------------------------------------------
+class _LineLoader(yaml.SafeLoader):
+    """SafeLoader that annotates every dict with `__lines__: {key: line}`.
+
+    The annotation is added by `_construct_mapping`, which is registered
+    on this class as the constructor for the default mapping tag; `line`
+    is the 1-based source line of each key.
+    """
+
+
+def _construct_mapping(loader: yaml.SafeLoader, node: yaml.MappingNode) -> dict:
+    """Build a mapping and attach a `__lines__` index of its keys.
+
+    The mapping is constructed by the loader as usual. A side table
+    `{str(key): 1-based line}` is then collected from the key nodes
+    and stored under the reserved key `__lines__`. Keys that cannot
+    be constructed, or that are not str/int/bool, get no entry.
+    """
+    result = loader.construct_mapping(node, deep=True)
+    key_lines: dict[str, int] = {}
+    for key_node, _ in node.value:
+        try:
+            key_obj = loader.construct_object(key_node, deep=True)
+        except Exception:
+            continue
+        if not isinstance(key_obj, (str, int, bool)):
+            continue
+        # start_mark.line is 0-based; callers work with 1-based lines.
+        key_lines[str(key_obj)] = key_node.start_mark.line + 1
+    if isinstance(result, dict):
+        result["__lines__"] = key_lines
+    return result
+
+
+# Route every plain YAML mapping parsed with `_LineLoader` through
+# `_construct_mapping`, so the resulting dicts carry `__lines__`.
+_LineLoader.add_constructor(
+    yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, _construct_mapping
+)
+
+
+# ---------------------------------------------------------------------------
+# Issue lookup
+# ---------------------------------------------------------------------------
+def fetch_issue(slug_kind: str, num: int) -> tuple[str, dict | None]:
+    """Look up one tracker issue via the Gitea REST API.
+
+    Args:
+        slug_kind: `"mc"` selects the repo named by env `REPO`; any
+            other value selects env `INTERNAL_REPO`.
+        num: issue number within that repo.
+
+    Returns:
+        `(status, payload_or_none)` with
+        status ∈ {"ok", "not_found", "forbidden", "error"}.
+        `payload` is the decoded JSON object and is non-None only
+        when status is "ok".
+
+    Never raises for transport or decoding problems: a failed
+    connection, a truncated read, or an undecodable / non-object body
+    is reported as "error" so the caller can degrade gracefully
+    instead of crashing the lint with a traceback.
+    """
+    # Local import: only needed for the except clause below.
+    import http.client
+
+    repo = _env("REPO") if slug_kind == "mc" else _env("INTERNAL_REPO")
+    if not repo:
+        # Fall through gracefully — caller treats as 403 (token-scope).
+        return ("forbidden", None)
+    host = _env("GITEA_HOST")
+    token = _env("GITEA_TOKEN")
+    url = f"https://{host}/api/v1/repos/{repo}/issues/{num}"
+    req = urllib.request.Request(
+        url,
+        headers={
+            "Authorization": f"token {token}",
+            "Accept": "application/json",
+        },
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=20) as resp:
+            payload = json.loads(resp.read())
+    except urllib.error.HTTPError as e:
+        # Must precede the OSError clause: HTTPError subclasses it.
+        if e.code == 404:
+            return ("not_found", None)
+        if e.code in (401, 403):
+            return ("forbidden", None)
+        return ("error", None)
+    except (OSError, ValueError, http.client.HTTPException):
+        # OSError: URLError, timeouts, resets during resp.read().
+        # ValueError: JSONDecodeError and UnicodeDecodeError.
+        # HTTPException: IncompleteRead, InvalidURL, BadStatusLine, ...
+        return ("error", None)
+    if not isinstance(payload, dict):
+        # A JSON list/scalar/null is not an issue object.
+        return ("error", None)
+    return ("ok", payload)
+
+
+# ---------------------------------------------------------------------------
+# Locate every continue-on-error: <truthy> in a workflow doc, with line.
+# ---------------------------------------------------------------------------
+def find_coe_truthies(
+    doc: Any, raw_lines: list[str]
+) -> list[tuple[str, int]]:
+    """List job-level `continue-on-error` directives that are enabled.
+
+    `doc` is the mapping produced by the line-tracking loader and
+    `raw_lines` the same file split into lines. Only direct children
+    of `jobs` are inspected; step-level `continue-on-error` is
+    deliberately out of scope because it does not mask the job-level
+    rollup this lint targets.
+
+    Returns `(job_key, source_line_1based)` pairs in document order.
+    When the loader recorded no line for the key, the raw text is
+    grepped instead, and line 1 is used as a last resort.
+    """
+    jobs = doc.get("jobs") if isinstance(doc, dict) else None
+    if not isinstance(jobs, dict):
+        return []
+
+    found: list[tuple[str, int]] = []
+    for job_name, job_def in jobs.items():
+        # `__lines__` is the loader's bookkeeping entry, not a job.
+        if job_name == "__lines__" or not isinstance(job_def, dict):
+            continue
+        # A missing key yields None, which is not truthy either.
+        if not _is_truthy_coe(job_def.get("continue-on-error")):
+            continue
+        lineno = job_def.get("__lines__", {}).get("continue-on-error")
+        if not lineno:
+            lineno = _grep_first_coe_line(raw_lines, job_name) or 1
+        found.append((str(job_name), int(lineno)))
+    return found
+
+
+def _grep_first_coe_line(raw_lines: list[str], jkey: str) -> int | None:
+    """Text-only fallback for locating a job's `continue-on-error` line.
+
+    Returns the 1-based number of the first line containing
+    `continue-on-error` that comes after a line of the form `jkey:`
+    (any indentation), or None when no such line exists. The header
+    line itself is never returned.
+    """
+    job_header = re.compile(rf"^\s*{re.escape(jkey)}\s*:")
+    inside_job = False
+    for lineno, text in enumerate(raw_lines, start=1):
+        if job_header.match(text):
+            inside_job = True
+        elif inside_job and "continue-on-error" in text:
+            return lineno
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Scan window for tracker comment
+# ---------------------------------------------------------------------------
+WINDOW = 2  # lines above OR below the directive's line (inclusive)
+
+
+def find_tracker_in_window(
+    raw_lines: list[str], line_1based: int
+) -> tuple[str, int] | None:
+    """Find the tracker reference nearest to a directive line.
+
+    Scans the directive's own line (it may carry an inline comment
+    like `continue-on-error: true  # mc#3`) plus ±WINDOW lines around
+    it, and returns `(slug, num)` for the closest line that matches
+    `TRACKER_RE`, or None when no line in the window matches.
+
+    Lines are tried in order of distance from the directive: the
+    directive line first, then one above, one below, two above, and
+    so on. This keeps a tracker that belongs to a neighbouring
+    directive from shadowing the one written next to this directive.
+
+    The whole raw line is searched, not just its comment portion;
+    the regex itself requires a leading `#` marker.
+    """
+    total = len(raw_lines)
+    candidates = [line_1based]
+    for dist in range(1, WINDOW + 1):
+        candidates.extend((line_1based - dist, line_1based + dist))
+    for lineno in candidates:
+        if not 1 <= lineno <= total:
+            continue  # window is clipped at both file edges
+        m = TRACKER_RE.search(raw_lines[lineno - 1])
+        if m:
+            return (m.group("slug"), int(m.group("num")))
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Tracker validation
+# ---------------------------------------------------------------------------
+def validate_tracker(
+    slug: str, num: int, max_age_days: int
+) -> tuple[bool, str]:
+    """Check that tracker `slug#num` exists, is open and is recent.
+
+    Returns `(ok, reason)`:
+      * forbidden lookup or fetch error → `(True, "... skipped")`,
+        with an `::error::` line on stderr. This is the graceful
+        degrade of the Tier 2a contract: auth or transport trouble
+        must not red-X every PR.
+      * issue missing (404), not open, `created_at` unparseable, or
+        older than `max_age_days` whole days → `(False, reason)`.
+      * otherwise `(True, summary)`.
+
+    A `created_at` without a UTC offset is interpreted as UTC.
+    """
+    status, payload = fetch_issue(slug, num)
+    if status == "forbidden":
+        sys.stderr.write(
+            f"::error::issue {slug}#{num} unreadable (HTTP 403 — token "
+            f"scope). Cannot validate; skipping this check to avoid "
+            f"red-X on every PR. Fix the token, not the lint.\n"
+        )
+        return (True, "forbidden — skipped")
+    if status == "not_found":
+        return (False, f"{slug}#{num} does not exist (404)")
+    # A payload that is not a JSON object is as unusable as a failed
+    # fetch, so it takes the same skip path rather than tripping an
+    # assert (asserts vanish under `python -O`).
+    if status == "error" or not isinstance(payload, dict):
+        sys.stderr.write(
+            f"::error::issue {slug}#{num} fetch errored — treating as "
+            f"unverified, skipping this check.\n"
+        )
+        return (True, "fetch-error — skipped")
+
+    state = payload.get("state", "")
+    if state != "open":
+        return (False, f"{slug}#{num} state={state!r} (must be open)")
+
+    # `or ""` covers an explicit JSON null as well as a missing key.
+    created = payload.get("created_at") or ""
+    try:
+        # Gitea returns ISO-8601 timestamps. `fromisoformat` accepts a
+        # trailing `Z` only from Python 3.11 on, so it is rewritten to
+        # an explicit offset for older runtimes.
+        created_dt = datetime.fromisoformat(
+            str(created).replace("Z", "+00:00")
+        )
+    except ValueError:
+        return (False, f"{slug}#{num} created_at unparseable: {created!r}")
+    if created_dt.tzinfo is None:
+        # Subtracting a naive datetime from an aware one raises
+        # TypeError; treat offset-less timestamps as UTC.
+        created_dt = created_dt.replace(tzinfo=timezone.utc)
+
+    age = datetime.now(timezone.utc) - created_dt
+    # Inclusive boundary at MAX_AGE_DAYS: `age.days` truncates to a
+    # whole-day floor, so an issue created 14d 0h 5m ago has
+    # `age.days == 14` and passes; one created 15d 0h 0m ago has
+    # `age.days == 15` and fails. This is the convention specified
+    # in internal#350 ("≤14 days old").
+    if age.days > max_age_days:
+        return (
+            False,
+            f"{slug}#{num} is {age.days} days old (>{max_age_days}d cap). "
+            f"Close-or-renew the tracker.",
+        )
+    return (True, f"{slug}#{num} open, {age.days}d old, ≤{max_age_days}d")
+
+
+# ---------------------------------------------------------------------------
+# Driver
+# ---------------------------------------------------------------------------
+def _iter_workflow_files(wf_dir: Path) -> list[Path]:
+    """Return the `*.yml` and `*.yaml` files directly under `wf_dir`, sorted."""
+    matches: list[Path] = []
+    for pattern in ("*.yml", "*.yaml"):
+        matches.extend(wf_dir.glob(pattern))
+    matches.sort()
+    return matches
+
+
+def run() -> int:
+    """Lint every workflow file and return the process exit code.
+
+    Reads `WORKFLOWS_DIR` and `MAX_AGE_DAYS` from the environment,
+    defaults `INTERNAL_REPO` when unset, then checks each job-level
+    `continue-on-error: <truthy>` for a nearby, valid tracker.
+
+    Returns:
+        0 — no violations (skipped lookups count as passing).
+        1 — at least one violation; all of them are printed.
+        2 — `MAX_AGE_DAYS` is not a non-negative integer, or the
+            workflows directory does not exist.
+
+    A file that cannot be read or parsed is reported via `::error::`
+    on stderr and skipped; it does not change the exit code.
+    """
+    wf_dir = Path(_env("WORKFLOWS_DIR", ".gitea/workflows"))
+
+    # A blank value (e.g. an unset CI variable expanded to "") means
+    # "use the default"; anything else must parse as an integer >= 0.
+    raw_max_age = _env("MAX_AGE_DAYS", "14").strip() or "14"
+    try:
+        max_age = int(raw_max_age)
+    except ValueError:
+        max_age = -1
+    if max_age < 0:
+        sys.stderr.write(
+            f"::error::MAX_AGE_DAYS must be a non-negative integer, "
+            f"got {raw_max_age!r}\n"
+        )
+        return 2
+
+    # Defaults for INTERNAL_REPO when unset (best-effort guess based on
+    # the convention `mc#` = same repo, `internal#` = molecule-ai/internal).
+    if not os.environ.get("INTERNAL_REPO"):
+        os.environ["INTERNAL_REPO"] = "molecule-ai/internal"
+
+    if not wf_dir.is_dir():
+        sys.stderr.write(
+            f"::error::workflows directory not found: {wf_dir}\n"
+        )
+        return 2
+
+    yml_files = _iter_workflow_files(wf_dir)
+    if not yml_files:
+        print(f"::notice::no workflow files under {wf_dir}; nothing to lint.")
+        return 0
+
+    violations: list[str] = []
+    notices: list[str] = []
+    total_coe_true = 0
+
+    for path in yml_files:
+        try:
+            raw = path.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError) as e:
+            # Same policy as a parse error: report, skip, keep going.
+            sys.stderr.write(
+                f"::error file={path}::cannot read workflow file: {e}. "
+                f"Skipping this file.\n"
+            )
+            continue
+        raw_lines = raw.splitlines()
+        try:
+            doc = yaml.load(raw, Loader=_LineLoader)
+        except yaml.YAMLError as e:
+            sys.stderr.write(
+                f"::error file={path}::YAML parse error: {e}. Skipping "
+                f"this file (lint-workflow-yaml will catch separately).\n"
+            )
+            continue
+
+        coe_locs = find_coe_truthies(doc, raw_lines)
+        for jkey, line in coe_locs:
+            total_coe_true += 1
+            tracker = find_tracker_in_window(raw_lines, line)
+            if tracker is None:
+                violations.append(
+                    f"::error file={path},line={line}::lint-continue-on-error-"
+                    f"tracking (Tier 2e): job '{jkey}' has "
+                    f"`continue-on-error: true` at line {line} with no "
+                    f"`# mc#NNNN` or `# internal#NNNN` tracker comment "
+                    f"within {WINDOW} lines. Add a tracker reference so "
+                    f"this mask has a forced 14-day renewal cycle. "
+                    f"Memory: feedback_chained_defects_in_never_tested_workflows."
+                )
+                continue
+            slug, num = tracker
+            ok, reason = validate_tracker(slug, num, max_age)
+            if ok:
+                notices.append(
+                    f"::notice::{path.name} job '{jkey}' (line {line}): "
+                    f"{reason}"
+                )
+            else:
+                violations.append(
+                    f"::error file={path},line={line}::lint-continue-on-error-"
+                    f"tracking (Tier 2e): job '{jkey}' "
+                    f"`continue-on-error: true` references {slug}#{num}, "
+                    f"but {reason}. FIX: close/fix the underlying defect "
+                    f"and flip continue-on-error: false, OR file a fresh "
+                    f"tracker and update the comment."
+                )
+
+    for n in notices:
+        print(n)
+
+    if violations:
+        # Every violation is printed; nothing short-circuits.
+        print(
+            f"::error::lint-continue-on-error-tracking: "
+            f"{len(violations)} violation(s) across {len(yml_files)} "
+            f"workflow file(s) (of {total_coe_true} `continue-on-error: "
+            f"true` directives in total)."
+        )
+        for v in violations:
+            print(v)
+        return 1
+
+    print(
+        f"::notice::lint-continue-on-error-tracking: "
+        f"all {total_coe_true} `continue-on-error: true` directive(s) "
+        f"have valid trackers (open, ≤{max_age}d old)."
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(run())
--- a/.gitea/workflows/lint-continue-on-error-tracking.yml
+++ b/.gitea/workflows/lint-continue-on-error-tracking.yml
@ -0,0 +1,120 @@
+name: lint-continue-on-error-tracking
+
+# Tier 2e hard-gate lint (per internal#350) — every
+# `continue-on-error: true` in `.gitea/workflows/*.yml` must carry a
+# `# mc#NNNN` or `# internal#NNNN` tracker comment within 2 lines,
+# the referenced issue must be OPEN, and ≤14 days old.
+#
+# Why this exists
+# ---------------
+# `continue-on-error: true` on `platform-build` had been hiding
+# mc#664-class regressions for ~3 weeks before #656 surfaced them on
+# 2026-05-12. A 14-day cap on tracker age forces a review cycle and
+# surfaces mask-drift within at most 14 days of the original defect.
+# Each `continue-on-error: true` gets a paper trail — close or renew.
+#
+# How the gate works
+# ------------------
+# 1. Walk `.gitea/workflows/*.yml` via PyYAML's line-tracking loader
+#    (per `feedback_behavior_based_ast_gates`) and find every job
+#    whose `continue-on-error` evaluates truthy (`true` or string
+#    `"true"` — Gitea's evaluator coerces strings).
+# 2. For each, scan ±2 lines of the directive's source line for a
+#    `# mc#NNNN` or `# internal#NNNN` comment. Inline-trailing
+#    comments on the directive line count.
+# 3. For each tracker reference, GET the issue from the Gitea API.
+#    Validate: exists, `state == open`, `created_at` ≤ MAX_AGE_DAYS.
+# 4. Aggregate ALL violations (not short-circuit) and exit 1 if any.
+#
+# Triggers
+# --------
+# Runs on PR events (paths-filter on `.gitea/workflows/**`) AND on
+# a daily schedule. PR runs catch the violation at introduction time.
+# Schedule runs catch the AGE-EXPIRY class: a tracker that was ≤14d
+# old when the PR landed but is now 20d old, with the underlying
+# defect still unfixed. Per `feedback_chained_defects_in_never_tested_workflows`,
+# scheduled drift detection is the second half of the gate.
+#
+# Phase contract (RFC internal#219 §1 ladder)
+# -------------------------------------------
+# Lands at `continue-on-error: true` (Phase 3 — surface broken shapes
+# without blocking). The pre-existing `continue-on-error: true`
+# directives on `main` will all violate this lint at first
+# (intentional — they're the masked defects this lint exists to
+# surface). Each must be triaged: file a fresh tracker comment,
+# close-and-flip, or document the deliberate keep-mask in a fresh
+# 14-day-renewable tracker. After main is clean for 3 days,
+# follow-up PR flips this workflow's continue-on-error to false.
+# Tracking: internal#350.
+#
+# Cross-links
+# -----------
+# - internal#350 (the RFC that specs this lint)
+# - mc#664 (the empirical masked-3-weeks case)
+# - feedback_chained_defects_in_never_tested_workflows
+# - feedback_behavior_based_ast_gates
+# - feedback_strict_root_only_after_class_a
+#
+# Auth: DRIFT_BOT_TOKEN — same persona used by ci-required-drift.yml
+# (provisioned under internal#329). Auto-injected GITHUB_TOKEN is
+# insufficient because `internal#NNN` references cross repositories
+# (molecule-core → molecule-ai/internal).
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths:
+      - '.gitea/workflows/**'
+      - '.gitea/scripts/lint_continue_on_error_tracking.py'
+      - 'tests/test_lint_continue_on_error_tracking.py'
+  push:
+    branches: [main, staging]
+    paths:
+      - '.gitea/workflows/**'
+      - '.gitea/scripts/lint_continue_on_error_tracking.py'
+  schedule:
+    # Daily at 13:11 UTC — off-peak, prime-staggered from the other
+    # Tier-2 lint schedules (ci-required-drift runs hourly :00).
+    - cron: '11 13 * * *'
+  workflow_dispatch:
+
+env:
+  GITHUB_SERVER_URL: https://git.moleculesai.app
+
+permissions:
+  contents: read
+
+concurrency:
+  group: lint-coe-tracking-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  lint:
+    name: lint-continue-on-error-tracking
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    # Phase 3 (RFC #219 §1): surface masked defects without blocking
+    # PRs. Pre-existing continue-on-error: true directives on main
+    # all violate this lint at first — intentional. Flip to false in
+    # a follow-up after main is clean for 3 days. The tracker must
+    # lead its own comment line: the lint only accepts the slug
+    # directly after the comment marker, not mid-sentence.
+    # internal#350
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5.6.0
+        with:
+          python-version: '3.12'
+      - name: Install PyYAML
+        run: python -m pip install --quiet 'PyYAML==6.0.2'
+      - name: Run lint-continue-on-error-tracking
+        env:
+          GITEA_TOKEN: ${{ secrets.DRIFT_BOT_TOKEN }}
+          GITEA_HOST: git.moleculesai.app
+          REPO: ${{ github.repository }}
+          INTERNAL_REPO: molecule-ai/internal
+          WORKFLOWS_DIR: .gitea/workflows
+          MAX_AGE_DAYS: '14'
+        run: python3 .gitea/scripts/lint_continue_on_error_tracking.py
+      - name: Run lint-continue-on-error-tracking unit tests
+        # Run even when the lint step above reports violations
+        # (expected during the Phase 3 soak); otherwise a failing
+        # lint would skip the unit tests entirely.
+        if: always()
+        run: |
+          python -m pip install --quiet pytest
+          python3 -m pytest tests/test_lint_continue_on_error_tracking.py -v
--- a/tests/test_lint_continue_on_error_tracking.py
+++ b/tests/test_lint_continue_on_error_tracking.py
@ -0,0 +1,440 @@
+"""Tests for `.gitea/scripts/lint_continue_on_error_tracking.py` — Tier 2e lint.
+
+Structural enforcement of internal#350 Tier 2e: every
+`continue-on-error: true` directive in `.gitea/workflows/*.yml` must be
+accompanied by a `# mc#NNNN` or `# internal#NNNN` comment within 2 lines
+(above OR below), the referenced issue must be OPEN, and ≤14 days old
+counted from `created_at`. Older than 14 days → fail, forces close-or-renew.
+
+The class this lint exists to prevent: Phase-3-masked failures.
+`continue-on-error: true` on platform-build had been hiding mc#664-class
+regressions for ~3 weeks before #656 surfaced them. A 14-day cap forces
+a tracker review cycle, preventing indefinite-mask drift.
+
+Test classes (per `feedback_branch_count_before_approving`):
+
+  - test_coe_false_is_ignored                  — `continue-on-error: false`
+    has no tracker requirement. Exit 0.
+  - test_coe_true_with_open_recent_mc_passes   — coe true + adjacent
+    `# mc#1234` comment, issue open and 5 days old. Exit 0.
+  - test_coe_true_with_open_recent_internal    — adjacent `# internal#42`,
+    open, 1 day old. Exit 0.
+  - test_coe_true_no_comment_fails             — coe true with no
+    nearby tracker comment. Exit 1, names the file+line and the
+    required tracker shape.
+  - test_coe_true_comment_too_far_away_fails   — `# mc#1234` 5 lines
+    above the coe directive — outside the 2-line window. Exit 1.
+  - test_coe_true_closed_issue_fails           — issue exists but is
+    `state=closed`. Exit 1, names the issue.
+  - test_coe_true_too_old_issue_fails          — issue open but
+    `created_at` is 20 days ago. Exit 1, mentions the age cap.
+  - test_coe_true_at_14d_passes                — boundary: exactly 14d
+    old. Inclusive. Exit 0.
+  - test_coe_true_at_15d_fails                 — boundary: 15d old.
+    Exclusive. Exit 1.
+  - test_coe_true_api_404_fails                — referenced issue
+    doesn't exist (deleted or typo). Exit 1.
+  - test_coe_true_api_403_skips                — token-scope issue,
+    graceful-degrade per Tier 2a contract: exit 0 with ::error::,
+    do NOT red-X every PR over auth.
+  - test_two_coe_true_one_violating            — multi-violation
+    aggregation: one passes, one fails → exit 1, all violations
+    surfaced (not short-circuited).
+  - test_coe_true_with_comment_AFTER_directive — comment on the line
+    below the directive (within 2 lines) still satisfies. Exit 0.
+  - test_coe_value_quoted_string_true_caught   — `continue-on-error: "true"`
+    parses to the string "true" via PyYAML which is truthy but NOT
+    boolean `True` — the lint catches the IR `True` from
+    `continue-on-error: true`, and also flags string `"true"` because
+    Gitea's evaluator coerces it.
+
+Stubs:
+  - `subprocess.run` is NOT used (this lint reads only files +
+    HTTP); the HTTP layer is stubbed by monkeypatching the
+    module-level `fetch_issue()` (see `_stub_issue_api`) to drive
+    issue-API responses, so `urllib.request.urlopen` is never reached.
+
+Run:
+    python3 -m pytest tests/test_lint_continue_on_error_tracking.py -v
+"""
+from __future__ import annotations
+
+import importlib.util
+import os
+import sys
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from unittest import mock
+
+import pytest
+
+
+# Absolute path of the lint script under test: two directories up
+# from this test file, then `.gitea/scripts/`.
+SCRIPT_PATH = Path(__file__).resolve().parent.parent.joinpath(
+    ".gitea", "scripts", "lint_continue_on_error_tracking.py"
+)
+
+
+def _now_iso() -> str:
+    """Return the current UTC time as `YYYY-MM-DDTHH:MM:SSZ`."""
+    current = datetime.now(timezone.utc)
+    return current.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+
+def _iso_days_ago(days: int) -> str:
+    """Return the UTC time `days` days ago as `YYYY-MM-DDTHH:MM:SSZ`."""
+    moment = datetime.now(timezone.utc) - timedelta(days=days)
+    return f"{moment:%Y-%m-%dT%H:%M:%SZ}"
+
+
+def _import_lint():
+    """Load the lint script from `SCRIPT_PATH` and return a fresh module.
+
+    Each call executes the script again, so every test gets its own
+    module object to monkeypatch.
+    """
+    module_name = f"lint_coe_tracking_{os.getpid()}"
+    spec = importlib.util.spec_from_file_location(module_name, SCRIPT_PATH)
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+@pytest.fixture()
+def envset(tmp_path, monkeypatch):
+    """Create an empty workflows dir and set the lint's env vars.
+
+    Returns the workflows directory so tests can drop files into it.
+    """
+    wf_dir = tmp_path / ".gitea" / "workflows"
+    wf_dir.mkdir(parents=True)
+    lint_env = {
+        "WORKFLOWS_DIR": str(wf_dir),
+        "GITEA_TOKEN": "fake-token",
+        "GITEA_HOST": "git.example.test",
+        "REPO": "owner/molecule-core",
+        "INTERNAL_REPO": "owner/internal",
+        "MAX_AGE_DAYS": "14",
+    }
+    for name, value in lint_env.items():
+        monkeypatch.setenv(name, value)
+    return wf_dir
+
+
+def _write_wf(wf_dir: Path, name: str, content: str) -> Path:
+    """Write `content` to `wf_dir/name` as UTF-8 and return the path.
+
+    The encoding is pinned because the lint reads workflow files as
+    UTF-8 and the fixtures contain non-ASCII characters (e.g. `—`);
+    relying on the locale default would break on non-UTF-8 locales.
+    """
+    p = wf_dir / name
+    p.write_text(content, encoding="utf-8")
+    return p
+
+
+def _stub_issue_api(monkeypatch, lint_mod, responses: dict[str, dict]):
+    """Replace `lint_mod.fetch_issue` with a table-driven fake.
+
+    `responses` is keyed by `"<slug>#NNN"` (e.g. `"mc#1234"`,
+    `"internal#42"`). Each value is either:
+      - a dict {"state": "open"|"closed", "created_at": "..."} — normal hit
+      - the string "404" — issue not found
+      - the string "403" — auth denied (token scope)
+      - the string "500" — server error
+
+    A lookup for a key that is absent from `responses` raises
+    AssertionError, so tests must declare every issue they reference.
+    """
+    canned = {
+        "404": ("not_found", None),
+        "403": ("forbidden", None),
+        "500": ("error", None),
+    }
+
+    def fake_fetch(slug_kind: str, num: int):
+        key = f"{slug_kind}#{num}"
+        entry = responses.get(key)
+        if entry is None:
+            raise AssertionError(f"no test stub for {key}")
+        if isinstance(entry, str) and entry in canned:
+            return canned[entry]
+        return ("ok", entry)
+
+    monkeypatch.setattr(lint_mod, "fetch_issue", fake_fetch)
+
+
+# ---------------------------------------------------------------------------
+# continue-on-error: false → no tracker required
+# ---------------------------------------------------------------------------
+def test_coe_false_is_ignored(envset, monkeypatch, capsys):
+    """`continue-on-error: false` needs no tracker; the lint exits 0."""
+    workflow = (
+        "name: ok\n"
+        "on: [push]\n"
+        "jobs:\n"
+        "  a:\n"
+        "    runs-on: x\n"
+        "    continue-on-error: false\n"
+        "    steps:\n"
+        "      - run: echo hi\n"
+    )
+    _write_wf(envset, "ok.yml", workflow)
+    lint = _import_lint()
+    _stub_issue_api(monkeypatch, lint, {})
+    exit_code = lint.run()
+    assert exit_code == 0
+
+
+# ---------------------------------------------------------------------------
+# coe true + adjacent OPEN recent mc# tracker → pass
+# ---------------------------------------------------------------------------
+def test_coe_true_with_open_recent_mc_passes(envset, monkeypatch, capsys):
+    """A `# mc#NNNN` comment right above, open and 5 days old, passes."""
+    workflow = (
+        "name: w\n"
+        "on: [push]\n"
+        "jobs:\n"
+        "  a:\n"
+        "    runs-on: x\n"
+        "    # mc#1234 — surfacing flaky test, fix-or-renew\n"
+        "    continue-on-error: true\n"
+        "    steps:\n"
+        "      - run: echo hi\n"
+    )
+    _write_wf(envset, "wf.yml", workflow)
+    lint = _import_lint()
+    issues = {"mc#1234": {"state": "open", "created_at": _iso_days_ago(5)}}
+    _stub_issue_api(monkeypatch, lint, issues)
+    exit_code = lint.run()
+    assert exit_code == 0
+
+
+def test_coe_true_with_open_recent_internal(envset, monkeypatch, capsys):
+    """A `# internal#NN` comment right below, open and 1 day old, passes."""
+    workflow = (
+        "name: w\n"
+        "on: [push]\n"
+        "jobs:\n"
+        "  a:\n"
+        "    runs-on: x\n"
+        "    continue-on-error: true\n"
+        "    # internal#42 — phase-3 ladder soak\n"
+        "    steps:\n"
+        "      - run: echo hi\n"
+    )
+    _write_wf(envset, "wf.yml", workflow)
+    lint = _import_lint()
+    issues = {"internal#42": {"state": "open", "created_at": _iso_days_ago(1)}}
+    _stub_issue_api(monkeypatch, lint, issues)
+    exit_code = lint.run()
+    assert exit_code == 0
+
+
+# ---------------------------------------------------------------------------
+# coe true + no nearby tracker comment → fail
+# ---------------------------------------------------------------------------
+def test_coe_true_no_comment_fails(envset, monkeypatch, capsys):
+    """No tracker comment near the directive: exit 1, file and shape named."""
+    workflow = (
+        "name: b\n"
+        "on: [push]\n"
+        "jobs:\n"
+        "  a:\n"
+        "    runs-on: x\n"
+        "    continue-on-error: true\n"
+        "    steps:\n"
+        "      - run: echo hi\n"
+    )
+    _write_wf(envset, "bad.yml", workflow)
+    lint = _import_lint()
+    _stub_issue_api(monkeypatch, lint, {})
+    exit_code = lint.run()
+    assert exit_code == 1
+    out = capsys.readouterr().out
+    assert "bad.yml" in out
+    lowered = out.lower()
+    assert any(tag in lowered for tag in ("mc#", "internal#"))
+
+
+# ---------------------------------------------------------------------------
+# Comment too far away — outside the 2-line window → fail
+# ---------------------------------------------------------------------------
+def test_coe_true_comment_too_far_away_fails(envset, monkeypatch, capsys):
+    """A tracker comment outside the 2-line window does not count: exit 1."""
+    workflow = (
+        "name: f\n"
+        "on: [push]\n"
+        "# mc#1234 — referenced too far above\n"
+        "jobs:\n"
+        "  a:\n"
+        "    runs-on: x\n"
+        "    name: stage\n"
+        "    timeout-minutes: 5\n"
+        "    continue-on-error: true\n"
+        "    steps:\n"
+        "      - run: echo hi\n"
+    )
+    _write_wf(envset, "far.yml", workflow)
+    lint = _import_lint()
+    issues = {"mc#1234": {"state": "open", "created_at": _iso_days_ago(1)}}
+    _stub_issue_api(monkeypatch, lint, issues)
+    exit_code = lint.run()
+    assert exit_code == 1
+
+
+# ---------------------------------------------------------------------------
+# Closed issue → fail
+# ---------------------------------------------------------------------------
+def test_coe_true_closed_issue_fails(envset, monkeypatch, capsys):
+    """A tracker whose issue is closed fails and the issue is named."""
+    workflow = (
+        "name: w\n"
+        "on: [push]\n"
+        "jobs:\n"
+        "  a:\n"
+        "    runs-on: x\n"
+        "    # mc#999\n"
+        "    continue-on-error: true\n"
+        "    steps:\n"
+        "      - run: echo hi\n"
+    )
+    _write_wf(envset, "wf.yml", workflow)
+    lint = _import_lint()
+    issues = {"mc#999": {"state": "closed", "created_at": _iso_days_ago(1)}}
+    _stub_issue_api(monkeypatch, lint, issues)
+    exit_code = lint.run()
+    assert exit_code == 1
+    out = capsys.readouterr().out
+    assert "999" in out
+    assert "closed" in out.lower()
+
+
+# ---------------------------------------------------------------------------
+# Issue is too old (>14d) → fail
+# ---------------------------------------------------------------------------
+def test_coe_true_too_old_issue_fails(envset, monkeypatch, capsys):
+    _write_wf(
+        envset,
+        "wf.yml",
+        "name: w\non: [push]\njobs:\n  a:\n    runs-on: x\n"
+        "    # mc#7\n"
+        "    continue-on-error: true\n"
+        "    steps:\n      - run: echo hi\n",
+    )
+    m = _import_lint()
+    _stub_issue_api(
+        monkeypatch,
+        m,
+        {"mc#7": {"state": "open", "created_at": _iso_days_ago(20)}},
+    )
+    rc = m.run()
+    assert rc == 1
+    out = capsys.readouterr().out
+    assert "20" in out or "14" in out
+
+
+def test_coe_true_at_14d_passes(envset, monkeypatch, capsys):
+    _write_wf(
+        envset,
+        "wf.yml",
+        "name: w\non: [push]\njobs:\n  a:\n    runs-on: x\n"
+        "    # mc#7\n"
+        "    continue-on-error: true\n"
+        "    steps:\n      - run: echo hi\n",
+    )
+    m = _import_lint()
+    _stub_issue_api(
+        monkeypatch,
+        m,
+        {"mc#7": {"state": "open", "created_at": _iso_days_ago(14)}},
+    )
+    rc = m.run()
+    assert rc == 0
+
+
+def test_coe_true_at_15d_fails(envset, monkeypatch, capsys):
+    _write_wf(
+        envset,
+        "wf.yml",
+        "name: w\non: [push]\njobs:\n  a:\n    runs-on: x\n"
+        "    # mc#7\n"
+        "    continue-on-error: true\n"
+        "    steps:\n      - run: echo hi\n",
+    )
+    m = _import_lint()
+    _stub_issue_api(
+        monkeypatch,
+        m,
+        {"mc#7": {"state": "open", "created_at": _iso_days_ago(15)}},
+    )
+    rc = m.run()
+    assert rc == 1
+
+
+# ---------------------------------------------------------------------------
+# 404 (deleted/typo) → fail
+# ---------------------------------------------------------------------------
+def test_coe_true_api_404_fails(envset, monkeypatch, capsys):
+    _write_wf(
+        envset,
+        "wf.yml",
+        "name: w\non: [push]\njobs:\n  a:\n    runs-on: x\n"
+        "    # mc#9999\n"
+        "    continue-on-error: true\n"
+        "    steps:\n      - run: echo hi\n",
+    )
+    m = _import_lint()
+    _stub_issue_api(monkeypatch, m, {"mc#9999": "404"})
+    rc = m.run()
+    assert rc == 1
+
+
+# ---------------------------------------------------------------------------
+# 403 (token-scope, not lint's fault) → exit 0 with ::error:: per
+# Tier 2a graceful-degrade contract.
+# ---------------------------------------------------------------------------
+def test_coe_true_api_403_skips(envset, monkeypatch, capsys):
+    _write_wf(
+        envset,
+        "wf.yml",
+        "name: w\non: [push]\njobs:\n  a:\n    runs-on: x\n"
+        "    # mc#1\n"
+        "    continue-on-error: true\n"
+        "    steps:\n      - run: echo hi\n",
+    )
+    m = _import_lint()
+    _stub_issue_api(monkeypatch, m, {"mc#1": "403"})
+    rc = m.run()
+    assert rc == 0
+    err = capsys.readouterr().err
+    assert "403" in err or "scope" in err.lower() or "token" in err.lower()
+
+
+# ---------------------------------------------------------------------------
+# Two directives, one violating — a passing directive must not mask the failing one
+# ---------------------------------------------------------------------------
+def test_two_coe_true_one_violating(envset, monkeypatch, capsys):
+    _write_wf(
+        envset,
+        "two.yml",
+        "name: t\non: [push]\njobs:\n"
+        "  good:\n"
+        "    runs-on: x\n"
+        "    # mc#100\n"
+        "    continue-on-error: true\n"
+        "    steps:\n      - run: echo a\n"
+        "  bad:\n"
+        "    runs-on: x\n"
+        "    continue-on-error: true\n"
+        "    steps:\n      - run: echo b\n",
+    )
+    m = _import_lint()
+    _stub_issue_api(
+        monkeypatch,
+        m,
+        {"mc#100": {"state": "open", "created_at": _iso_days_ago(2)}},
+    )
+    rc = m.run()
+    assert rc == 1
+    out = capsys.readouterr().out
+    assert "bad" in out.lower() or "no tracker" in out.lower()
+
+
+# ---------------------------------------------------------------------------
+# Trailing comment on the directive's own line (after the value) — inside the window → pass
+# ---------------------------------------------------------------------------
+def test_coe_true_with_comment_AFTER_directive(envset, monkeypatch, capsys):
+    _write_wf(
+        envset,
+        "after.yml",
+        "name: a\non: [push]\njobs:\n  a:\n    runs-on: x\n"
+        "    continue-on-error: true  # mc#3\n"
+        "    steps:\n      - run: echo hi\n",
+    )
+    m = _import_lint()
+    _stub_issue_api(
+        monkeypatch,
+        m,
+        {"mc#3": {"state": "open", "created_at": _iso_days_ago(0)}},
+    )
+    rc = m.run()
+    assert rc == 0
+
+
+# ---------------------------------------------------------------------------
+# Quoted string `"true"` — coerced by Gitea evaluator; should be caught
+# ---------------------------------------------------------------------------
+def test_coe_value_quoted_string_true_caught(envset, monkeypatch, capsys):
+    _write_wf(
+        envset,
+        "quoted.yml",
+        "name: q\non: [push]\njobs:\n  a:\n    runs-on: x\n"
+        "    continue-on-error: \"true\"\n"
+        "    steps:\n      - run: echo hi\n",
+    )
+    m = _import_lint()
+    _stub_issue_api(monkeypatch, m, {})
+    rc = m.run()
+    # No tracker → fail
+    assert rc == 1