Merge pull request #2961 from Molecule-AI/fix/doctor-register-side-effect
fix(mcp-doctor): heartbeat (idempotent) instead of register (UPSERT) — self-review of #2954
This commit is contained in:
commit
0301f90183
@ -27,9 +27,14 @@ Six checks, in operator-encounter order:
|
||||
Catches DNS/firewall/wrong-scheme issues
|
||||
before the operator hits the real
|
||||
register call.
|
||||
6. Registry register — POST ${PLATFORM_URL}/registry/register
|
||||
6. Token auth — POST ${PLATFORM_URL}/registry/heartbeat
|
||||
with the resolved workspace_id+token
|
||||
returns 2xx. End-to-end auth verification.
|
||||
Uses heartbeat (idempotent timestamp
|
||||
update) instead of register (UPSERT —
|
||||
would clobber agent_card metadata) so
|
||||
the doctor is safe to run against a
|
||||
live workspace.
|
||||
|
||||
Each check prints one of:
|
||||
[OK] <one-line status>
|
||||
@ -170,18 +175,31 @@ def check_path_for_binary() -> Verdict:
|
||||
return "fail"
|
||||
|
||||
|
||||
def _resolve_token_summary() -> Optional[str]:
|
||||
"""Return a short non-secret description of how the token was
|
||||
sourced (e.g. "env MOLECULE_WORKSPACE_TOKEN", "file .auth_token"),
|
||||
or None if no token is reachable.
|
||||
def _resolve_token() -> tuple[Optional[str], Optional[str]]:
|
||||
"""Return ``(token_value, source_label)`` if the operator's
|
||||
environment exposes a token, else ``(None, None)``.
|
||||
|
||||
Single source of truth used by both ``check_env_vars()`` (which
|
||||
only needs the source label) and ``check_register()`` (which
|
||||
needs the actual value to send a Bearer header). Keeping these
|
||||
in one place means a future env-var addition only updates the
|
||||
resolver — not two parallel readers that can drift.
|
||||
"""
|
||||
if os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip():
|
||||
return "env MOLECULE_WORKSPACE_TOKEN"
|
||||
val = os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip()
|
||||
if val:
|
||||
return val, "env MOLECULE_WORKSPACE_TOKEN"
|
||||
file_var = os.environ.get("MOLECULE_WORKSPACE_TOKEN_FILE", "").strip()
|
||||
if file_var:
|
||||
if os.path.isfile(file_var):
|
||||
return f"file {file_var} (via MOLECULE_WORKSPACE_TOKEN_FILE)"
|
||||
return None
|
||||
try:
|
||||
from pathlib import Path as _Path
|
||||
return (
|
||||
_Path(file_var).read_text().strip(),
|
||||
f"file {file_var} (via MOLECULE_WORKSPACE_TOKEN_FILE)",
|
||||
)
|
||||
except OSError:
|
||||
return None, None
|
||||
return None, None
|
||||
# Per-runtime container path used by the in-platform path; rarely
|
||||
# set on external setups but check anyway so the message is
|
||||
# accurate for both shapes.
|
||||
@ -189,10 +207,22 @@ def _resolve_token_summary() -> Optional[str]:
|
||||
import configs_dir
|
||||
candidate = configs_dir.resolve() / ".auth_token"
|
||||
if candidate.is_file():
|
||||
return f"file {candidate}"
|
||||
try:
|
||||
return candidate.read_text().strip(), f"file {candidate}"
|
||||
except OSError:
|
||||
return None, None
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
return None, None
|
||||
|
||||
|
||||
def _resolve_token_summary() -> Optional[str]:
|
||||
"""Return just the source label (no secret value). Convenience
|
||||
wrapper around :func:`_resolve_token` for callers that don't
|
||||
need the value itself.
|
||||
"""
|
||||
_, label = _resolve_token()
|
||||
return label
|
||||
|
||||
|
||||
def check_env_vars() -> Verdict:
|
||||
@ -281,42 +311,35 @@ def check_platform_health() -> Verdict:
|
||||
return "ok"
|
||||
|
||||
|
||||
def check_register() -> Verdict:
|
||||
"""Light end-to-end auth check via POST /registry/register.
|
||||
def check_token_auth() -> Verdict:
|
||||
"""Light auth check via POST /registry/heartbeat.
|
||||
|
||||
Doesn't write a heartbeat or change platform state beyond what
|
||||
a normal molecule-mcp boot would do — the register endpoint is
|
||||
idempotent. Skipped when env vars failed earlier so the operator
|
||||
isn't shown a redundant 401.
|
||||
Why heartbeat and not register: register is an UPSERT — sending
|
||||
it from doctor would clobber the workspace's actual agent_card
|
||||
(name, description, version) until the real agent next calls
|
||||
register. That's an invisible production-disruption: someone
|
||||
runs ``molecule-mcp doctor`` against a live workspace and the
|
||||
canvas briefly displays "doctor-probe" as the agent name.
|
||||
|
||||
Heartbeat only updates last_heartbeat_at (and clears
|
||||
awaiting_agent if needed) — that's exactly what a normal
|
||||
molecule-mcp boot does every 20s, so an extra heartbeat from
|
||||
the doctor is indistinguishable from background traffic.
|
||||
|
||||
Skipped when env vars failed earlier so the operator isn't shown
|
||||
a redundant 401.
|
||||
"""
|
||||
label = "Registry register"
|
||||
label = "Token auth"
|
||||
base = os.environ.get("PLATFORM_URL", "").strip().rstrip("/")
|
||||
workspace_id = os.environ.get("WORKSPACE_ID", "").strip()
|
||||
token_summary = _resolve_token_summary()
|
||||
if not (base and workspace_id and token_summary):
|
||||
token, source_label = _resolve_token()
|
||||
if not (base and workspace_id and token):
|
||||
_warn(label, "skipped (Env vars must pass first)", "fix Env vars, re-run")
|
||||
return "warn"
|
||||
# Get the token value matching the resolver's source preference.
|
||||
token = os.environ.get("MOLECULE_WORKSPACE_TOKEN", "").strip()
|
||||
if not token:
|
||||
file_var = os.environ.get("MOLECULE_WORKSPACE_TOKEN_FILE", "").strip()
|
||||
if file_var and os.path.isfile(file_var):
|
||||
try:
|
||||
token = open(file_var).read().strip()
|
||||
except Exception as e:
|
||||
_fail(label, f"could not read token file: {e}", "fix the file permissions or path")
|
||||
return "fail"
|
||||
if not token:
|
||||
_warn(label, "skipped (no token resolvable)", "set MOLECULE_WORKSPACE_TOKEN or MOLECULE_WORKSPACE_TOKEN_FILE")
|
||||
return "warn"
|
||||
import json
|
||||
body = json.dumps({
|
||||
"id": workspace_id,
|
||||
"url": "", # external URL is unknown to doctor; register accepts empty
|
||||
"agent_card": {"name": "doctor-probe", "version": "0.0.0"},
|
||||
}).encode()
|
||||
body = json.dumps({"id": workspace_id}).encode()
|
||||
req = urllib_request.Request(
|
||||
f"{base}/registry/register",
|
||||
f"{base}/registry/heartbeat",
|
||||
data=body,
|
||||
method="POST",
|
||||
headers={
|
||||
@ -333,10 +356,10 @@ def check_register() -> Verdict:
|
||||
status = getattr(e, "code", None)
|
||||
err = str(e.reason if hasattr(e, "reason") else e)
|
||||
if status is None:
|
||||
_fail(label, f"POST {base}/registry/register failed: {err}", "check network")
|
||||
_fail(label, f"POST {base}/registry/heartbeat failed: {err}", "check network")
|
||||
return "fail"
|
||||
except Exception as e:
|
||||
_fail(label, f"POST register failed: {e}", "check network")
|
||||
_fail(label, f"POST heartbeat failed: {e}", "check network")
|
||||
return "fail"
|
||||
if status == 401:
|
||||
_fail(
|
||||
@ -355,19 +378,26 @@ def check_register() -> Verdict:
|
||||
)
|
||||
return "fail"
|
||||
if not (200 <= status < 300):
|
||||
_fail(label, f"POST register returned HTTP {status}", "see platform logs")
|
||||
_fail(label, f"POST heartbeat returned HTTP {status}", "see platform logs")
|
||||
return "fail"
|
||||
_ok(label, f"POST {base}/registry/register → {status}")
|
||||
_ok(label, f"POST {base}/registry/heartbeat → {status} (token from {source_label})")
|
||||
return "ok"
|
||||
|
||||
|
||||
# Back-compat alias: the previous name was check_register, but the
|
||||
# implementation switched to a non-mutating heartbeat probe (see
|
||||
# check_token_auth's docstring). Kept so external test suites or
|
||||
# pinned-import scripts don't break on the rename.
|
||||
check_register = check_token_auth
|
||||
|
||||
|
||||
CHECKS = [
|
||||
check_python_version,
|
||||
check_wheel_install,
|
||||
check_path_for_binary,
|
||||
check_env_vars,
|
||||
check_platform_health,
|
||||
check_register,
|
||||
check_token_auth,
|
||||
]
|
||||
|
||||
|
||||
|
||||
@ -105,6 +105,75 @@ def test_check_register_skipped_without_env(monkeypatch):
|
||||
assert mcp_doctor.check_register() == "warn"
|
||||
|
||||
|
||||
def test_check_token_auth_uses_heartbeat_endpoint(monkeypatch):
|
||||
"""Pin: doctor MUST hit /registry/heartbeat, not /registry/register.
|
||||
|
||||
register is an UPSERT — using it from doctor would clobber the
|
||||
workspace's actual agent_card metadata until the real agent next
|
||||
calls register. heartbeat only updates last_heartbeat_at, which
|
||||
a normal molecule-mcp boot does every 20s anyway, so the doctor's
|
||||
extra heartbeat is indistinguishable from background traffic.
|
||||
|
||||
This test pins the URL via a urllib mock so a future refactor
|
||||
that accidentally re-routes through /registry/register fails
|
||||
here at PR-review time, not after operators report
|
||||
"doctor-probe" briefly appearing as their agent name in canvas.
|
||||
"""
|
||||
monkeypatch.setenv("PLATFORM_URL", "https://x.moleculesai.app")
|
||||
monkeypatch.setenv("WORKSPACE_ID", "ws-test")
|
||||
monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN", "tok-abc")
|
||||
monkeypatch.delenv("MOLECULE_WORKSPACE_TOKEN_FILE", raising=False)
|
||||
|
||||
captured: dict[str, object] = {}
|
||||
|
||||
class _FakeResp:
|
||||
status = 200
|
||||
def __enter__(self): return self
|
||||
def __exit__(self, *a): pass
|
||||
|
||||
def fake_urlopen(req, timeout=None):
|
||||
captured["full_url"] = req.full_url
|
||||
captured["method"] = req.get_method()
|
||||
return _FakeResp()
|
||||
|
||||
monkeypatch.setattr(mcp_doctor.urllib_request, "urlopen", fake_urlopen)
|
||||
verdict = mcp_doctor.check_token_auth()
|
||||
assert verdict == "ok"
|
||||
assert captured["method"] == "POST"
|
||||
# The load-bearing assertion — must use heartbeat, never register.
|
||||
assert captured["full_url"].endswith("/registry/heartbeat"), (
|
||||
f"doctor must use /registry/heartbeat (idempotent), not register "
|
||||
f"(UPSERT — clobbers agent_card). Got: {captured['full_url']}"
|
||||
)
|
||||
assert "/registry/register" not in str(captured["full_url"]), (
|
||||
"doctor must NEVER POST to /registry/register — that's a UPSERT "
|
||||
"that overwrites agent_card metadata until the real agent next "
|
||||
"calls register."
|
||||
)
|
||||
|
||||
|
||||
def test_resolve_token_returns_value_and_label_for_env(monkeypatch):
|
||||
"""The single resolver returns both the value (for Bearer header)
|
||||
and a non-secret label (for the env-vars summary). Drift between
|
||||
label and value is the previous bug shape."""
|
||||
monkeypatch.setenv("PLATFORM_URL", "https://x.moleculesai.app")
|
||||
monkeypatch.setenv("MOLECULE_WORKSPACE_TOKEN", "secret-tok-abc")
|
||||
monkeypatch.delenv("MOLECULE_WORKSPACE_TOKEN_FILE", raising=False)
|
||||
val, label = mcp_doctor._resolve_token()
|
||||
assert val == "secret-tok-abc"
|
||||
assert label == "env MOLECULE_WORKSPACE_TOKEN"
|
||||
# Summary helper must agree with the resolver's source.
|
||||
assert mcp_doctor._resolve_token_summary() == label
|
||||
|
||||
|
||||
def test_resolve_token_returns_none_when_missing(monkeypatch):
|
||||
monkeypatch.delenv("MOLECULE_WORKSPACE_TOKEN", raising=False)
|
||||
monkeypatch.delenv("MOLECULE_WORKSPACE_TOKEN_FILE", raising=False)
|
||||
val, label = mcp_doctor._resolve_token()
|
||||
assert val is None
|
||||
assert label is None
|
||||
|
||||
|
||||
def test_run_returns_1_when_any_fail(monkeypatch, capsys):
|
||||
"""End-to-end: stripped environment → at least one FAIL →
|
||||
exit 1. Pin the exit-code contract so this is scriptable from
|
||||
|
||||
Loading…
Reference in New Issue
Block a user