From efa68a26b196e90489f266534b8e6e67bf2410e8 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Sun, 3 May 2026 01:01:57 -0700 Subject: [PATCH] feat(workspace): wire observability config into heartbeat + uvicorn (#119 PR-3a) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the hard-coded HEARTBEAT_INTERVAL=30 in heartbeat.py and log_level="info" in main.py with values from ObservabilityConfig (#119 PR-1, schema landed in PR #2538). Concrete plumbing: - heartbeat.HeartbeatLoop accepts an `interval_seconds=` keyword arg. Defaults to the legacy module constant so 2-arg callers (existing tests, any downstream code that hasn't been updated) keep their existing 30s behavior. - main.py constructs HeartbeatLoop with config.observability.heartbeat_interval_seconds — the value the config parser already clamped to [5, 300]. - main.py's uvicorn.Config takes log_level from config.observability.log_level (lowercased — uvicorn's convention differs from Python logging's) with LOG_LEVEL env still winning as an ops-side debugging override. Adapter EventLog wiring deferred to PR-3b (#208 follow-up) — touches adapter_base interface + needs careful design, kept separate to keep this PR small + reviewable. Tests: - test_heartbeat.py: 3 new tests pin default interval, explicit override, and the [5, 300] band that the constructor accepts without re-clamping (clamping is the parser's job). - All 88 tests in test_heartbeat.py + test_config.py pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- workspace/heartbeat.py | 25 +++++++++++++++---- workspace/main.py | 23 +++++++++++++++--- workspace/tests/test_heartbeat.py | 40 +++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/workspace/heartbeat.py b/workspace/heartbeat.py index e38c0684..d345d5a7 100644 --- a/workspace/heartbeat.py +++ b/workspace/heartbeat.py @@ -131,7 +131,7 @@ def _persist_inbound_secret_from_heartbeat(resp) -> None: ) -HEARTBEAT_INTERVAL = 30 # seconds +HEARTBEAT_INTERVAL = 30 # seconds — fallback default when no per-instance value is passed MAX_CONSECUTIVE_FAILURES = 10 MAX_SEEN_DELEGATION_IDS = 200 SELF_MESSAGE_COOLDOWN = 60 # seconds — minimum between self-messages to prevent loops @@ -142,9 +142,22 @@ DELEGATION_RESULTS_FILE = os.environ.get("DELEGATION_RESULTS_FILE", "/tmp/delega class HeartbeatLoop: - def __init__(self, platform_url: str, workspace_id: str): + def __init__( + self, + platform_url: str, + workspace_id: str, + interval_seconds: int = HEARTBEAT_INTERVAL, + ): self.platform_url = platform_url self.workspace_id = workspace_id + # Per-instance interval — main.py threads ObservabilityConfig. + # heartbeat_interval_seconds (clamped to [5, 300] at parse time) + # in here so operators can tune cadence per-workspace via the + # `observability:` block in config.yaml. Defaults to the + # legacy module constant so callers that haven't been updated + # yet (and tests that construct HeartbeatLoop directly with the + # 2-arg signature) keep their existing 30s behavior. + self._interval_seconds = interval_seconds self.start_time = time.time() self.error_count = 0 self.request_count = 0 @@ -280,13 +293,15 @@ class HeartbeatLoop: except Exception as e: logger.debug("Delegation check failed: %s", e) - await asyncio.sleep(HEARTBEAT_INTERVAL) + await asyncio.sleep(self._interval_seconds) except asyncio.CancelledError: raise except Exception as e: - logger.error("Heartbeat loop error: %s — retrying in 30s", e) - await asyncio.sleep(HEARTBEAT_INTERVAL) + logger.error( + "Heartbeat loop error: %s — retrying in %ds", e, self._interval_seconds + ) + await asyncio.sleep(self._interval_seconds) finally: if client: try: diff --git a/workspace/main.py b/workspace/main.py index 356080f3..6b9c9ad9 100644 --- a/workspace/main.py +++ b/workspace/main.py @@ -107,8 +107,16 @@ async def main(): # pragma: no cover else: print("Governance: disabled (set governance.enabled: true in config.yaml to activate)") - # 2. Create heartbeat (passed to adapter for task tracking) - heartbeat = HeartbeatLoop(platform_url, workspace_id) + # 2. Create heartbeat (passed to adapter for task tracking). + # interval is sourced from observability.heartbeat_interval_seconds + # in config.yaml — clamped to [5, 300] at parse time. Operators + # who want a faster crash-detection signal lower it; ones who want + # to reduce platform write load raise it. + heartbeat = HeartbeatLoop( + platform_url, + workspace_id, + interval_seconds=config.observability.heartbeat_interval_seconds, + ) # 3. Get adapter for this runtime runtime = config.runtime or "langgraph" @@ -458,11 +466,20 @@ async def main(): # pragma: no cover built_app = make_trace_middleware(starlette_app) + # uvicorn expects the level name in lowercase ("debug" / "info" / + # "warning" / "error" / "critical"). config.observability.log_level + # is uppercased at parse time (config.py.load_config) for the + # Python ``logging`` module's convention; lower it here so both + # consumers get the form they expect from one source of truth. + # An ``LOG_LEVEL`` env var still wins as an ops-side debugging + # override — set it on the workspace process to bypass YAML + # without a config edit + restart cycle. + uvicorn_log_level = os.environ.get("LOG_LEVEL", config.observability.log_level).lower() server_config = uvicorn.Config( built_app, host="0.0.0.0", port=port, - log_level="info", + log_level=uvicorn_log_level, ) server = uvicorn.Server(server_config) diff --git a/workspace/tests/test_heartbeat.py b/workspace/tests/test_heartbeat.py index 89d4594e..2d7891cf 100644 --- a/workspace/tests/test_heartbeat.py +++ b/workspace/tests/test_heartbeat.py @@ -501,3 +501,43 @@ async def test_heartbeat_loop_persists_secret_from_response(monkeypatch): assert saved == ["from-heartbeat"], ( "in-container heartbeat must persist platform_inbound_secret from 200 response" ) + + +# --------------------------------------------------------------------------- +# observability.heartbeat_interval_seconds wiring (#119 PR-3) — pin that the +# per-instance interval flows from ObservabilityConfig through the +# constructor to the asyncio.sleep call. Tests below use the public +# attribute, but the attribute IS the wire because it's read directly by +# the loop body. +# --------------------------------------------------------------------------- + + +def test_init_default_interval_matches_legacy_constant(): + """When the 2-arg constructor is used (legacy callers, existing tests), + the per-instance interval falls back to the module-level + HEARTBEAT_INTERVAL constant — preserves backward compat without a + behavior change for code that hasn't been updated to pass the + observability-driven value.""" + from heartbeat import HEARTBEAT_INTERVAL + + hb = HeartbeatLoop("http://localhost:8080", "ws-1") + assert hb._interval_seconds == HEARTBEAT_INTERVAL + + +def test_init_accepts_explicit_interval(): + """Passing interval_seconds threads ObservabilityConfig.heartbeat_interval_seconds + through to the loop. The integration site (workspace/main.py) does + this with the value from config.observability.heartbeat_interval_seconds.""" + hb = HeartbeatLoop("http://localhost:8080", "ws-1", interval_seconds=60) + assert hb._interval_seconds == 60 + + +def test_init_accepts_floor_of_5(): + """The config parser clamps to [5, 300]; the constructor itself accepts + any positive int — clamping is the parser's job, not the loop's. This + test pins that no defensive re-clamp happens here (which would + silently break operators who deliberately want 5s in dev).""" + hb = HeartbeatLoop("http://localhost:8080", "ws-1", interval_seconds=5) + assert hb._interval_seconds == 5 + hb2 = HeartbeatLoop("http://localhost:8080", "ws-1", interval_seconds=300) + assert hb2._interval_seconds == 300