Merge pull request #2450 from Molecule-AI/feat/observability-config-schema

feat(config): observability block schema (#119 PR-1 of 4)
This commit is contained in:
Hongming Wang 2026-05-01 05:20:11 +00:00 committed by GitHub
commit c06c4c0f56
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 178 additions and 0 deletions

View File

@ -166,6 +166,43 @@ class SecurityScanConfig:
operators who require a CVE gate know the gate is absent. Closes #268."""
@dataclass
class ObservabilityConfig:
    """Heartbeat cadence and log verbosity for the workspace runtime.

    A Hermes-style declarative block: the platform-runtime knobs that
    operators usually tune together live in one section of the config
    instead of being spread over env vars and hard-coded constants. That
    allows per-workspace tuning without a code change and leaves room for
    the tracing/event-log settings planned for follow-up PRs
    (#119 PR-2 / PR-3).

    The block currently carries two fields, ``heartbeat_interval_seconds``
    and ``log_level``. This PR is schema-only: both are parsed and stored,
    but they are not yet wired to their final read sites. The wiring
    lands in PR-3 of the series.

    Example config.yaml snippet::

        observability:
          heartbeat_interval_seconds: 60
          log_level: DEBUG
    """

    heartbeat_interval_seconds: int = 30
    """Seconds between heartbeats sent to the platform.

    The default of 30 matches the long-standing constant in
    ``workspace/heartbeat.py``. Lower values shorten the time the
    platform needs to notice a crashed workspace; higher values reduce
    platform write load. Values are clamped to [5, 300] at parse time —
    outside that range the workspace either floods the platform or looks
    dead before the next beat."""

    log_level: str = "INFO"
    """Python ``logging`` level name for the workspace runtime.

    Accepts the standard names (DEBUG, INFO, WARNING, ERROR, CRITICAL).
    The runtime currently reads the ``LOG_LEVEL`` env var; PR-3 of the
    #119 stack switches it to this field, with the env var still honored
    as an override for ops debugging."""
@dataclass
class ComplianceConfig:
"""OWASP Top 10 for Agentic Applications compliance settings.
@ -264,6 +301,7 @@ class WorkspaceConfig:
governance: GovernanceConfig = field(default_factory=GovernanceConfig)
security_scan: SecurityScanConfig = field(default_factory=SecurityScanConfig)
compliance: ComplianceConfig = field(default_factory=ComplianceConfig)
observability: ObservabilityConfig = field(default_factory=ObservabilityConfig)
sub_workspaces: list[dict] = field(default_factory=list)
effort: str = ""
"""Claude output effort level for the agentic loop: low | medium | high | xhigh | max.
@ -289,6 +327,22 @@ def _derive_provider_from_model(model: str) -> str:
return ""
def _clamp_heartbeat(value: object) -> int:
"""Coerce raw YAML/env input into the [5, 300]-second heartbeat band.
Outside that band the workspace either floods the platform with
sub-second beats or looks dead long before the next one both
real failure modes seen on incidents, neither benign. Coerce here
so adapters and ``heartbeat.py`` can read the value without
re-validating.
"""
try:
n = int(value)
except (TypeError, ValueError):
return 30
return max(5, min(300, n))
def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
"""Load config from WORKSPACE_CONFIG_PATH or the given path."""
if config_path is None:
@ -336,6 +390,7 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
_ss_raw = raw.get("security_scan", {})
security_scan_raw = _ss_raw if isinstance(_ss_raw, dict) else {"mode": str(_ss_raw)}
compliance_raw = raw.get("compliance", {})
observability_raw = raw.get("observability", {})
# Resolve initial_prompt: inline string or file reference
initial_prompt = raw.get("initial_prompt", "")
@ -445,6 +500,12 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
max_tool_calls_per_task=int(compliance_raw.get("max_tool_calls_per_task", 50)),
max_task_duration_seconds=int(compliance_raw.get("max_task_duration_seconds", 300)),
),
observability=ObservabilityConfig(
heartbeat_interval_seconds=_clamp_heartbeat(
observability_raw.get("heartbeat_interval_seconds", 30)
),
log_level=str(observability_raw.get("log_level", "INFO")).upper(),
),
sub_workspaces=raw.get("sub_workspaces", []),
effort=str(raw.get("effort", "")),
task_budget=int(raw.get("task_budget", 0)),

View File

@ -9,6 +9,7 @@ from config import (
A2AConfig,
ComplianceConfig,
DelegationConfig,
ObservabilityConfig,
SandboxConfig,
WorkspaceConfig,
load_config,
@ -523,3 +524,119 @@ def test_compliance_default_via_load_config(tmp_path, yaml_payload, expected_mod
# prompt_injection was never overridden in any payload — must stay at
# the dataclass default regardless of the mode value.
assert cfg.compliance.prompt_injection == "detect"
# ===== Observability block (#119 PR-1) =====
#
# Hermes-style declarative block grouping cadence + verbosity knobs into one
# place. Schema-only in this PR — wiring into heartbeat.py / main.py lands in
# PR-3. These tests pin the schema so the wiring PR can rely on the parsed
# values matching the documented contract (defaults, clamping bounds,
# log-level normalization).
def test_observability_dataclass_default():
    """A bare ``ObservabilityConfig()`` carries the documented defaults."""
    defaults = ObservabilityConfig()

    # Heartbeat default is 30 seconds; log level default is INFO.
    assert defaults.heartbeat_interval_seconds == 30
    assert defaults.log_level == "INFO"
def test_observability_default_when_yaml_omits_block(tmp_path):
    """A config without an ``observability:`` key loads the defaults."""
    # An empty mapping is the smallest config that has no observability key.
    (tmp_path / "config.yaml").write_text(yaml.dump({}))

    observability = load_config(str(tmp_path)).observability

    assert observability.heartbeat_interval_seconds == 30
    assert observability.log_level == "INFO"
def test_observability_explicit_yaml_override(tmp_path):
    """Values set explicitly in YAML reach ``cfg.observability`` unchanged."""
    # Both values are in-band / canonical, so no clamping or case
    # normalization should alter them.
    payload = {
        "observability": {
            "heartbeat_interval_seconds": 60,
            "log_level": "DEBUG",
        }
    }
    (tmp_path / "config.yaml").write_text(yaml.dump(payload))

    observability = load_config(str(tmp_path)).observability

    assert observability.heartbeat_interval_seconds == 60
    assert observability.log_level == "DEBUG"
def test_observability_partial_override_keeps_other_defaults(tmp_path):
    """Setting only heartbeat preserves the log_level default — and vice versa.

    Both directions are exercised: a block with only
    ``heartbeat_interval_seconds`` must leave ``log_level`` at "INFO", and
    a block with only ``log_level`` must leave the heartbeat at 30.
    """

    def _load_with(observability_block, dirname):
        # Each scenario gets its own directory so the two loads cannot
        # see each other's config.yaml.
        workspace_dir = tmp_path / dirname
        workspace_dir.mkdir()
        (workspace_dir / "config.yaml").write_text(
            yaml.dump({"observability": observability_block})
        )
        return load_config(str(workspace_dir))

    heartbeat_only = _load_with({"heartbeat_interval_seconds": 45}, "heartbeat_only")
    assert heartbeat_only.observability.heartbeat_interval_seconds == 45
    assert heartbeat_only.observability.log_level == "INFO"

    # The "vice versa" half of the contract: only log_level is set.
    log_level_only = _load_with({"log_level": "WARNING"}, "log_level_only")
    assert log_level_only.observability.log_level == "WARNING"
    assert log_level_only.observability.heartbeat_interval_seconds == 30
@pytest.mark.parametrize(
    "raw, expected",
    [
        # In-band values pass through unchanged.
        pytest.param(5, 5, id="floor_in_band"),
        pytest.param(30, 30, id="default_in_band"),
        pytest.param(300, 300, id="ceiling_in_band"),
        # Below floor → clamped up to 5s. Sub-5s heartbeats flooded the
        # platform during incident IR-2026-03-11 (workspace stuck in a
        # tight loop emitting beats faster than the platform could ack).
        pytest.param(1, 5, id="below_floor_one"),
        pytest.param(0, 5, id="below_floor_zero"),
        pytest.param(-7, 5, id="below_floor_negative"),
        # Above ceiling → clamped down to 300s. >5min beats let crashed
        # workspaces look healthy long enough to mask the failure.
        pytest.param(301, 300, id="above_ceiling_just"),
        pytest.param(3600, 300, id="above_ceiling_far"),
        # Non-integer YAML values fall back to the documented default
        # rather than crashing the workspace at boot.
        pytest.param("not-a-number", 30, id="garbage_string"),
        pytest.param(None, 30, id="null"),
    ],
)
def test_observability_heartbeat_clamp(tmp_path, raw, expected):
    """``heartbeat_interval_seconds`` lands in the [5, 300] band after parsing."""
    payload = {"observability": {"heartbeat_interval_seconds": raw}}
    (tmp_path / "config.yaml").write_text(yaml.dump(payload))

    loaded = load_config(str(tmp_path))

    assert loaded.observability.heartbeat_interval_seconds == expected
def test_observability_log_level_uppercased(tmp_path):
    """A lowercase log level in YAML is normalized to upper case.

    Operators can write ``debug`` or ``DEBUG`` and get the canonical
    upper-case name that Python's ``logging`` module uses.
    """
    payload = {"observability": {"log_level": "debug"}}
    (tmp_path / "config.yaml").write_text(yaml.dump(payload))

    loaded = load_config(str(tmp_path))

    assert loaded.observability.log_level == "DEBUG"