feat(runtime): adapter-declared idle_timeout_override end-to-end
Capability primitive #2 (task #117). The first cross-cutting capability where the adapter actually displaces platform behavior — claude-code's streaming session can legitimately go silent for 8+ minutes during synthesis + slow tool calls; the platform's hardcoded 5min idle timer in a2a_proxy.go cancels it mid-flight (the bug PR #2128 patched at the env-var layer). This PR fixes it at the right layer: the adapter declares "I need 600s" and the platform's dispatch path honors it. Wire shape (Python → Go): POST /registry/heartbeat { "workspace_id": "...", ... "runtime_metadata": { "capabilities": {"heartbeat": false, "scheduler": false, ...}, "idle_timeout_seconds": 600 // optional, omitted = use default } } Default behavior preserved: any adapter that doesn't override BaseAdapter.idle_timeout_override() (returns None by default) sends no idle_timeout_seconds field; the Go side falls through to idleTimeoutDuration (env A2A_IDLE_TIMEOUT_SECONDS, default 5min). Existing langgraph / crewai / deepagents workspaces are unaffected. Components: Python: - adapter_base.py: idle_timeout_override() method on BaseAdapter returning None (the platform-default sentinel). - heartbeat.py: _runtime_metadata_payload() lazy-imports the active adapter and assembles the capability + override block. Try/except swallows ANY error so heartbeat never breaks because of capability discovery — observability outranks capability accuracy. Go: - models.HeartbeatPayload.RuntimeMetadata (pointer so absent = "old runtime, didn't say"; explicit zero-cap = "new runtime, declared no native ownership"). - handlers.runtimeOverrides: in-memory sync.Map cache keyed by workspaceID. Populated by the heartbeat handler, consulted on every dispatchA2A. Reset on platform restart (worst-case 30s of platform-default behavior — acceptable; nothing about overrides is correctness-critical). - a2a_proxy.dispatchA2A: looks up the override before applyIdleTimeout; falls through to global default when absent.
Tests: Python (17, all new): - RuntimeCapabilities dataclass shape (frozen, defaults, wire keys) - BaseAdapter.capabilities() default + override + sibling isolation - idle_timeout_override default, positive override, dropped-override - Heartbeat metadata producer: default adapter emits all-False, native adapter emits flag + override, missing ADAPTER_MODULE returns {} (graceful), zero/negative override is omitted from wire, exception inside adapter swallowed Go (6, all new): - SetIdleTimeout + IdleTimeout round-trip - Zero/negative duration clears the override - Empty workspace_id ignored - Replacement (heartbeat overwrites prior value) - Reset clears entire cache - Concurrent reads + writes (sync.Map invariant) Verification: - 1308 / 1308 workspace pytest pass (was 1300, +8) - All Go handlers tests pass (6 new + existing) - go vet clean See project memory `project_runtime_native_pluggable.md` for the architecture principle this implements. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
751b6aa2d9
commit
0d3058585b
@ -588,7 +588,18 @@ func (h *WorkspaceHandler) dispatchA2A(ctx context.Context, workspaceID, agentUR
|
||||
if concrete, ok := h.broadcaster.(*events.Broadcaster); ok {
|
||||
b = concrete
|
||||
}
|
||||
forwardCtx, idleCancel := applyIdleTimeout(forwardCtx, b, workspaceID, idleTimeoutDuration)
|
||||
// Per-workspace idle-timeout override (capability primitive #2 —
|
||||
// see workspace/adapter_base.py:idle_timeout_override). The
|
||||
// adapter declares a longer/shorter window than the platform
|
||||
// default in its heartbeat; the heartbeat handler stashes it in
|
||||
// runtimeOverrides; we honor it here. Falls through to the global
|
||||
// default (env A2A_IDLE_TIMEOUT_SECONDS, default 5min) when no
|
||||
// override is registered for this workspace.
|
||||
idle := idleTimeoutDuration
|
||||
if perWorkspace, ok := runtimeOverrides.IdleTimeout(workspaceID); ok {
|
||||
idle = perWorkspace
|
||||
}
|
||||
forwardCtx, idleCancel := applyIdleTimeout(forwardCtx, b, workspaceID, idle)
|
||||
cancel := func() {
|
||||
idleCancel()
|
||||
if ceilingCancel != nil {
|
||||
|
||||
@ -11,6 +11,7 @@ import (
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/db"
|
||||
"github.com/Molecule-AI/molecule-monorepo/platform/internal/events"
|
||||
@ -461,6 +462,22 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) {
|
||||
"uptime_seconds": payload.UptimeSeconds,
|
||||
})
|
||||
|
||||
// Refresh per-workspace runtime overrides from the heartbeat's
|
||||
// runtime_metadata block (introduced for the native+pluggable
|
||||
// runtime principle — see project memory). Only idle_timeout_seconds
|
||||
// is consumed today; capability flags are stored for future
|
||||
// consumers (heartbeat-skip, scheduler-skip, etc.) by subsequent
|
||||
// PRs in task #117. A nil RuntimeMetadata or absent field clears
|
||||
// the override so the dispatch path uses the global default.
|
||||
if payload.RuntimeMetadata != nil && payload.RuntimeMetadata.IdleTimeoutSeconds != nil {
|
||||
runtimeOverrides.SetIdleTimeout(
|
||||
payload.WorkspaceID,
|
||||
time.Duration(*payload.RuntimeMetadata.IdleTimeoutSeconds)*time.Second,
|
||||
)
|
||||
} else {
|
||||
runtimeOverrides.SetIdleTimeout(payload.WorkspaceID, 0) // clear
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"status": "ok"})
|
||||
}
|
||||
|
||||
|
||||
84
workspace-server/internal/handlers/runtime_overrides.go
Normal file
84
workspace-server/internal/handlers/runtime_overrides.go
Normal file
@ -0,0 +1,84 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// runtimeOverrides is the in-memory cache of per-workspace, adapter-
|
||||
// declared overrides for cross-cutting capabilities. Populated by the
|
||||
// heartbeat handler from HeartbeatPayload.RuntimeMetadata; consumed by
|
||||
// dispatch paths (a2a_proxy.dispatchA2A reads IdleTimeout) before
|
||||
// applying their own platform-default behavior.
|
||||
//
|
||||
// Why an in-memory cache and not a DB column:
|
||||
// - Heartbeats arrive every ~30s, so a fresh override propagates
|
||||
// within a heartbeat cycle of any change in adapter declarations.
|
||||
// - On platform restart the cache resets to empty until each
|
||||
// workspace's next heartbeat repopulates it. Worst-case window =
|
||||
// 30s of platform-default behavior. Acceptable; nothing about
|
||||
// these overrides is correctness-critical (they tune timeouts +
|
||||
// enable native ownership of fallback features, not state).
|
||||
// - DB-roundtripping every dispatch would add latency to a hot
|
||||
// path (a2a_proxy is on every agent → agent call). The cache is
|
||||
// a sync.Map — atomic ptr load per dispatch, zero lock contention
|
||||
// under steady load.
|
||||
//
|
||||
// Stale entries: a workspace that goes offline never sends another
|
||||
// heartbeat, but the cache entry persists until the platform restarts.
|
||||
// Acceptable because dispatchA2A only consults the cache when actually
|
||||
// dispatching to that workspace — a stale entry for an offline
|
||||
// workspace just means "use the override that was active when it was
|
||||
// last alive" (correct behavior; the workspace will get the same
|
||||
// timeouts when it comes back).
|
||||
//
|
||||
// See workspace/adapter_base.py:idle_timeout_override and project
|
||||
// memory `project_runtime_native_pluggable.md`.
|
||||
var runtimeOverrides runtimeOverrideCache
|
||||
|
||||
// runtimeOverrideEntry is the per-workspace record held in the cache.
type runtimeOverrideEntry struct {
	// idleTimeout is the adapter-declared silence window. A non-positive
	// value is never stored by SetIdleTimeout and is treated as "no
	// override" by IdleTimeout.
	idleTimeout time.Duration
}

// runtimeOverrideCache maps workspaceID (string) to runtimeOverrideEntry.
// The zero value is an empty, ready-to-use cache.
type runtimeOverrideCache struct {
	m sync.Map
}

// SetIdleTimeout records the idle-timeout override carried by the most
// recent heartbeat for workspaceID.
//
// A non-positive d (zero or negative) removes any stored override so the
// workspace returns to the global default. An empty workspaceID is ignored
// entirely, so nothing is ever stored under the "" key.
func (c *runtimeOverrideCache) SetIdleTimeout(workspaceID string, d time.Duration) {
	switch {
	case workspaceID == "":
		// Nothing to key on; leave the cache untouched.
	case d > 0:
		c.m.Store(workspaceID, runtimeOverrideEntry{idleTimeout: d})
	default:
		c.m.Delete(workspaceID)
	}
}

// IdleTimeout reports the override in effect for workspaceID.
//
// The boolean is true only when a positive override is stored; otherwise
// it returns (0, false) and the caller should use the global
// idleTimeoutDuration.
func (c *runtimeOverrideCache) IdleTimeout(workspaceID string) (time.Duration, bool) {
	raw, found := c.m.Load(workspaceID)
	if found {
		// A value of an unexpected type or a non-positive duration is
		// reported the same way as a missing entry.
		if entry, isEntry := raw.(runtimeOverrideEntry); isEntry && entry.idleTimeout > 0 {
			return entry.idleTimeout, true
		}
	}
	return 0, false
}

// Reset removes every entry from the cache. Intended for tests; in
// production, heartbeats refresh entries on their own.
func (c *runtimeOverrideCache) Reset() {
	c.m.Range(func(key, _ any) bool {
		c.m.Delete(key)
		return true
	})
}
|
||||
118
workspace-server/internal/handlers/runtime_overrides_test.go
Normal file
118
workspace-server/internal/handlers/runtime_overrides_test.go
Normal file
@ -0,0 +1,118 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestRuntimeOverrideCache_SetAndGet(t *testing.T) {
|
||||
c := &runtimeOverrideCache{}
|
||||
|
||||
if _, ok := c.IdleTimeout("ws-a"); ok {
|
||||
t.Fatal("empty cache should not return any override")
|
||||
}
|
||||
|
||||
c.SetIdleTimeout("ws-a", 10*time.Minute)
|
||||
got, ok := c.IdleTimeout("ws-a")
|
||||
if !ok || got != 10*time.Minute {
|
||||
t.Fatalf("expected 10m override; got=%v ok=%v", got, ok)
|
||||
}
|
||||
|
||||
// Sibling workspace unaffected — pin against the trap where a
|
||||
// shared map without proper keying would leak overrides across
|
||||
// workspaces (a hard-to-debug "claude-code's longer timeout
|
||||
// somehow applied to langgraph too").
|
||||
if _, ok := c.IdleTimeout("ws-b"); ok {
|
||||
t.Fatal("override for ws-a leaked to ws-b")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRuntimeOverrideCache_ZeroOrNegativeClears(t *testing.T) {
|
||||
// Adapter dropping the override (returning None / 0 from
|
||||
// idle_timeout_override) must restore platform-default behavior.
|
||||
// If the cache held the previous value indefinitely, an adapter
|
||||
// downgrade would silently keep the longer timeout active.
|
||||
c := &runtimeOverrideCache{}
|
||||
c.SetIdleTimeout("ws-a", 10*time.Minute)
|
||||
if _, ok := c.IdleTimeout("ws-a"); !ok {
|
||||
t.Fatal("setup: override should be set")
|
||||
}
|
||||
|
||||
c.SetIdleTimeout("ws-a", 0)
|
||||
if _, ok := c.IdleTimeout("ws-a"); ok {
|
||||
t.Fatal("zero duration should clear override")
|
||||
}
|
||||
|
||||
c.SetIdleTimeout("ws-a", 5*time.Minute)
|
||||
c.SetIdleTimeout("ws-a", -1*time.Second)
|
||||
if _, ok := c.IdleTimeout("ws-a"); ok {
|
||||
t.Fatal("negative duration should clear override")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRuntimeOverrideCache_EmptyWorkspaceIDIgnored(t *testing.T) {
|
||||
// Defensive: a misrouted heartbeat with empty workspace_id
|
||||
// should NOT pollute the cache with a "" key. workspaceID == ""
|
||||
// is also the value dispatchA2A passes when the workspace is
|
||||
// indeterminate, and that path must not surface a stored value.
|
||||
c := &runtimeOverrideCache{}
|
||||
c.SetIdleTimeout("", 10*time.Minute)
|
||||
if _, ok := c.IdleTimeout(""); ok {
|
||||
t.Fatal("empty workspace_id must not store overrides")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRuntimeOverrideCache_SetReplaces(t *testing.T) {
|
||||
// A heartbeat with a new override value replaces, doesn't append.
|
||||
c := &runtimeOverrideCache{}
|
||||
c.SetIdleTimeout("ws-a", 10*time.Minute)
|
||||
c.SetIdleTimeout("ws-a", 20*time.Minute)
|
||||
got, _ := c.IdleTimeout("ws-a")
|
||||
if got != 20*time.Minute {
|
||||
t.Fatalf("expected 20m after replacement; got %v", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRuntimeOverrideCache_Reset(t *testing.T) {
|
||||
c := &runtimeOverrideCache{}
|
||||
c.SetIdleTimeout("ws-a", 10*time.Minute)
|
||||
c.SetIdleTimeout("ws-b", 20*time.Minute)
|
||||
c.Reset()
|
||||
if _, ok := c.IdleTimeout("ws-a"); ok {
|
||||
t.Fatal("reset should clear ws-a")
|
||||
}
|
||||
if _, ok := c.IdleTimeout("ws-b"); ok {
|
||||
t.Fatal("reset should clear ws-b")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRuntimeOverrideCache_ConcurrentSafe(t *testing.T) {
|
||||
// dispatchA2A reads the cache on every request; heartbeat handlers
|
||||
// write on every 30s. Different workspaces will be hot in different
|
||||
// goroutines. The sync.Map underlying the cache promises this; the
|
||||
// test pins it so a future "let me just use a regular map with a
|
||||
// mutex" change can't silently regress under load.
|
||||
c := &runtimeOverrideCache{}
|
||||
var wg sync.WaitGroup
|
||||
const N = 100
|
||||
|
||||
for i := 0; i < N; i++ {
|
||||
wg.Add(2)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
c.SetIdleTimeout("ws", time.Duration(i+1)*time.Second)
|
||||
}(i)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
_, _ = c.IdleTimeout("ws")
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
// Final value must be SOME positive duration written by one of the
|
||||
// goroutines — not corrupted, not zero.
|
||||
got, ok := c.IdleTimeout("ws")
|
||||
if !ok || got <= 0 || got > time.Duration(N)*time.Second {
|
||||
t.Fatalf("expected a valid override after concurrent writes; got %v ok=%v", got, ok)
|
||||
}
|
||||
}
|
||||
@ -70,6 +70,40 @@ type HeartbeatPayload struct {
|
||||
// non-empty value is "wedged"; future values can extend this without
|
||||
// migration.
|
||||
RuntimeState string `json:"runtime_state"`
|
||||
|
||||
// RuntimeMetadata is the adapter-declared capability map + per-
|
||||
// capability override values. The Python runtime builds this from
|
||||
// BaseAdapter.capabilities() + per-hook methods (e.g.
|
||||
// idle_timeout_override()) — see workspace/heartbeat.py:
|
||||
// _runtime_metadata_payload. Optional: missing means "use platform
|
||||
// defaults for everything", matching pre-2026-04 behavior.
|
||||
//
|
||||
// Pointer (not value) so a missing JSON field is nil rather than a
|
||||
// zero-value RuntimeMetadata{} that would falsely claim "all caps =
|
||||
// false declared explicitly". Lets the platform distinguish "adapter
|
||||
// said no native ownership" from "old runtime version, didn't say".
|
||||
RuntimeMetadata *RuntimeMetadata `json:"runtime_metadata,omitempty"`
|
||||
}
|
||||
|
||||
// RuntimeMetadata is the optional block a workspace runtime attaches to its
// heartbeat to declare which capabilities its adapter owns natively and
// which platform defaults it wants overridden. Every field is tagged
// `omitempty`, and new fields should follow suit so older runtime versions
// keep working.
//
// The Python source of truth is workspace/adapter_base.py
// (RuntimeCapabilities); see also project memory
// `project_runtime_native_pluggable.md`.
type RuntimeMetadata struct {
	// Capabilities maps a capability name to whether the adapter owns it
	// natively. The key set (heartbeat, scheduler, session, status_mgmt,
	// retry, activity_decoration, channel_dispatch) mirrors
	// RuntimeCapabilities.to_dict() in adapter_base.py and must be kept in
	// sync with it.
	Capabilities map[string]bool `json:"capabilities,omitempty"`

	// IdleTimeoutSeconds, when non-nil, is the per-dispatch silence window
	// (in seconds) requested for this workspace's A2A traffic. nil means
	// "no override; use the global default". A zero or negative value is
	// handled the same way: the heartbeat handler passes it to
	// runtimeOverrides.SetIdleTimeout, which clears the stored override.
	IdleTimeoutSeconds *int `json:"idle_timeout_seconds,omitempty"`
}
|
||||
|
||||
type UpdateCardPayload struct {
|
||||
|
||||
@ -164,6 +164,29 @@ class BaseAdapter(ABC):
|
||||
project memory `project_runtime_native_pluggable.md`."""
|
||||
return RuntimeCapabilities()
|
||||
|
||||
def idle_timeout_override(self) -> int | None:
|
||||
"""Per-A2A-dispatch silence window override, in SECONDS.
|
||||
|
||||
Return None to use the platform default (env var
|
||||
A2A_IDLE_TIMEOUT_SECONDS, falling back to 5 minutes — see
|
||||
a2a_proxy.go:defaultIdleTimeoutDuration). Override when this
|
||||
runtime's SDK can legitimately go silent longer than the
|
||||
default before the dispatch should be considered wedged.
|
||||
|
||||
Why this is per-adapter, not just env: the env value is a
|
||||
cluster-wide knob set by ops. Different SDKs have different
|
||||
latency profiles — claude-code synthesis on Opus + tool use
|
||||
legitimately runs 8-10 min between broadcasts; hermes synth
|
||||
with custom providers can be even slower. Hardcoding 5min for
|
||||
everyone either cancels real work (claude-code synth) or
|
||||
leaves wedged runtimes (langgraph) hanging too long.
|
||||
|
||||
Platform reads this from the heartbeat payload and stashes
|
||||
it per-workspace; dispatchA2A consults it before applying the
|
||||
idle timer. None / unset / zero falls through to the global
|
||||
default — same behavior as before this hook landed."""
|
||||
return None
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Plugin install hooks
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@ -43,6 +43,43 @@ def _runtime_state_payload() -> dict:
|
||||
"sample_error": wedge_reason(),
|
||||
}
|
||||
|
||||
|
||||
def _runtime_metadata_payload() -> dict:
    """Build the ``runtime_metadata`` portion of the heartbeat body.

    The block carries the active adapter's declared capabilities plus any
    per-capability override values (currently only the idle timeout). The
    platform uses it to decide whether a capability is owned natively by
    the adapter or handled by the platform fallback.

    Heartbeat must NEVER fail because of capability discovery: the alive
    signal matters more than capability accuracy. Any exception raised
    while importing, instantiating or querying the adapter is swallowed
    and logged at debug level, and the platform falls back to its own
    defaults for the missing fields.

    Returns:
        ``{"runtime_metadata": {"capabilities": {...}}}``, with an extra
        ``"idle_timeout_seconds"`` key when the adapter declares a positive
        integer override; ``{}`` when the adapter cannot be loaded or
        introspected.

    See project memory `project_runtime_native_pluggable.md` and
    workspace/adapter_base.py:RuntimeCapabilities.
    """
    try:
        # Imported lazily so a missing or broken adapter package degrades
        # to "no metadata" instead of breaking the heartbeat module.
        from adapters import get_adapter

        # ADAPTER_MODULE wins over the runtime arg in get_adapter — pass
        # an empty string to force the env-var path.
        adapter_cls = get_adapter("")
        adapter = adapter_cls()
        caps = adapter.capabilities()
        meta: dict = {"capabilities": caps.to_dict()}
        idle = adapter.idle_timeout_override()
        # Only ship a genuine positive integer. None / zero / negative mean
        # "use the platform default", expressed on the wire as an absent
        # field. bool is excluded explicitly: it is a subclass of int, so
        # True would pass a plain isinstance check, serialize as JSON
        # `true`, and hand the platform a non-numeric value for a field it
        # declares as an integer.
        if isinstance(idle, int) and not isinstance(idle, bool) and idle > 0:
            meta["idle_timeout_seconds"] = idle
        return {"runtime_metadata": meta}
    except Exception as e:
        # debug-level: missing ADAPTER_MODULE in dev / test envs is normal
        logger.debug("runtime_metadata: failed to read adapter caps: %s", e)
        return {}
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
HEARTBEAT_INTERVAL = 30 # seconds
|
||||
@ -123,6 +160,7 @@ class HeartbeatLoop:
|
||||
# sample_error field. The platform reads
|
||||
# runtime_state to flip status → degraded.
|
||||
body.update(_runtime_state_payload())
|
||||
body.update(_runtime_metadata_payload())
|
||||
await client.post(
|
||||
f"{self.platform_url}/registry/heartbeat",
|
||||
json=body,
|
||||
|
||||
147
workspace/tests/test_heartbeat_runtime_metadata.py
Normal file
147
workspace/tests/test_heartbeat_runtime_metadata.py
Normal file
@ -0,0 +1,147 @@
|
||||
"""Tests for heartbeat._runtime_metadata_payload — the heartbeat-side
|
||||
producer that sends adapter capability declarations + the
|
||||
idle_timeout_override value to the platform every 30s. Capability
|
||||
primitive #2 (task #117) wires this into the platform's a2a_proxy.
|
||||
|
||||
Tests use sys.modules monkey-patching to stub the `adapters` module
|
||||
because workspace/heartbeat.py lazy-imports it inside the helper —
|
||||
keeping heartbeat resilient to a missing/broken adapter discovery
|
||||
path."""
|
||||
import sys
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from adapter_base import BaseAdapter, RuntimeCapabilities
|
||||
from heartbeat import _runtime_metadata_payload
|
||||
|
||||
|
||||
class _FakeAdapter(BaseAdapter):
    """Minimal concrete adapter used as the "no opt-in" baseline.

    It inherits capabilities() and idle_timeout_override() from
    BaseAdapter unchanged, so it declares no native capability and no
    idle-timeout override."""

    @staticmethod
    def name() -> str:
        return "fake"

    @staticmethod
    def display_name() -> str:
        return "Fake"

    @staticmethod
    def description() -> str:
        return "Fake adapter for heartbeat metadata tests"

    async def setup(self, config) -> None:
        # Nothing to prepare for a stub adapter.
        return None

    async def create_executor(self, config):  # pragma: no cover
        # These tests never build an executor.
        raise NotImplementedError
|
||||
|
||||
|
||||
class _NativeAdapter(_FakeAdapter):
    """Opt-in adapter: claims native heartbeat ownership and requests a
    600-second idle timeout. Mirrors what the claude-code adapter is
    expected to declare once #87 lands."""

    def idle_timeout_override(self) -> int:
        return 600

    def capabilities(self) -> RuntimeCapabilities:
        return RuntimeCapabilities(provides_native_heartbeat=True)
|
||||
|
||||
|
||||
@pytest.fixture
def stub_adapters_module(request):
    """Swap in a stand-in ``adapters`` module for the duration of a test.

    The stand-in's get_adapter() returns the adapter class supplied via
    indirect parametrization (``_FakeAdapter`` when the test passes none).
    Whatever was registered under ``sys.modules["adapters"]`` beforehand
    is put back on teardown, or the key is removed if it was absent.

    Yields:
        The adapter class that get_adapter() will hand out.
    """
    chosen_cls = getattr(request, "param", _FakeAdapter)

    def get_adapter(runtime):
        return chosen_cls

    previous = sys.modules.get("adapters")
    sys.modules["adapters"] = SimpleNamespace(get_adapter=get_adapter)  # type: ignore[assignment]
    try:
        yield chosen_cls
    finally:
        if previous is not None:
            sys.modules["adapters"] = previous
        else:
            sys.modules.pop("adapters", None)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("stub_adapters_module", [_FakeAdapter], indirect=True)
def test_default_adapter_emits_all_false_capabilities_no_idle_override(stub_adapters_module):
    """A default adapter still sends the runtime_metadata block: every
    capability False and no idle_timeout_seconds key. The block's mere
    presence tells the platform this runtime speaks the new protocol;
    older runtimes omit the field entirely."""
    body = _runtime_metadata_payload()
    assert "runtime_metadata" in body

    metadata = body["runtime_metadata"]
    capability_names = (
        "heartbeat",
        "scheduler",
        "session",
        "status_mgmt",
        "retry",
        "activity_decoration",
        "channel_dispatch",
    )
    assert metadata["capabilities"] == dict.fromkeys(capability_names, False)

    # The key must be absent, not null: "absent field = use platform
    # default" is the wire contract the Go side relies on.
    assert "idle_timeout_seconds" not in metadata
|
||||
|
||||
|
||||
@pytest.mark.parametrize("stub_adapters_module", [_NativeAdapter], indirect=True)
def test_native_adapter_emits_capability_flag_and_idle_override(stub_adapters_module):
    """An opt-in adapter's declarations reach the wire: the claimed
    capability is True, unclaimed ones stay False, and the idle override
    is carried verbatim."""
    metadata = _runtime_metadata_payload()["runtime_metadata"]
    capabilities = metadata["capabilities"]

    assert capabilities["heartbeat"] is True
    # Claiming one capability must not claim its siblings.
    assert capabilities["scheduler"] is False
    assert metadata["idle_timeout_seconds"] == 600
|
||||
|
||||
|
||||
def test_returns_empty_dict_when_adapter_module_missing(monkeypatch):
    """With no stub installed and ADAPTER_MODULE unset, adapter discovery
    is expected to fail. The helper must swallow that failure and return
    {}: the metadata is optional, the heartbeat itself is load-bearing."""
    # Make sure ADAPTER_MODULE cannot steer discovery to a real adapter.
    monkeypatch.delenv("ADAPTER_MODULE", raising=False)
    # Drop any `adapters` stub a previous test may have left behind.
    monkeypatch.delitem(sys.modules, "adapters", raising=False)

    assert _runtime_metadata_payload() == {}
|
||||
|
||||
|
||||
@pytest.mark.parametrize("stub_adapters_module", [_FakeAdapter], indirect=True)
def test_idle_timeout_override_zero_or_negative_omitted(stub_adapters_module, monkeypatch):
    """An adapter returning 0 or a negative number from
    idle_timeout_override means 'use the platform default', same as None.
    The key must be left off the wire rather than shipping a bogus value
    the Go side would have to filter.

    Both zero and negative values are exercised, as the test name
    promises."""

    def _adapter_returning(value):
        # Build a fresh adapter class whose override is pinned to `value`.
        class _BadOverrideAdapter(_FakeAdapter):
            def idle_timeout_override(self) -> int:
                return value

        return _BadOverrideAdapter

    for bogus in (0, -1, -600):
        adapter_cls = _adapter_returning(bogus)
        fake_mod = SimpleNamespace(get_adapter=lambda runtime, cls=adapter_cls: cls)
        monkeypatch.setitem(sys.modules, "adapters", fake_mod)

        payload = _runtime_metadata_payload()
        assert "idle_timeout_seconds" not in payload["runtime_metadata"], (
            f"override {bogus!r} leaked onto the wire"
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("stub_adapters_module", [_FakeAdapter], indirect=True)
def test_swallows_unexpected_exception_inside_adapter(stub_adapters_module, monkeypatch):
    """An adapter whose capabilities() raises must not take the heartbeat
    down with it. The helper returns {} so no metadata is sent and the
    platform falls through to its defaults."""

    class _BrokenAdapter(_FakeAdapter):
        def capabilities(self):
            raise RuntimeError("simulated broken adapter init")

    def get_adapter(runtime):
        return _BrokenAdapter

    monkeypatch.setitem(sys.modules, "adapters", SimpleNamespace(get_adapter=get_adapter))

    result = _runtime_metadata_payload()
    assert result == {}
|
||||
@ -152,3 +152,35 @@ class TestBaseAdapterCapabilitiesDefault:
|
||||
native = _NativeHeartbeatAdapter().capabilities()
|
||||
assert minimal.provides_native_heartbeat is False
|
||||
assert native.provides_native_heartbeat is True
|
||||
|
||||
|
||||
class TestIdleTimeoutOverride:
    """Contract tests for the idle_timeout_override() hook, the first
    capability primitive with a real platform consumer (workspace-server's
    a2a_proxy.go consults it per workspace before applying its idle timer).

    The base implementation MUST stay a no-op (None, so the platform uses
    its global default); subclasses opt in by returning a number of
    seconds."""

    def test_default_returns_none(self):
        # A positive default here would silently apply to every adapter
        # and displace the platform's global default (env
        # A2A_IDLE_TIMEOUT_SECONDS, default 5min), taking the central knob
        # away from ops.
        adapter = _MinimalAdapter()
        assert adapter.idle_timeout_override() is None

    def test_subclass_can_override_to_positive_seconds(self):
        class _SlowAdapter(_MinimalAdapter):
            def idle_timeout_override(self) -> int:
                return 600  # 10 min — typical for a slow synth runtime

        declared = _SlowAdapter().idle_timeout_override()
        assert declared == 600

    def test_subclass_can_explicitly_keep_default_via_none(self):
        # An adapter that once declared an override and later dropped it
        # (explicitly returning None again) must read as "platform
        # default". Pinned here so the round-trip stays explicit.
        class _DroppedOverrideAdapter(_MinimalAdapter):
            def idle_timeout_override(self):
                return None

        declared = _DroppedOverrideAdapter().idle_timeout_override()
        assert declared is None
|
||||
|
||||
Loading…
Reference in New Issue
Block a user