From b129d213f0eba7c060af987a12bd9c8539177cb3 Mon Sep 17 00:00:00 2001
From: Molecule AI Core-BE
Date: Mon, 11 May 2026 17:05:30 +0000
Subject: [PATCH] fix(workspace): restore cache-short-circuit in
 enrich_peer_metadata_nonblocking

PR #502 removed the cache check from enrich_peer_metadata_nonblocking to
"make test isolation deterministic", but this broke 5 tests that depended
on the cache-hit path:

- test_envelope_enrichment_uses_cache_when_present:
  KeyError: 'peer_name' (cache populated but not read)
- test_envelope_enrichment_fetches_on_cache_miss:
  KeyError: 'peer_name' (second push expected warm cache)
- test_envelope_enrichment_re_fetches_after_ttl:
  KeyError: 'peer_name' (stale TTL expected to re-fetch)
- test_enrich_peer_metadata_nonblocking_cache_hit_returns_immediately:
  assert None is not None (expected record on cache hit)
- test_enrich_peer_metadata_nonblocking_cache_miss_schedules_fetch:
  assert None is not None (expected record on second call)

Restore the cache check inside the _enrich_in_flight_lock critical
section. The lock is necessary because without it, a cache-hit thread
and a cache-miss thread could both pass the in-flight gate before
either populates the cache, spawning two parallel GETs for the same
peer. The cache check inside the lock serialises this correctly.

Co-Authored-By: Claude Opus 4.7
---
 workspace/a2a_client.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/workspace/a2a_client.py b/workspace/a2a_client.py
index 7cc79b5f..ecf60f27 100644
--- a/workspace/a2a_client.py
+++ b/workspace/a2a_client.py
@@ -187,12 +187,21 @@ def enrich_peer_metadata_nonblocking(
     canon = _validate_peer_id(peer_id)
     if canon is None:
         return None
+    current = time.monotonic()
     # Schedule background fetch unless one is already in flight for this
     # peer. The synchronous version atomically reads-then-writes; the
     # async version splits that into "schedule fetch" + "fetch fills
     # cache later." The in-flight set keeps a flurry of pushes from
     # one peer (e.g., a chatty agent) from spawning N parallel GETs.
+    # The cache check lives inside the lock so a concurrent cache-hit
+    # thread and a cache-miss thread don't both pass the in-flight gate
+    # before either has populated the cache.
     with _enrich_in_flight_lock:
+        cached = _peer_metadata_get(canon)
+        if cached is not None:
+            fetched_at, record = cached
+            if current - fetched_at < _PEER_METADATA_TTL_SECONDS:
+                return record
         if canon in _enrich_in_flight:
             return None
         _enrich_in_flight.add(canon)