fix: cap context pressure percentage at 100% in display (#3480)
* fix: cap context pressure percentage at 100% in display

  The forward-looking token estimate can overshoot the compaction threshold (e.g. a large tool result pushes it from 70% to 109% in one step). The progress bar was already capped via min(), but pct_int was not — causing the user to see '109% to compaction', which is confusing.

  Cap pct_int at 100 in both CLI and gateway display functions.

  Reported by @JoshExile82.

* refactor: use real API token counts for compression decisions

  Replace the rough chars/3 estimation with actual prompt_tokens + completion_tokens from the API response. The estimation was needed to predict whether tool results would push context past the threshold, but the default 50% threshold leaves ample headroom — if tool results push past it, the next API call reports real usage and triggers compression then.

  This removes all estimation from the compression and context pressure paths, making both 100% data-driven from provider-reported token counts.

  Also removes the dead _msg_count_before_tools variable.
This commit is contained in:
parent
03f24c1edd
commit
15cfd20820
@ -699,7 +699,7 @@ def format_context_pressure(
|
||||
threshold_percent: Compaction threshold as a fraction of context window.
|
||||
compression_enabled: Whether auto-compression is active.
|
||||
"""
|
||||
pct_int = int(compaction_progress * 100)
|
||||
pct_int = min(int(compaction_progress * 100), 100)
|
||||
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
|
||||
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
|
||||
|
||||
@ -729,7 +729,7 @@ def format_context_pressure_gateway(
|
||||
No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc.
|
||||
The percentage shows progress toward the compaction threshold.
|
||||
"""
|
||||
pct_int = int(compaction_progress * 100)
|
||||
pct_int = min(int(compaction_progress * 100), 100)
|
||||
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
|
||||
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
|
||||
|
||||
|
||||
23
run_agent.py
23
run_agent.py
@ -7311,7 +7311,6 @@ class AIAgent:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_msg_count_before_tools = len(messages)
|
||||
self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
|
||||
|
||||
# Signal that a paragraph break is needed before the next
|
||||
@ -7329,18 +7328,18 @@ class AIAgent:
|
||||
if _tc_names == {"execute_code"}:
|
||||
self.iteration_budget.refund()
|
||||
|
||||
# Estimate next prompt size using real token counts from the
|
||||
# last API response + rough estimate of newly appended tool
|
||||
# results. This catches cases where tool results push the
|
||||
# context past the limit that last_prompt_tokens alone misses
|
||||
# (e.g. large file reads, web extractions).
|
||||
# Use real token counts from the API response to decide
|
||||
# compression. prompt_tokens + completion_tokens is the
|
||||
# actual context size the provider reported plus the
|
||||
# assistant turn — a tight lower bound for the next prompt.
|
||||
# Tool results appended above aren't counted yet, but the
|
||||
# threshold (default 50%) leaves ample headroom; if tool
|
||||
# results push past it, the next API call will report the
|
||||
# real total and trigger compression then.
|
||||
_compressor = self.context_compressor
|
||||
_new_tool_msgs = messages[_msg_count_before_tools:]
|
||||
_new_chars = sum(len(str(m.get("content", "") or "")) for m in _new_tool_msgs)
|
||||
_estimated_next_prompt = (
|
||||
_real_tokens = (
|
||||
_compressor.last_prompt_tokens
|
||||
+ _compressor.last_completion_tokens
|
||||
+ _new_chars // 3 # conservative: JSON-heavy tool results ≈ 3 chars/token
|
||||
)
|
||||
|
||||
# ── Context pressure warnings (user-facing only) ──────────
|
||||
@ -7350,12 +7349,12 @@ class AIAgent:
|
||||
# Does not inject into messages — just prints to CLI output
|
||||
# and fires status_callback for gateway platforms.
|
||||
if _compressor.threshold_tokens > 0:
|
||||
_compaction_progress = _estimated_next_prompt / _compressor.threshold_tokens
|
||||
_compaction_progress = _real_tokens / _compressor.threshold_tokens
|
||||
if _compaction_progress >= 0.85 and not self._context_pressure_warned:
|
||||
self._context_pressure_warned = True
|
||||
self._emit_context_pressure(_compaction_progress, _compressor)
|
||||
|
||||
if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
|
||||
if self.compression_enabled and _compressor.should_compress(_real_tokens):
|
||||
messages, active_system_prompt = self._compress_context(
|
||||
messages, system_message,
|
||||
approx_tokens=self.context_compressor.last_prompt_tokens,
|
||||
|
||||
@ -69,10 +69,12 @@ class TestFormatContextPressure:
|
||||
assert isinstance(result, str)
|
||||
|
||||
def test_over_100_percent_capped(self):
|
||||
"""Progress > 1.0 should not break the bar."""
|
||||
"""Progress > 1.0 should cap both bar and percentage text at 100%."""
|
||||
line = format_context_pressure(1.05, 100_000, 0.50)
|
||||
assert "▰" in line
|
||||
assert line.count("▰") == 20
|
||||
assert "100%" in line
|
||||
assert "105%" not in line
|
||||
|
||||
|
||||
class TestFormatContextPressureGateway:
|
||||
@ -100,6 +102,13 @@ class TestFormatContextPressureGateway:
|
||||
msg = format_context_pressure_gateway(0.80, 0.50)
|
||||
assert "▰" in msg
|
||||
|
||||
def test_over_100_percent_capped(self):
|
||||
"""Progress > 1.0 should cap percentage text at 100%."""
|
||||
msg = format_context_pressure_gateway(1.09, 0.50)
|
||||
assert "100% to compaction" in msg
|
||||
assert "109%" not in msg
|
||||
assert msg.count("▰") == 20
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AIAgent context pressure flag tests
|
||||
|
||||
Loading…
Reference in New Issue
Block a user