fix: cap context pressure percentage at 100% in display (#3480)

* fix: cap context pressure percentage at 100% in display

The forward-looking token estimate can overshoot the compaction threshold
(e.g. a large tool result pushes it from 70% to 109% in one step). The
progress bar was already capped via min(), but pct_int was not, so the
user could see a confusing '109% to compaction' message.

Cap pct_int at 100 in both CLI and gateway display functions.

Reported by @JoshExile82.

* refactor: use real API token counts for compression decisions

Replace the rough chars/3 estimation with actual prompt_tokens +
completion_tokens from the API response. The estimation was needed to
predict whether tool results would push context past the threshold, but
the default 50% threshold leaves ample headroom — if tool results push
past it, the next API call reports real usage and triggers compression
then.

This removes all estimation from the compression and context-pressure
paths, so both are now driven entirely by provider-reported token counts.

Also removes the dead _msg_count_before_tools variable.
This commit is contained in:
Teknium 2026-03-27 21:42:09 -07:00 committed by GitHub
parent 03f24c1edd
commit 15cfd20820
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 23 additions and 15 deletions

View File

@ -699,7 +699,7 @@ def format_context_pressure(
threshold_percent: Compaction threshold as a fraction of context window.
compression_enabled: Whether auto-compression is active.
"""
pct_int = int(compaction_progress * 100)
pct_int = min(int(compaction_progress * 100), 100)
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)
@ -729,7 +729,7 @@ def format_context_pressure_gateway(
No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc.
The percentage shows progress toward the compaction threshold.
"""
pct_int = int(compaction_progress * 100)
pct_int = min(int(compaction_progress * 100), 100)
filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)

View File

@ -7311,7 +7311,6 @@ class AIAgent:
except Exception:
pass
_msg_count_before_tools = len(messages)
self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
# Signal that a paragraph break is needed before the next
@ -7329,18 +7328,18 @@ class AIAgent:
if _tc_names == {"execute_code"}:
self.iteration_budget.refund()
# Estimate next prompt size using real token counts from the
# last API response + rough estimate of newly appended tool
# results. This catches cases where tool results push the
# context past the limit that last_prompt_tokens alone misses
# (e.g. large file reads, web extractions).
# Use real token counts from the API response to decide
# compression. prompt_tokens + completion_tokens is the
# actual context size the provider reported plus the
# assistant turn — a tight lower bound for the next prompt.
# Tool results appended above aren't counted yet, but the
# threshold (default 50%) leaves ample headroom; if tool
# results push past it, the next API call will report the
# real total and trigger compression then.
_compressor = self.context_compressor
_new_tool_msgs = messages[_msg_count_before_tools:]
_new_chars = sum(len(str(m.get("content", "") or "")) for m in _new_tool_msgs)
_estimated_next_prompt = (
_real_tokens = (
_compressor.last_prompt_tokens
+ _compressor.last_completion_tokens
+ _new_chars // 3 # conservative: JSON-heavy tool results ≈ 3 chars/token
)
# ── Context pressure warnings (user-facing only) ──────────
@ -7350,12 +7349,12 @@ class AIAgent:
# Does not inject into messages — just prints to CLI output
# and fires status_callback for gateway platforms.
if _compressor.threshold_tokens > 0:
_compaction_progress = _estimated_next_prompt / _compressor.threshold_tokens
_compaction_progress = _real_tokens / _compressor.threshold_tokens
if _compaction_progress >= 0.85 and not self._context_pressure_warned:
self._context_pressure_warned = True
self._emit_context_pressure(_compaction_progress, _compressor)
if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
if self.compression_enabled and _compressor.should_compress(_real_tokens):
messages, active_system_prompt = self._compress_context(
messages, system_message,
approx_tokens=self.context_compressor.last_prompt_tokens,

View File

@ -69,10 +69,12 @@ class TestFormatContextPressure:
assert isinstance(result, str)
def test_over_100_percent_capped(self):
"""Progress > 1.0 should not break the bar."""
"""Progress > 1.0 should cap both bar and percentage text at 100%."""
line = format_context_pressure(1.05, 100_000, 0.50)
assert "" in line
assert line.count("") == 20
assert "100%" in line
assert "105%" not in line
class TestFormatContextPressureGateway:
@ -100,6 +102,13 @@ class TestFormatContextPressureGateway:
msg = format_context_pressure_gateway(0.80, 0.50)
assert "" in msg
def test_over_100_percent_capped(self):
"""Progress > 1.0 should cap percentage text at 100%."""
msg = format_context_pressure_gateway(1.09, 0.50)
assert "100% to compaction" in msg
assert "109%" not in msg
assert msg.count("") == 20
# ---------------------------------------------------------------------------
# AIAgent context pressure flag tests