fix: cap context pressure percentage at 100% in display (#3480)

* fix: cap context pressure percentage at 100% in display

  The forward-looking token estimate can overshoot the compaction threshold (e.g. a large tool result pushes it from 70% to 109% in one step). The progress bar was already capped via min(), but pct_int was not — causing the user to see '109% to compaction', which is confusing. Cap pct_int at 100 in both CLI and gateway display functions.

  Reported by @JoshExile82.

* refactor: use real API token counts for compression decisions

  Replace the rough chars/3 estimation with actual prompt_tokens + completion_tokens from the API response. The estimation was needed to predict whether tool results would push context past the threshold, but the default 50% threshold leaves ample headroom — if tool results push past it, the next API call reports real usage and triggers compression then.

  This removes all estimation from the compression and context pressure paths, making both 100% data-driven from provider-reported token counts. Also removes the dead _msg_count_before_tools variable.
2026-03-27 21:42:09 -07:00 · 2026-03-27 21:42:09 -07:00 · 15cfd20820
commit 15cfd20820
parent 03f24c1edd
3 changed files with 23 additions and 15 deletions
--- a/agent/display.py
+++ b/agent/display.py
@ -699,7 +699,7 @@ def format_context_pressure(
        threshold_percent: Compaction threshold as a fraction of context window.
        compression_enabled: Whether auto-compression is active.
    """
-    pct_int = int(compaction_progress * 100)
+    pct_int = min(int(compaction_progress * 100), 100)
    filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
    bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)

@ -729,7 +729,7 @@ def format_context_pressure_gateway(
    No ANSI — just Unicode and plain text suitable for Telegram/Discord/etc.
    The percentage shows progress toward the compaction threshold.
    """
-    pct_int = int(compaction_progress * 100)
+    pct_int = min(int(compaction_progress * 100), 100)
    filled = min(int(compaction_progress * _BAR_WIDTH), _BAR_WIDTH)
    bar = _BAR_FILLED * filled + _BAR_EMPTY * (_BAR_WIDTH - filled)

--- a/run_agent.py
+++ b/run_agent.py
@ -7311,7 +7311,6 @@ class AIAgent:
                        except Exception:
                            pass

-                    _msg_count_before_tools = len(messages)
                    self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)

                    # Signal that a paragraph break is needed before the next
@ -7329,18 +7328,18 @@ class AIAgent:
                    if _tc_names == {"execute_code"}:
                        self.iteration_budget.refund()
                    
-                    # Estimate next prompt size using real token counts from the
-                    # last API response + rough estimate of newly appended tool
-                    # results.  This catches cases where tool results push the
-                    # context past the limit that last_prompt_tokens alone misses
-                    # (e.g. large file reads, web extractions).
+                    # Use real token counts from the API response to decide
+                    # compression.  prompt_tokens + completion_tokens is the
+                    # actual context size the provider reported plus the
+                    # assistant turn — a tight lower bound for the next prompt.
+                    # Tool results appended above aren't counted yet, but the
+                    # threshold (default 50%) leaves ample headroom; if tool
+                    # results push past it, the next API call will report the
+                    # real total and trigger compression then.
                    _compressor = self.context_compressor
-                    _new_tool_msgs = messages[_msg_count_before_tools:]
-                    _new_chars = sum(len(str(m.get("content", "") or "")) for m in _new_tool_msgs)
-                    _estimated_next_prompt = (
+                    _real_tokens = (
                        _compressor.last_prompt_tokens
                        + _compressor.last_completion_tokens
-                        + _new_chars // 3  # conservative: JSON-heavy tool results ≈ 3 chars/token
                    )

                    # ── Context pressure warnings (user-facing only) ──────────
@ -7350,12 +7349,12 @@ class AIAgent:
                    # Does not inject into messages — just prints to CLI output
                    # and fires status_callback for gateway platforms.
                    if _compressor.threshold_tokens > 0:
-                        _compaction_progress = _estimated_next_prompt / _compressor.threshold_tokens
+                        _compaction_progress = _real_tokens / _compressor.threshold_tokens
                        if _compaction_progress >= 0.85 and not self._context_pressure_warned:
                            self._context_pressure_warned = True
                            self._emit_context_pressure(_compaction_progress, _compressor)

-                    if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
+                    if self.compression_enabled and _compressor.should_compress(_real_tokens):
                        messages, active_system_prompt = self._compress_context(
                            messages, system_message,
                            approx_tokens=self.context_compressor.last_prompt_tokens,
--- a/tests/test_context_pressure.py
+++ b/tests/test_context_pressure.py
@ -69,10 +69,12 @@ class TestFormatContextPressure:
        assert isinstance(result, str)

    def test_over_100_percent_capped(self):
-        """Progress > 1.0 should not break the bar."""
+        """Progress > 1.0 should cap both bar and percentage text at 100%."""
        line = format_context_pressure(1.05, 100_000, 0.50)
        assert "▰" in line
        assert line.count("▰") == 20
+        assert "100%" in line
+        assert "105%" not in line


 class TestFormatContextPressureGateway:
@ -100,6 +102,13 @@ class TestFormatContextPressureGateway:
        msg = format_context_pressure_gateway(0.80, 0.50)
        assert "▰" in msg

+    def test_over_100_percent_capped(self):
+        """Progress > 1.0 should cap percentage text at 100%."""
+        msg = format_context_pressure_gateway(1.09, 0.50)
+        assert "100% to compaction" in msg
+        assert "109%" not in msg
+        assert msg.count("▰") == 20
+

 # ---------------------------------------------------------------------------
 # AIAgent context pressure flag tests