From d15efc9c1be088de7b97bfdb658858788cb2b410 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 13 Apr 2026 19:22:23 -0700 Subject: [PATCH 1/2] fix: correct GPT-5 family context lengths in fallback defaults (#9309) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The generic 'gpt-5' fallback was set to 128,000 — which is the max OUTPUT tokens, not the context window. GPT-5 base and most variants (codex, mini) have 400,000 context. This caused /model to report 128k for models like gpt-5.3-codex when models.dev was unavailable. Added specific entries for GPT-5 variants with different context sizes: - gpt-5.4, gpt-5.4-pro: 1,050,000 (1.05M) - gpt-5.4-mini, gpt-5.4-nano: 400,000 - gpt-5.3-codex-spark: 128,000 (reduced) - gpt-5.1-chat: 128,000 (chat variant) - gpt-5 (catch-all): 400,000 Sources: https://developers.openai.com/api/docs/models --- agent/model_metadata.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 98bb9543..842373c1 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -106,9 +106,15 @@ DEFAULT_CONTEXT_LENGTHS = { "claude-sonnet-4.6": 1000000, # Catch-all for older Claude models (must sort after specific entries) "claude": 200000, - # OpenAI + # OpenAI — GPT-5 family (most have 400k; specific overrides first) + # Source: https://developers.openai.com/api/docs/models + "gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4) + "gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4) + "gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context) + "gpt-5.3-codex-spark": 128000, # Spark variant has reduced 128k context + "gpt-5.1-chat": 128000, # Chat variant has 128k context + "gpt-5": 400000, # GPT-5.x base, mini, codex variants (400k) "gpt-4.1": 1047576, - "gpt-5": 128000, "gpt-4": 128000, # Google "gemini": 1048576, From 0cc7f79016cab874da869587db09f52f7330ce2d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 13 Apr 2026 19:22:43 -0700 Subject: [PATCH 2/2] fix(streaming): prevent duplicate Telegram replies when stream task is cancelled (#9319) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the 5-second stream_task timeout in gateway/run.py expires (due to slow Telegram API calls from rate limiting after several messages), the stream consumer is cancelled via asyncio.CancelledError. The CancelledError handler did a best-effort final edit but never set final_response_sent, so the gateway fell through to the normal send path and delivered the full response again as a reply — causing a duplicate. The fix: in the CancelledError handler, set final_response_sent = True when already_sent is True (i.e., the stream consumer had already delivered content to the user). This tells the gateway's already_sent check that the response was delivered, preventing the duplicate send. Adds two tests verifying the cancellation behavior: - Cancelled with already_sent=True → final_response_sent=True (no dup) - Cancelled with already_sent=False → final_response_sent=False (normal send path proceeds) Reported by community user hume on Discord. --- gateway/stream_consumer.py | 8 +++ tests/gateway/test_stream_consumer.py | 81 +++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index e743df8d..240084e9 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -280,6 +280,14 @@ class GatewayStreamConsumer: await self._send_or_edit(self._accumulated) except Exception: pass + # If we delivered any content before being cancelled, mark the + # final response as sent so the gateway's already_sent check + # doesn't trigger a duplicate message. The 5-second + # stream_task timeout (gateway/run.py) can cancel us while + # waiting on a slow Telegram API call — without this flag the + # gateway falls through to the normal send path. + if self._already_sent: + self._final_response_sent = True except Exception as e: logger.error("Stream consumer error: %s", e) diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py index d6630672..d8a1be2d 100644 --- a/tests/gateway/test_stream_consumer.py +++ b/tests/gateway/test_stream_consumer.py @@ -599,3 +599,84 @@ class TestInterimCommentaryMessages: assert sent_texts == ["Hello ▉", "world"] assert consumer.already_sent is True assert consumer.final_response_sent is True + + +class TestCancelledConsumerSetsFlags: + """Cancellation must set final_response_sent when already_sent is True. + + The 5-second stream_task timeout in gateway/run.py can cancel the + consumer while it's still processing. If final_response_sent stays + False, the gateway falls through to the normal send path and the + user sees a duplicate message. + """ + + @pytest.mark.asyncio + async def test_cancelled_with_already_sent_marks_final_response_sent(self): + """Cancelling after content was sent should set final_response_sent.""" + adapter = MagicMock() + adapter.send = AsyncMock( + return_value=SimpleNamespace(success=True, message_id="msg_1") + ) + adapter.edit_message = AsyncMock( + return_value=SimpleNamespace(success=True) + ) + adapter.MAX_MESSAGE_LENGTH = 4096 + + consumer = GatewayStreamConsumer( + adapter, + "chat_123", + StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5), + ) + + # Stream some text — the consumer sends it and sets already_sent + consumer.on_delta("Hello world") + task = asyncio.create_task(consumer.run()) + await asyncio.sleep(0.08) + + assert consumer.already_sent is True + + # Cancel the task (simulates the 5-second timeout in gateway) + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + # The fix: final_response_sent should be True even though _DONE + # was never processed, preventing a duplicate message. + assert consumer.final_response_sent is True + + @pytest.mark.asyncio + async def test_cancelled_without_any_sends_does_not_mark_final(self): + """Cancelling before anything was sent should NOT set final_response_sent.""" + adapter = MagicMock() + adapter.send = AsyncMock( + return_value=SimpleNamespace(success=False, message_id=None) + ) + adapter.edit_message = AsyncMock( + return_value=SimpleNamespace(success=True) + ) + adapter.MAX_MESSAGE_LENGTH = 4096 + + consumer = GatewayStreamConsumer( + adapter, + "chat_123", + StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5), + ) + + # Send fails — already_sent stays False + consumer.on_delta("x") + task = asyncio.create_task(consumer.run()) + await asyncio.sleep(0.08) + + assert consumer.already_sent is False + + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + # Without a successful send, final_response_sent should stay False + # so the normal gateway send path can deliver the response. + assert consumer.final_response_sent is False