diff --git a/engine/core/duplex_pipeline.py b/engine/core/duplex_pipeline.py
index e575bab..60bdc45 100644
--- a/engine/core/duplex_pipeline.py
+++ b/engine/core/duplex_pipeline.py
@@ -1393,6 +1393,11 @@ class DuplexPipeline:
     async def _on_end_of_utterance(self) -> None:
         """Handle end of user utterance."""
         if self.conversation.state not in (ConversationState.LISTENING, ConversationState.INTERRUPTED):
+            # Prevent a stale ASR capture watchdog from repeatedly forcing EOU
+            # once the conversation has already moved past user-listening states.
+            self._asr_capture_active = False
+            self._asr_capture_started_ms = 0.0
+            self._pending_speech_audio = b""
             return
 
         # Add a tiny trailing silence tail to stabilize final-token decoding.
diff --git a/engine/tests/test_tool_call_flow.py b/engine/tests/test_tool_call_flow.py
index 32810a2..236e86a 100644
--- a/engine/tests/test_tool_call_flow.py
+++ b/engine/tests/test_tool_call_flow.py
@@ -4,6 +4,7 @@ from typing import Any, Dict, List
 
 import pytest
 
+from core.conversation import ConversationState
 from core.duplex_pipeline import DuplexPipeline
 from models.ws_v1 import ToolCallResultsMessage, parse_client_message
 from services.base import LLMStreamEvent
@@ -439,3 +440,19 @@ async def test_server_tool_timeout_emits_504_and_continues(monkeypatch):
     finals = [e for e in events if e.get("type") == "assistant.response.final"]
     assert finals
     assert "timeout fallback" in finals[-1].get("text", "")
+
+
+@pytest.mark.asyncio
+async def test_eou_early_return_clears_stale_asr_capture(monkeypatch):
+    pipeline, _events = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])
+    await pipeline.conversation.set_state(ConversationState.PROCESSING)
+
+    pipeline._asr_capture_active = True
+    pipeline._asr_capture_started_ms = 1234.0
+    pipeline._pending_speech_audio = b"stale"
+
+    await pipeline._on_end_of_utterance()
+
+    assert pipeline._asr_capture_active is False
+    assert pipeline._asr_capture_started_ms == 0.0
+    assert pipeline._pending_speech_audio == b""