Clear stale ASR capture on end of utterance in DuplexPipeline. Add test to verify behavior when conversation state changes, ensuring proper handling of ASR capture variables.

This commit is contained in:
Xin Wang
2026-02-28 12:32:35 +08:00
parent 8b59569b99
commit aae41d4512
2 changed files with 22 additions and 0 deletions

View File

@@ -1393,6 +1393,11 @@ class DuplexPipeline:
async def _on_end_of_utterance(self) -> None:
"""Handle end of user utterance."""
if self.conversation.state not in (ConversationState.LISTENING, ConversationState.INTERRUPTED):
# Prevent a stale ASR capture watchdog from repeatedly forcing EOU
# once the conversation has already moved past user-listening states.
self._asr_capture_active = False
self._asr_capture_started_ms = 0.0
self._pending_speech_audio = b""
return
# Add a tiny trailing silence tail to stabilize final-token decoding.

View File

@@ -4,6 +4,7 @@ from typing import Any, Dict, List
import pytest
from core.conversation import ConversationState
from core.duplex_pipeline import DuplexPipeline
from models.ws_v1 import ToolCallResultsMessage, parse_client_message
from services.base import LLMStreamEvent
@@ -439,3 +440,19 @@ async def test_server_tool_timeout_emits_504_and_continues(monkeypatch):
finals = [e for e in events if e.get("type") == "assistant.response.final"]
assert finals
assert "timeout fallback" in finals[-1].get("text", "")
@pytest.mark.asyncio
async def test_eou_early_return_clears_stale_asr_capture(monkeypatch):
    """An EOU arriving outside the user-listening states must wipe stale ASR capture state."""
    dp, _ = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])
    # Move the conversation past LISTENING/INTERRUPTED so that
    # _on_end_of_utterance takes its early-return branch.
    await dp.conversation.set_state(ConversationState.PROCESSING)

    # Seed stale capture bookkeeping that the early return is expected to clear.
    dp._pending_speech_audio = b"stale"
    dp._asr_capture_started_ms = 1234.0
    dp._asr_capture_active = True

    await dp._on_end_of_utterance()

    assert dp._pending_speech_audio == b""
    assert dp._asr_capture_started_ms == 0.0
    assert dp._asr_capture_active is False