This commit is contained in:
Xin Wang
2026-02-28 12:33:23 +08:00
2 changed files with 22 additions and 0 deletions

View File

@@ -1393,6 +1393,11 @@ class DuplexPipeline:
async def _on_end_of_utterance(self) -> None:
"""Handle end of user utterance."""
if self.conversation.state not in (ConversationState.LISTENING, ConversationState.INTERRUPTED):
# Prevent a stale ASR capture watchdog from repeatedly forcing EOU
# once the conversation has already moved past user-listening states.
self._asr_capture_active = False
self._asr_capture_started_ms = 0.0
self._pending_speech_audio = b""
return
# Add a tiny trailing silence tail to stabilize final-token decoding.

View File

@@ -4,6 +4,7 @@ from typing import Any, Dict, List
import pytest
from core.conversation import ConversationState
from core.duplex_pipeline import DuplexPipeline
from models.ws_v1 import ToolCallResultsMessage, parse_client_message
from services.base import LLMStreamEvent
@@ -439,3 +440,19 @@ async def test_server_tool_timeout_emits_504_and_continues(monkeypatch):
finals = [e for e in events if e.get("type") == "assistant.response.final"]
assert finals
assert "timeout fallback" in finals[-1].get("text", "")
@pytest.mark.asyncio
async def test_eou_early_return_clears_stale_asr_capture(monkeypatch):
    """The EOU handler's early-return branch must wipe stale ASR-capture state.

    When the conversation has already left the user-listening states
    (LISTENING / INTERRUPTED), `_on_end_of_utterance` returns early; this
    test verifies that the early return also resets the capture watchdog
    bookkeeping instead of leaving it armed.
    """
    pipeline, _ = _build_pipeline(monkeypatch, [[LLMStreamEvent(type="done")]])

    # Put the conversation past the user-listening states so the handler
    # takes its early-return path.
    await pipeline.conversation.set_state(ConversationState.PROCESSING)

    # Simulate a capture watchdog left over from a previous utterance.
    pipeline._asr_capture_active = True
    pipeline._asr_capture_started_ms = 1234.0
    pipeline._pending_speech_audio = b"stale"

    await pipeline._on_end_of_utterance()

    # All stale capture bookkeeping must have been cleared on the way out.
    assert pipeline._asr_capture_active is False
    assert pipeline._asr_capture_started_ms == 0.0
    assert pipeline._pending_speech_audio == b""