Merge pull request #260 from pipecat-ai/aleix/more-interruption-fixes
more interruption fixes
This commit is contained in:
10
CHANGELOG.md
10
CHANGELOG.md
@@ -5,6 +5,16 @@ All notable changes to **pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [0.0.34] - 2024-06-25
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue with asynchronous STT services (Deepgram and Azure) that could
|
||||
interruptions to ignore transcriptions.
|
||||
|
||||
- Fixed an issue introduced in 0.0.33 that would cause the LLM to generate
|
||||
shorter output.
|
||||
|
||||
## [0.0.33] - 2024-06-25
|
||||
|
||||
### Changed
|
||||
|
||||
@@ -13,6 +13,7 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
InterimTranscriptionFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMResponseEndFrame,
|
||||
LLMResponseStartFrame,
|
||||
LLMMessagesFrame,
|
||||
@@ -151,8 +152,8 @@ class LLMAssistantResponseAggregator(LLMResponseAggregator):
|
||||
super().__init__(
|
||||
messages=messages,
|
||||
role="assistant",
|
||||
start_frame=LLMResponseStartFrame,
|
||||
end_frame=LLMResponseEndFrame,
|
||||
start_frame=LLMFullResponseStartFrame,
|
||||
end_frame=LLMFullResponseEndFrame,
|
||||
accumulator_frame=TextFrame,
|
||||
handle_interruptions=True
|
||||
)
|
||||
|
||||
@@ -20,7 +20,6 @@ from pipecat.frames.frames import (
|
||||
Frame,
|
||||
StartFrame,
|
||||
StartInterruptionFrame,
|
||||
StopInterruptionFrame,
|
||||
SystemFrame,
|
||||
TranscriptionFrame,
|
||||
URLImageRawFrame)
|
||||
@@ -143,7 +142,7 @@ class AzureSTTService(AIService):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, StartInterruptionFrame) or isinstance(frame, StopInterruptionFrame):
|
||||
if isinstance(frame, StartInterruptionFrame):
|
||||
await self._handle_interruptions(frame)
|
||||
elif isinstance(frame, SystemFrame):
|
||||
await self.push_frame(frame, direction)
|
||||
@@ -166,21 +165,14 @@ class AzureSTTService(AIService):
|
||||
await self._push_frame_task
|
||||
|
||||
async def _handle_interruptions(self, frame: Frame):
|
||||
if isinstance(frame, StartInterruptionFrame):
|
||||
# Indicate we are interrupted, we should ignore any out-of-band
|
||||
# transcriptions.
|
||||
self._is_interrupted_event.set()
|
||||
# Cancel the task. This will stop pushing frames downstream.
|
||||
self._push_frame_task.cancel()
|
||||
await self._push_frame_task
|
||||
# Push an out-of-band frame (i.e. not using the ordered push
|
||||
# frame task).
|
||||
await self.push_frame(frame)
|
||||
# Create a new queue and task.
|
||||
self._create_push_task()
|
||||
elif isinstance(frame, StopInterruptionFrame):
|
||||
# We should now be able to receive transcriptions again.
|
||||
self._is_interrupted_event.clear()
|
||||
# Cancel the task. This will stop pushing frames downstream.
|
||||
self._push_frame_task.cancel()
|
||||
await self._push_frame_task
|
||||
# Push an out-of-band frame (i.e. not using the ordered push
|
||||
# frame task).
|
||||
await self.push_frame(frame)
|
||||
# Create a new queue and task.
|
||||
self._create_push_task()
|
||||
|
||||
def _create_push_task(self):
|
||||
self._push_queue = asyncio.Queue()
|
||||
@@ -197,9 +189,6 @@ class AzureSTTService(AIService):
|
||||
break
|
||||
|
||||
def _on_handle_recognized(self, event):
|
||||
if self._is_interrupted_event.is_set():
|
||||
return
|
||||
|
||||
if event.result.reason == ResultReason.RecognizedSpeech and len(event.result.text) > 0:
|
||||
direction = FrameDirection.DOWNSTREAM
|
||||
frame = TranscriptionFrame(event.result.text, "", int(time.time_ns() / 1000000))
|
||||
|
||||
@@ -19,7 +19,6 @@ from pipecat.frames.frames import (
|
||||
InterimTranscriptionFrame,
|
||||
StartFrame,
|
||||
StartInterruptionFrame,
|
||||
StopInterruptionFrame,
|
||||
SystemFrame,
|
||||
TranscriptionFrame)
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
@@ -118,16 +117,12 @@ class DeepgramSTTService(AIService):
|
||||
self._connection = self._client.listen.asynclive.v("1")
|
||||
self._connection.on(LiveTranscriptionEvents.Transcript, self._on_message)
|
||||
|
||||
# This event will be used to ignore out-of-band transcriptions while we
|
||||
# are itnerrupted.
|
||||
self._is_interrupted_event = asyncio.Event()
|
||||
|
||||
self._create_push_task()
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, StartInterruptionFrame) or isinstance(frame, StopInterruptionFrame):
|
||||
if isinstance(frame, StartInterruptionFrame):
|
||||
await self._handle_interruptions(frame)
|
||||
elif isinstance(frame, SystemFrame):
|
||||
await self.push_frame(frame, direction)
|
||||
@@ -153,21 +148,14 @@ class DeepgramSTTService(AIService):
|
||||
await self._push_frame_task
|
||||
|
||||
async def _handle_interruptions(self, frame: Frame):
|
||||
if isinstance(frame, StartInterruptionFrame):
|
||||
# Indicate we are interrupted, we should ignore any out-of-band
|
||||
# transcriptions.
|
||||
self._is_interrupted_event.set()
|
||||
# Cancel the task. This will stop pushing frames downstream.
|
||||
self._push_frame_task.cancel()
|
||||
await self._push_frame_task
|
||||
# Push an out-of-band frame (i.e. not using the ordered push
|
||||
# frame task).
|
||||
await self.push_frame(frame)
|
||||
# Create a new queue and task.
|
||||
self._create_push_task()
|
||||
elif isinstance(frame, StopInterruptionFrame):
|
||||
# We should now be able to receive transcriptions again.
|
||||
self._is_interrupted_event.clear()
|
||||
# Cancel the task. This will stop pushing frames downstream.
|
||||
self._push_frame_task.cancel()
|
||||
await self._push_frame_task
|
||||
# Push an out-of-band frame (i.e. not using the ordered push
|
||||
# frame task).
|
||||
await self.push_frame(frame)
|
||||
# Create a new queue and task.
|
||||
self._create_push_task()
|
||||
|
||||
def _create_push_task(self):
|
||||
self._push_queue = asyncio.Queue()
|
||||
@@ -184,9 +172,6 @@ class DeepgramSTTService(AIService):
|
||||
break
|
||||
|
||||
async def _on_message(self, *args, **kwargs):
|
||||
if self._is_interrupted_event.is_set():
|
||||
return
|
||||
|
||||
result = kwargs["result"]
|
||||
is_final = result.is_final
|
||||
transcript = result.channel.alternatives[0].transcript
|
||||
|
||||
Reference in New Issue
Block a user