Fixing pipeline freeze when using Python 3.10
This commit is contained in:
@@ -47,6 +47,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a race condition that occurs in Python 3.10+ where the task could miss
|
||||
the `CancelledError` and continue running indefinitely, freezing the pipeline.
|
||||
|
||||
- Fixed an `AWSNovaSonicLLMService` issue introduced in 0.0.72.
|
||||
|
||||
## [0.0.73] - 2025-06-26
|
||||
|
||||
@@ -258,9 +258,11 @@ class ParallelPipeline(BasePipeline):
|
||||
async def _cancel(self):
|
||||
"""Cancel all parallel pipeline processing tasks."""
|
||||
if self._up_task:
|
||||
self._up_queue.cancel()
|
||||
await self.cancel_task(self._up_task)
|
||||
self._up_task = None
|
||||
if self._down_task:
|
||||
self._down_queue.cancel()
|
||||
await self.cancel_task(self._down_task)
|
||||
self._down_task = None
|
||||
|
||||
|
||||
@@ -77,6 +77,7 @@ class ConsumerProcessor(FrameProcessor):
|
||||
async def _cancel(self, _: CancelFrame):
|
||||
"""Cancel the consumer task."""
|
||||
if self._consumer_task:
|
||||
self._queue.cancel()
|
||||
await self.cancel_task(self._consumer_task)
|
||||
|
||||
async def _consumer_task_handler(self):
|
||||
|
||||
@@ -651,6 +651,7 @@ class FrameProcessor(BaseObject):
|
||||
async def __cancel_input_task(self):
|
||||
"""Cancel the input processing task."""
|
||||
if self.__input_frame_task:
|
||||
self.__input_queue.cancel()
|
||||
await self.cancel_task(self.__input_frame_task)
|
||||
self.__input_frame_task = None
|
||||
|
||||
@@ -686,6 +687,7 @@ class FrameProcessor(BaseObject):
|
||||
async def __cancel_push_task(self):
|
||||
"""Cancel the frame pushing task."""
|
||||
if self.__push_frame_task:
|
||||
self.__push_queue.cancel()
|
||||
await self.cancel_task(self.__push_frame_task)
|
||||
self.__push_frame_task = None
|
||||
|
||||
|
||||
@@ -1086,10 +1086,12 @@ class RTVIProcessor(FrameProcessor):
|
||||
async def _cancel_tasks(self):
|
||||
"""Cancel all running tasks."""
|
||||
if self._action_task:
|
||||
self._action_queue.cancel()
|
||||
await self.cancel_task(self._action_task)
|
||||
self._action_task = None
|
||||
|
||||
if self._message_task:
|
||||
self._message_queue.cancel()
|
||||
await self.cancel_task(self._message_task)
|
||||
self._message_task = None
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ from typing import Awaitable, Callable, List, Optional
|
||||
|
||||
from pipecat.frames.frames import Frame, StartFrame
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.utils.asyncio.watchdog_event import WatchdogEvent
|
||||
|
||||
|
||||
class IdleFrameProcessor(FrameProcessor):
|
||||
@@ -77,7 +78,7 @@ class IdleFrameProcessor(FrameProcessor):
|
||||
def _create_idle_task(self):
|
||||
"""Create and start the idle monitoring task."""
|
||||
if not self._idle_task:
|
||||
self._idle_event = asyncio.Event()
|
||||
self._idle_event = WatchdogEvent(self.task_manager)
|
||||
self._idle_task = self.create_task(self._idle_task_handler())
|
||||
|
||||
async def _idle_task_handler(self):
|
||||
|
||||
@@ -15,10 +15,12 @@ from pipecat.frames.frames import (
|
||||
CancelFrame,
|
||||
EndFrame,
|
||||
Frame,
|
||||
StartFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.utils.asyncio.watchdog_event import WatchdogEvent
|
||||
|
||||
|
||||
class UserIdleProcessor(FrameProcessor):
|
||||
@@ -74,7 +76,7 @@ class UserIdleProcessor(FrameProcessor):
|
||||
self._interrupted = False
|
||||
self._conversation_started = False
|
||||
self._idle_task = None
|
||||
self._idle_event = asyncio.Event()
|
||||
self._idle_event = None
|
||||
|
||||
def _wrap_callback(
|
||||
self,
|
||||
@@ -134,6 +136,9 @@ class UserIdleProcessor(FrameProcessor):
|
||||
"""
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, StartFrame):
|
||||
self._idle_event = WatchdogEvent(self.task_manager)
|
||||
|
||||
# Check for end frames before processing
|
||||
if isinstance(frame, (EndFrame, CancelFrame)):
|
||||
# Stop the idle task, if it exists
|
||||
|
||||
@@ -244,6 +244,7 @@ class TavusVideoService(AIService):
|
||||
async def _cancel_send_task(self):
|
||||
"""Cancel the audio sending task if it exists."""
|
||||
if self._send_task:
|
||||
self._queue.cancel()
|
||||
await self.cancel_task(self._send_task)
|
||||
self._send_task = None
|
||||
|
||||
|
||||
@@ -805,6 +805,7 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
|
||||
|
||||
async def _stop_audio_context_task(self):
|
||||
if self._audio_context_task:
|
||||
self._contexts_queue.cancel()
|
||||
await self.cancel_task(self._audio_context_task)
|
||||
self._audio_context_task = None
|
||||
|
||||
|
||||
@@ -810,6 +810,7 @@ class BaseOutputTransport(FrameProcessor):
|
||||
async def _cancel_clock_task(self):
|
||||
"""Cancel and cleanup the clock processing task."""
|
||||
if self._clock_task:
|
||||
self._clock_queue.cancel()
|
||||
await self._transport.cancel_task(self._clock_task)
|
||||
self._clock_task = None
|
||||
|
||||
|
||||
@@ -295,6 +295,9 @@ class TaskManager(BaseTaskManager):
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.exception(f"{name}: unexpected exception while stopping task: {e}")
|
||||
except BaseException as e:
|
||||
logger.critical(f"{name}: fatal base exception while stopping task: {e}")
|
||||
raise
|
||||
|
||||
async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = None):
|
||||
"""Cancels the given asyncio Task and awaits its completion with an optional timeout.
|
||||
@@ -394,6 +397,10 @@ class TaskManager(BaseTaskManager):
|
||||
|
||||
while True:
|
||||
try:
|
||||
if task_data.task.done():
|
||||
logger.debug(f"{name}: task is already done, cancelling watchdog task.")
|
||||
break
|
||||
|
||||
start_time = time.time()
|
||||
await asyncio.wait_for(timer.wait(), timeout=watchdog_timeout)
|
||||
total_time = time.time() - start_time
|
||||
@@ -417,7 +424,22 @@ class TaskManager(BaseTaskManager):
|
||||
task_data = self._tasks[name]
|
||||
if task_data.watchdog_task:
|
||||
task_data.watchdog_task.cancel()
|
||||
# In Python 3.10, simply calling task.cancel() does not appear to be enough.
|
||||
# Without this, some tasks are never actually cancelled.
|
||||
# Python 3.12 handles this more gracefully, but we keep this for compatibility
|
||||
# and to avoid "Task exception was never retrieved" warnings.
|
||||
self.get_event_loop().create_task(
|
||||
self._cleanup_watchdog(name, task_data.watchdog_task)
|
||||
)
|
||||
task_data.watchdog_task = None
|
||||
del self._tasks[name]
|
||||
except KeyError as e:
|
||||
logger.trace(f"{name}: unable to remove task data (already removed?): {e}")
|
||||
|
||||
async def _cleanup_watchdog(self, name: str, watchdog_task: asyncio.Task):
|
||||
try:
|
||||
await watchdog_task
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(f"{name}: watchdog task raised exception: {e}")
|
||||
|
||||
@@ -60,3 +60,8 @@ class WatchdogEvent(asyncio.Event):
|
||||
return True
|
||||
except asyncio.TimeoutError:
|
||||
self._manager.task_reset_watchdog()
|
||||
|
||||
def clear(self):
|
||||
if self._manager.task_watchdog_enabled:
|
||||
self._manager.task_reset_watchdog()
|
||||
super().clear()
|
||||
|
||||
@@ -12,10 +12,19 @@ timeouts during legitimate queue operations.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.utils.asyncio.task_manager import BaseTaskManager
|
||||
|
||||
|
||||
@dataclass
class WatchdogPriorityCancelSentinel:
    """Marker item used to force cancellation of a priority-queue consumer.

    The queue's ``cancel()`` enqueues an instance of this class, and ``get()``
    raises ``asyncio.CancelledError`` when it dequeues one.

    The sentinel has to be comparable with whatever items the queue holds,
    because the underlying heap orders entries with ``<``. Both directions of
    that comparison are defined so that it always sorts first and never
    raises ``TypeError``.
    """

    def __lt__(self, other):
        """Report the sentinel as smaller than any other item."""
        return True

    def __gt__(self, other):
        """Report the sentinel as never greater than any other item.

        Python falls back to this reflected method for ``other < sentinel``
        when ``other`` (e.g. a tuple) does not know how to compare itself
        with the sentinel. Without it, enqueuing an item after the sentinel
        has been queued would raise ``TypeError`` inside the heap.
        """
        return False
|
||||
|
||||
|
||||
class WatchdogPriorityQueue(asyncio.PriorityQueue):
|
||||
"""Watchdog-enabled asyncio PriorityQueue.
|
||||
|
||||
@@ -49,9 +58,17 @@ class WatchdogPriorityQueue(asyncio.PriorityQueue):
|
||||
The next item from the priority queue.
|
||||
"""
|
||||
if self._manager.task_watchdog_enabled:
|
||||
return await self._watchdog_get()
|
||||
get_result = await self._watchdog_get()
|
||||
else:
|
||||
return await super().get()
|
||||
get_result = await super().get()
|
||||
|
||||
if isinstance(get_result, WatchdogPriorityCancelSentinel):
|
||||
logger.debug(
|
||||
"Received WatchdogPriorityCancelSentinel, throwing CancelledError to force cancelling"
|
||||
)
|
||||
raise asyncio.CancelledError("Cancelling watchdog queue get() call.")
|
||||
else:
|
||||
return get_result
|
||||
|
||||
def task_done(self):
|
||||
"""Mark a task as done and reset watchdog if enabled.
|
||||
@@ -62,6 +79,20 @@ class WatchdogPriorityQueue(asyncio.PriorityQueue):
|
||||
self._manager.task_reset_watchdog()
|
||||
super().task_done()
|
||||
|
||||
def cancel(self):
|
||||
"""Ensures reliable task cancellation by preventing a common race condition.
|
||||
|
||||
The race condition occurs in Python 3.10+ when:
|
||||
1. A value is put in the queue just before task cancellation
|
||||
2. queue.get() completes before the cancellation signal is delivered
|
||||
3. The task misses the CancelledError and continues running indefinitely
|
||||
|
||||
This method prevents the issue by injecting a special sentinel value that
|
||||
forces the task to raise CancelledError when consumed, ensuring proper
|
||||
task termination.
|
||||
"""
|
||||
super().put_nowait(WatchdogPriorityCancelSentinel())
|
||||
|
||||
async def _watchdog_get(self):
|
||||
"""Get item from queue while periodically resetting watchdog timer."""
|
||||
while True:
|
||||
|
||||
@@ -12,10 +12,18 @@ timeouts during legitimate queue operations.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.utils.asyncio.task_manager import BaseTaskManager
|
||||
|
||||
|
||||
@dataclass
|
||||
class WatchdogQueueCancelSentinel:
|
||||
pass
|
||||
|
||||
|
||||
class WatchdogQueue(asyncio.Queue):
|
||||
"""Watchdog-enabled asyncio Queue.
|
||||
|
||||
@@ -49,9 +57,17 @@ class WatchdogQueue(asyncio.Queue):
|
||||
The next item from the queue.
|
||||
"""
|
||||
if self._manager.task_watchdog_enabled:
|
||||
return await self._watchdog_get()
|
||||
get_result = await self._watchdog_get()
|
||||
else:
|
||||
return await super().get()
|
||||
get_result = await super().get()
|
||||
|
||||
if isinstance(get_result, WatchdogQueueCancelSentinel):
|
||||
logger.debug(
|
||||
"Received WatchdogQueueCancelFrame, throwing CancelledError to force cancelling"
|
||||
)
|
||||
raise asyncio.CancelledError("Cancelling watchdog queue get() call.")
|
||||
else:
|
||||
return get_result
|
||||
|
||||
def task_done(self):
|
||||
"""Mark a task as done and reset watchdog if enabled.
|
||||
@@ -62,6 +78,20 @@ class WatchdogQueue(asyncio.Queue):
|
||||
self._manager.task_reset_watchdog()
|
||||
super().task_done()
|
||||
|
||||
def cancel(self):
|
||||
"""Ensures reliable task cancellation by preventing a common race condition.
|
||||
|
||||
The race condition occurs in Python 3.10+ when:
|
||||
1. A value is put in the queue just before task cancellation
|
||||
2. queue.get() completes before the cancellation signal is delivered
|
||||
3. The task misses the CancelledError and continues running indefinitely
|
||||
|
||||
This method prevents the issue by injecting a special sentinel value that
|
||||
forces the task to raise CancelledError when consumed, ensuring proper
|
||||
task termination.
|
||||
"""
|
||||
super().put_nowait(WatchdogQueueCancelSentinel())
|
||||
|
||||
async def _watchdog_get(self):
|
||||
"""Get item from queue while periodically resetting watchdog timer."""
|
||||
while True:
|
||||
|
||||
Reference in New Issue
Block a user