From b3bb6fdaa5b5f4c836a3eea766e33323918b2a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleix=20Conchillo=20Flaqu=C3=A9?= Date: Thu, 16 Apr 2026 09:16:53 -0700 Subject: [PATCH] Modernize Python typing across the codebase Automated via ruff UP006, UP007, UP035, UP045 rules (target: py311): - Replace `typing.List`, `Dict`, `Tuple`, `Set`, `FrozenSet`, `Type` with their built-in equivalents (`list`, `dict`, `tuple`, etc.) - Replace `typing.Optional[X]` with `X | None` - Replace `typing.Union[X, Y]` with `X | Y` - Move `Mapping`, `Sequence`, `Callable`, `Awaitable`, `MutableMapping`, `MutableSequence`, `Iterator`, `AsyncIterator`, `AsyncGenerator` imports from `typing` to `collections.abc` - Remove now-unused `typing` imports - Add `from __future__ import annotations` to 5 files that use forward-reference strings in `X | "Y"` annotations - Also apply the related non-typing cleanups included in this diff: drop the redundant `"r"` mode from `open()` calls, catch the builtin `TimeoutError` instead of `asyncio.TimeoutError`, and use `datetime.UTC` instead of `timezone.utc` --- .../persistent-context-anthropic.py | 2 +- .../persistent-context-aws-nova-sonic.py | 2 +- .../persistent-context-gemini.py | 2 +- .../persistent-context-grok-realtime.py | 2 +- .../persistent-context-openai-realtime.py | 2 +- ...ersistent-context-openai-responses-http.py | 2 +- .../persistent-context-openai-responses.py | 2 +- .../persistent-context-openai.py | 2 +- examples/rag/rag-gemini.py | 2 +- .../transports/transports-small-webrtc.py | 3 +- .../turn-management-user-assistant-turns.py | 4 +- scripts/evals/eval.py | 16 +- scripts/evals/run-release-evals.py | 4 +- scripts/evals/utils.py | 2 +- scripts/krisp/audio_file_utils.py | 3 +- src/pipecat/adapters/base_llm_adapter.py | 24 +-- .../adapters/schemas/direct_function.py | 21 +-- .../adapters/schemas/function_schema.py | 10 +- src/pipecat/adapters/schemas/tools_schema.py | 12 +- .../adapters/services/anthropic_adapter.py | 22 +-- .../services/aws_nova_sonic_adapter.py | 22 +-- .../adapters/services/bedrock_adapter.py | 24 +-- .../adapters/services/gemini_adapter.py | 36 ++-- .../services/grok_realtime_adapter.py | 22 +-- .../services/inworld_realtime_adapter.py | 22 +-- 
.../adapters/services/open_ai_adapter.py | 16 +- .../services/open_ai_realtime_adapter.py | 22 +-- .../services/open_ai_responses_adapter.py | 18 +- .../adapters/services/perplexity_adapter.py | 7 +- src/pipecat/audio/dtmf/types.py | 4 +- src/pipecat/audio/dtmf/utils.py | 5 +- src/pipecat/audio/filters/aic_filter.py | 45 +++-- src/pipecat/audio/filters/koala_filter.py | 2 +- src/pipecat/audio/mixers/soundfile_mixer.py | 5 +- src/pipecat/audio/turn/base_turn_analyzer.py | 5 +- src/pipecat/audio/turn/krisp_viva_turn.py | 17 +- .../audio/turn/smart_turn/base_smart_turn.py | 12 +- .../audio/turn/smart_turn/http_smart_turn.py | 10 +- .../smart_turn/local_coreml_smart_turn.py | 4 +- .../turn/smart_turn/local_smart_turn_v2.py | 4 +- .../turn/smart_turn/local_smart_turn_v3.py | 8 +- src/pipecat/audio/vad/aic_vad.py | 19 +- src/pipecat/audio/vad/krisp_viva_vad.py | 7 +- src/pipecat/audio/vad/silero.py | 3 +- src/pipecat/audio/vad/vad_analyzer.py | 3 +- src/pipecat/audio/vad/vad_controller.py | 7 +- src/pipecat/extensions/ivr/ivr_navigator.py | 11 +- .../voicemail/voicemail_detector.py | 17 +- src/pipecat/frames/frames.py | 174 +++++++++--------- src/pipecat/metrics/metrics.py | 10 +- src/pipecat/observers/base_observer.py | 3 +- .../observers/loggers/debug_log_observer.py | 9 +- .../observers/loggers/metrics_log_observer.py | 4 +- .../observers/startup_timing_observer.py | 21 +-- .../observers/user_bot_latency_observer.py | 33 ++-- src/pipecat/pipeline/base_task.py | 2 +- src/pipecat/pipeline/llm_switcher.py | 16 +- src/pipecat/pipeline/parallel_pipeline.py | 7 +- src/pipecat/pipeline/pipeline.py | 12 +- src/pipecat/pipeline/runner.py | 5 +- src/pipecat/pipeline/service_switcher.py | 26 +-- .../pipeline/sync_parallel_pipeline.py | 5 +- src/pipecat/pipeline/task.py | 77 ++++---- src/pipecat/pipeline/task_observer.py | 8 +- .../processors/aggregators/dtmf_aggregator.py | 5 +- src/pipecat/processors/aggregators/gated.py | 4 +- .../processors/aggregators/llm_context.py | 31 
++-- .../aggregators/llm_context_summarizer.py | 14 +- .../aggregators/llm_response_universal.py | 51 ++--- .../aggregators/llm_text_processor.py | 6 +- src/pipecat/processors/async_generator.py | 3 +- .../audio/audio_buffer_processor.py | 6 +- src/pipecat/processors/audio/vad_processor.py | 4 +- src/pipecat/processors/consumer_processor.py | 4 +- .../processors/filters/frame_filter.py | 4 +- .../processors/filters/function_filter.py | 4 +- .../processors/filters/wake_check_filter.py | 3 +- .../filters/wake_notifier_filter.py | 4 +- src/pipecat/processors/frame_processor.py | 70 ++++--- .../processors/frameworks/langchain.py | 6 +- .../processors/frameworks/rtvi/frames.py | 8 +- .../processors/frameworks/rtvi/models.py | 34 ++-- .../processors/frameworks/rtvi/observer.py | 39 ++-- .../processors/frameworks/rtvi/processor.py | 9 +- .../processors/frameworks/strands_agents.py | 8 +- .../processors/gstreamer/pipeline_source.py | 5 +- .../processors/idle_frame_processor.py | 6 +- src/pipecat/processors/logger.py | 6 +- .../metrics/frame_processor_metrics.py | 11 +- src/pipecat/processors/metrics/sentry.py | 9 +- src/pipecat/processors/producer_processor.py | 4 +- src/pipecat/processors/text_transformer.py | 4 +- src/pipecat/runner/daily.py | 17 +- src/pipecat/runner/livekit.py | 3 +- src/pipecat/runner/run.py | 18 +- src/pipecat/runner/types.py | 16 +- src/pipecat/runner/utils.py | 11 +- src/pipecat/serializers/base_serializer.py | 3 +- src/pipecat/serializers/exotel.py | 5 +- src/pipecat/serializers/genesys.py | 90 ++++----- src/pipecat/serializers/plivo.py | 11 +- src/pipecat/serializers/protobuf.py | 3 +- src/pipecat/serializers/telnyx.py | 9 +- src/pipecat/serializers/twilio.py | 15 +- src/pipecat/serializers/vonage.py | 5 +- src/pipecat/services/ai_service.py | 7 +- src/pipecat/services/anthropic/llm.py | 34 ++-- src/pipecat/services/assemblyai/models.py | 28 +-- src/pipecat/services/assemblyai/stt.py | 21 ++- src/pipecat/services/asyncai/tts.py | 39 ++-- 
src/pipecat/services/aws/agent_core.py | 22 +-- src/pipecat/services/aws/llm.py | 52 +++--- src/pipecat/services/aws/nova_sonic/llm.py | 69 +++---- .../services/aws/sagemaker/bidi_client.py | 13 +- src/pipecat/services/aws/stt.py | 21 ++- src/pipecat/services/aws/tts.py | 36 ++-- src/pipecat/services/aws/utils.py | 11 +- src/pipecat/services/azure/common.py | 4 +- src/pipecat/services/azure/image.py | 8 +- src/pipecat/services/azure/llm.py | 5 +- src/pipecat/services/azure/stt.py | 21 ++- src/pipecat/services/azure/tts.py | 50 +++-- src/pipecat/services/camb/tts.py | 25 +-- src/pipecat/services/cartesia/stt.py | 13 +- src/pipecat/services/cartesia/tts.py | 63 +++---- src/pipecat/services/cerebras/llm.py | 5 +- src/pipecat/services/deepgram/flux/base.py | 32 ++-- .../services/deepgram/flux/sagemaker/stt.py | 14 +- src/pipecat/services/deepgram/flux/stt.py | 24 +-- .../services/deepgram/sagemaker/stt.py | 21 ++- .../services/deepgram/sagemaker/tts.py | 15 +- src/pipecat/services/deepgram/stt.py | 77 ++++---- src/pipecat/services/deepgram/tts.py | 17 +- src/pipecat/services/deepseek/llm.py | 5 +- src/pipecat/services/elevenlabs/stt.py | 47 ++--- src/pipecat/services/elevenlabs/tts.py | 105 +++++------ src/pipecat/services/fal/image.py | 21 ++- src/pipecat/services/fal/stt.py | 22 +-- src/pipecat/services/fireworks/llm.py | 5 +- src/pipecat/services/fish/tts.py | 25 +-- src/pipecat/services/gladia/config.py | 90 ++++----- src/pipecat/services/gladia/stt.py | 19 +- src/pipecat/services/google/frames.py | 15 +- .../services/google/gemini_live/file_api.py | 12 +- .../services/google/gemini_live/llm.py | 86 +++++---- .../services/google/gemini_live/vertex/llm.py | 21 +-- src/pipecat/services/google/image.py | 11 +- src/pipecat/services/google/llm.py | 51 +++-- src/pipecat/services/google/rtvi.py | 10 +- src/pipecat/services/google/stt.py | 79 ++++---- src/pipecat/services/google/tts.py | 95 +++++----- src/pipecat/services/google/utils.py | 4 +- 
src/pipecat/services/google/vertex/llm.py | 23 ++- src/pipecat/services/gradium/stt.py | 23 +-- src/pipecat/services/gradium/tts.py | 17 +- src/pipecat/services/groq/llm.py | 5 +- src/pipecat/services/groq/stt.py | 15 +- src/pipecat/services/groq/tts.py | 16 +- .../services/heygen/api_interactive_avatar.py | 46 ++--- src/pipecat/services/heygen/api_liveavatar.py | 24 +-- src/pipecat/services/heygen/client.py | 12 +- src/pipecat/services/heygen/video.py | 13 +- src/pipecat/services/hume/tts.py | 19 +- src/pipecat/services/image_service.py | 4 +- .../services/inworld/realtime/events.py | 116 ++++++------ src/pipecat/services/inworld/realtime/llm.py | 25 +-- src/pipecat/services/inworld/tts.py | 80 ++++---- src/pipecat/services/kokoro/tts.py | 12 +- src/pipecat/services/llm_service.py | 69 ++++--- src/pipecat/services/lmnt/tts.py | 17 +- src/pipecat/services/mcp_service.py | 15 +- src/pipecat/services/mem0/memory.py | 22 +-- src/pipecat/services/minimax/tts.py | 33 ++-- src/pipecat/services/mistral/llm.py | 10 +- src/pipecat/services/mistral/stt.py | 21 ++- src/pipecat/services/mistral/tts.py | 8 +- src/pipecat/services/moondream/vision.py | 8 +- src/pipecat/services/nebius/llm.py | 3 +- src/pipecat/services/neuphonic/tts.py | 43 ++--- src/pipecat/services/novita/llm.py | 3 +- src/pipecat/services/nvidia/llm.py | 5 +- src/pipecat/services/nvidia/stt.py | 35 ++-- src/pipecat/services/nvidia/tts.py | 31 ++-- src/pipecat/services/ollama/llm.py | 5 +- src/pipecat/services/openai/base_llm.py | 49 +++-- src/pipecat/services/openai/image.py | 14 +- src/pipecat/services/openai/llm.py | 10 +- .../services/openai/realtime/events.py | 166 ++++++++--------- src/pipecat/services/openai/realtime/llm.py | 21 ++- src/pipecat/services/openai/responses/llm.py | 43 ++--- src/pipecat/services/openai/stt.py | 35 ++-- src/pipecat/services/openai/tts.py | 27 +-- src/pipecat/services/openrouter/llm.py | 10 +- src/pipecat/services/perplexity/llm.py | 5 +- src/pipecat/services/piper/tts.py | 
13 +- src/pipecat/services/qwen/llm.py | 5 +- src/pipecat/services/resembleai/tts.py | 14 +- src/pipecat/services/rime/tts.py | 95 +++++----- src/pipecat/services/sambanova/llm.py | 14 +- src/pipecat/services/sarvam/_sdk.py | 3 +- src/pipecat/services/sarvam/llm.py | 7 +- src/pipecat/services/sarvam/stt.py | 41 ++--- src/pipecat/services/sarvam/tts.py | 85 ++++----- src/pipecat/services/settings.py | 25 +-- src/pipecat/services/simli/video.py | 17 +- src/pipecat/services/smallest/stt.py | 17 +- src/pipecat/services/smallest/tts.py | 17 +- src/pipecat/services/soniox/stt.py | 47 ++--- src/pipecat/services/speechmatics/stt.py | 9 +- src/pipecat/services/speechmatics/tts.py | 10 +- src/pipecat/services/stt_service.py | 19 +- src/pipecat/services/tavus/video.py | 9 +- src/pipecat/services/together/llm.py | 5 +- src/pipecat/services/tts_service.py | 85 ++++----- src/pipecat/services/ultravox/llm.py | 48 ++--- src/pipecat/services/vision_service.py | 4 +- src/pipecat/services/websocket_service.py | 10 +- src/pipecat/services/whisper/base_stt.py | 26 +-- src/pipecat/services/whisper/stt.py | 33 ++-- src/pipecat/services/whisper/utils.py | 7 +- src/pipecat/services/xai/llm.py | 5 +- src/pipecat/services/xai/realtime/events.py | 94 +++++----- src/pipecat/services/xai/realtime/llm.py | 19 +- src/pipecat/services/xai/tts.py | 14 +- src/pipecat/services/xtts/tts.py | 15 +- src/pipecat/tests/utils.py | 12 +- src/pipecat/transcriptions/language.py | 16 +- src/pipecat/transports/base_input.py | 2 +- src/pipecat/transports/base_output.py | 19 +- src/pipecat/transports/base_transport.py | 22 +-- src/pipecat/transports/daily/transport.py | 153 ++++++++------- src/pipecat/transports/daily/utils.py | 60 +++--- src/pipecat/transports/heygen/transport.py | 14 +- src/pipecat/transports/lemonslice/api.py | 20 +- .../transports/lemonslice/transport.py | 39 ++-- src/pipecat/transports/livekit/transport.py | 35 ++-- src/pipecat/transports/local/audio.py | 9 +- 
src/pipecat/transports/local/tk.py | 9 +- .../transports/smallwebrtc/connection.py | 16 +- .../transports/smallwebrtc/request_handler.py | 23 +-- .../transports/smallwebrtc/transport.py | 23 +-- src/pipecat/transports/tavus/transport.py | 21 +-- src/pipecat/transports/websocket/client.py | 16 +- src/pipecat/transports/websocket/fastapi.py | 12 +- src/pipecat/transports/websocket/server.py | 22 +-- src/pipecat/transports/whatsapp/api.py | 32 ++-- src/pipecat/transports/whatsapp/client.py | 20 +- src/pipecat/turns/user_idle_controller.py | 5 +- .../user_mute/base_user_mute_strategy.py | 4 +- .../function_call_user_mute_strategy.py | 4 +- .../base_user_turn_start_strategy.py | 5 +- .../wake_phrase_user_turn_start_strategy.py | 9 +- .../user_stop/base_user_turn_stop_strategy.py | 5 +- .../external_user_turn_stop_strategy.py | 5 +- .../speech_timeout_user_turn_stop_strategy.py | 5 +- .../turn_analyzer_user_turn_stop_strategy.py | 7 +- .../turns/user_turn_completion_mixin.py | 14 +- src/pipecat/turns/user_turn_controller.py | 13 +- src/pipecat/turns/user_turn_processor.py | 6 +- src/pipecat/turns/user_turn_strategies.py | 9 +- src/pipecat/utils/asyncio/task_manager.py | 12 +- src/pipecat/utils/base_object.py | 8 +- .../context/llm_context_summarization.py | 20 +- src/pipecat/utils/frame_queue.py | 5 +- src/pipecat/utils/string.py | 16 +- .../utils/text/base_text_aggregator.py | 8 +- src/pipecat/utils/text/base_text_filter.py | 3 +- .../utils/text/markdown_text_filter.py | 11 +- .../utils/text/pattern_pair_aggregator.py | 6 +- .../utils/text/simple_text_aggregator.py | 6 +- .../utils/text/skip_tags_aggregator.py | 4 +- src/pipecat/utils/time.py | 2 +- .../utils/tracing/service_attributes.py | 70 +++---- .../utils/tracing/service_decorators.py | 19 +- src/pipecat/utils/tracing/tracing_context.py | 18 +- .../utils/tracing/turn_trace_observer.py | 22 ++- tests/test_direct_functions.py | 12 +- tests/test_frame_processor.py | 21 +-- tests/test_pipeline.py | 12 +- 
tests/test_service_language.py | 7 +- tests/test_tts_frame_ordering.py | 8 +- tests/test_vad_controller.py | 7 +- tests/test_vad_processor.py | 3 +- 283 files changed, 2902 insertions(+), 3020 deletions(-) diff --git a/examples/persistent-context/persistent-context-anthropic.py b/examples/persistent-context/persistent-context-anthropic.py index c401becb2..791f00440 100644 --- a/examples/persistent-context/persistent-context-anthropic.py +++ b/examples/persistent-context/persistent-context-anthropic.py @@ -84,7 +84,7 @@ async def load_conversation(params: FunctionCallParams): filename = params.arguments["filename"] logger.debug(f"loading conversation from {filename}") try: - with open(filename, "r") as file: + with open(filename) as file: params.context.set_messages(json.load(file)) logger.debug( f"loaded conversation from {filename}\n{json.dumps(params.context.get_messages(), indent=4)}" diff --git a/examples/persistent-context/persistent-context-aws-nova-sonic.py b/examples/persistent-context/persistent-context-aws-nova-sonic.py index e4dc635bf..62b9e7eab 100644 --- a/examples/persistent-context/persistent-context-aws-nova-sonic.py +++ b/examples/persistent-context/persistent-context-aws-nova-sonic.py @@ -105,7 +105,7 @@ async def load_conversation(params: FunctionCallParams): filename = params.arguments["filename"] logger.debug(f"loading conversation from {filename}") try: - with open(filename, "r") as file: + with open(filename) as file: messages = json.load(file) # HACK: if using the older Nova Sonic (pre-2) model, you need a special way of # triggering the first assistant response. 
The call to trigger_assistant_response(), diff --git a/examples/persistent-context/persistent-context-gemini.py b/examples/persistent-context/persistent-context-gemini.py index d236a15b1..2ec3cd0bb 100644 --- a/examples/persistent-context/persistent-context-gemini.py +++ b/examples/persistent-context/persistent-context-gemini.py @@ -110,7 +110,7 @@ async def load_conversation(params: FunctionCallParams): filename = params.arguments["filename"] logger.debug(f"loading conversation from {filename}") try: - with open(filename, "r") as file: + with open(filename) as file: params.context.set_messages(json.load(file)) await params.result_callback( { diff --git a/examples/persistent-context/persistent-context-grok-realtime.py b/examples/persistent-context/persistent-context-grok-realtime.py index 9942550e6..4a990d24e 100644 --- a/examples/persistent-context/persistent-context-grok-realtime.py +++ b/examples/persistent-context/persistent-context-grok-realtime.py @@ -94,7 +94,7 @@ async def load_conversation(params: FunctionCallParams): filename = params.arguments["filename"] logger.debug(f"loading conversation from {filename}") try: - with open(filename, "r") as file: + with open(filename) as file: params.context.set_messages(json.load(file)) await params.llm.reset_conversation() # Manually create a response since we've reset the conversation diff --git a/examples/persistent-context/persistent-context-openai-realtime.py b/examples/persistent-context/persistent-context-openai-realtime.py index ad27161bb..0a7a888a8 100644 --- a/examples/persistent-context/persistent-context-openai-realtime.py +++ b/examples/persistent-context/persistent-context-openai-realtime.py @@ -91,7 +91,7 @@ async def load_conversation(params: FunctionCallParams): filename = params.arguments["filename"] logger.debug(f"loading conversation from {filename}") try: - with open(filename, "r") as file: + with open(filename) as file: params.context.set_messages(json.load(file)) await 
params.llm.reset_conversation() # NOTE: we manually create a response here rather than relying diff --git a/examples/persistent-context/persistent-context-openai-responses-http.py b/examples/persistent-context/persistent-context-openai-responses-http.py index 0f13eda18..d77513049 100644 --- a/examples/persistent-context/persistent-context-openai-responses-http.py +++ b/examples/persistent-context/persistent-context-openai-responses-http.py @@ -85,7 +85,7 @@ async def load_conversation(params: FunctionCallParams): filename = params.arguments["filename"] logger.debug(f"loading conversation from {filename}") try: - with open(filename, "r") as file: + with open(filename) as file: params.context.set_messages(json.load(file)) logger.debug( f"loaded conversation from {filename}\n{json.dumps(params.context.get_messages(), indent=4)}" diff --git a/examples/persistent-context/persistent-context-openai-responses.py b/examples/persistent-context/persistent-context-openai-responses.py index 5fd9c7657..c89fe3ff1 100644 --- a/examples/persistent-context/persistent-context-openai-responses.py +++ b/examples/persistent-context/persistent-context-openai-responses.py @@ -85,7 +85,7 @@ async def load_conversation(params: FunctionCallParams): filename = params.arguments["filename"] logger.debug(f"loading conversation from {filename}") try: - with open(filename, "r") as file: + with open(filename) as file: params.context.set_messages(json.load(file)) logger.debug( f"loaded conversation from {filename}\n{json.dumps(params.context.get_messages(), indent=4)}" diff --git a/examples/persistent-context/persistent-context-openai.py b/examples/persistent-context/persistent-context-openai.py index 7f744fd46..22e160102 100644 --- a/examples/persistent-context/persistent-context-openai.py +++ b/examples/persistent-context/persistent-context-openai.py @@ -85,7 +85,7 @@ async def load_conversation(params: FunctionCallParams): filename = params.arguments["filename"] logger.debug(f"loading 
conversation from {filename}") try: - with open(filename, "r") as file: + with open(filename) as file: params.context.set_messages(json.load(file)) logger.debug( f"loaded conversation from {filename}\n{json.dumps(params.context.get_messages(), indent=4)}" diff --git a/examples/rag/rag-gemini.py b/examples/rag/rag-gemini.py index 5380f98d8..0f9a8ddb2 100644 --- a/examples/rag/rag-gemini.py +++ b/examples/rag/rag-gemini.py @@ -87,7 +87,7 @@ def get_rag_content(): """Get the RAG content from the file.""" script_dir = os.path.dirname(os.path.abspath(__file__)) rag_content_path = os.path.join(script_dir, "assets", "rag-content.txt") - with open(rag_content_path, "r") as f: + with open(rag_content_path) as f: return f.read() diff --git a/examples/transports/transports-small-webrtc.py b/examples/transports/transports-small-webrtc.py index 286b1143a..18403940b 100644 --- a/examples/transports/transports-small-webrtc.py +++ b/examples/transports/transports-small-webrtc.py @@ -8,7 +8,6 @@ import argparse import asyncio import os from contextlib import asynccontextmanager -from typing import Dict import uvicorn from dotenv import load_dotenv @@ -39,7 +38,7 @@ load_dotenv(override=True) app = FastAPI() # Store connections by pc_id -pcs_map: Dict[str, SmallWebRTCConnection] = {} +pcs_map: dict[str, SmallWebRTCConnection] = {} ice_servers = [ IceServer( diff --git a/examples/turn-management/turn-management-user-assistant-turns.py b/examples/turn-management/turn-management-user-assistant-turns.py index afc45423b..380c4866d 100644 --- a/examples/turn-management/turn-management-user-assistant-turns.py +++ b/examples/turn-management/turn-management-user-assistant-turns.py @@ -45,13 +45,13 @@ class TranscriptHandler: output_file: Optional path to file where transcript is saved. If None, outputs to log only. """ - def __init__(self, output_file: Optional[str] = None): + def __init__(self, output_file: str | None = None): """Initialize handler with optional file output. 
Args: output_file: Path to output file. If None, outputs to log only. """ - self.output_file: Optional[str] = output_file + self.output_file: str | None = output_file logger.debug( f"TranscriptHandler initialized {'with output_file=' + output_file if output_file else 'with log output only'}" ) diff --git a/scripts/evals/eval.py b/scripts/evals/eval.py index fc9997318..cf7ecf257 100644 --- a/scripts/evals/eval.py +++ b/scripts/evals/eval.py @@ -13,7 +13,7 @@ import wave from dataclasses import dataclass from datetime import datetime from pathlib import Path -from typing import Any, List, Optional, Tuple +from typing import Any import aiofiles from loguru import logger @@ -60,7 +60,7 @@ PIPELINE_IDLE_TIMEOUT_SECS = 60 EVAL_TIMEOUT_SECS = 120 EVAL_RESULT_TIMEOUT_SECS = 10 -EvalPrompt = str | Tuple[str, ImageFile] +EvalPrompt = str | tuple[str, ImageFile] @dataclass @@ -68,7 +68,7 @@ class EvalConfig: prompt: EvalPrompt eval: str eval_speaks_first: bool = False - runner_args_body: Optional[Any] = None + runner_args_body: Any | None = None class EvalRunner: @@ -78,7 +78,7 @@ class EvalRunner: examples_dir: Path, pattern: str = "", record_audio: bool = False, - name: Optional[str] = None, + name: str | None = None, log_level: str = "DEBUG", ): self._examples_dir = examples_dir @@ -86,8 +86,8 @@ class EvalRunner: self._record_audio = record_audio self._log_level = log_level self._total_success = 0 - self._tests: List[EvalResult] = [] - self._result_future: Optional[asyncio.Future[bool]] = None + self._tests: list[EvalResult] = [] + self._result_future: asyncio.Future[bool] | None = None # We to save runner files. name = name or f"{datetime.now().strftime('%Y%m%d_%H%M%S')}" @@ -150,7 +150,7 @@ class EvalRunner: try: # Wait for the future to resolve. 
result = await asyncio.wait_for(self._result_future, timeout=EVAL_RESULT_TIMEOUT_SECS) - except asyncio.TimeoutError: + except TimeoutError: logger.error(f"ERROR: Timeout waiting for eval result.") result = False @@ -282,7 +282,7 @@ async def run_eval_pipeline( # Load example prompt depending on image. example_prompt = "" - example_image: Optional[ImageFile] = None + example_image: ImageFile | None = None if isinstance(eval_config.prompt, str): example_prompt = eval_config.prompt elif isinstance(eval_config.prompt, tuple): diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py index 3bf6a0dfd..ebf5b36c2 100644 --- a/scripts/evals/run-release-evals.py +++ b/scripts/evals/run-release-evals.py @@ -7,7 +7,7 @@ import argparse import asyncio import sys -from datetime import datetime, timezone +from datetime import UTC, datetime, timezone from pathlib import Path from dotenv import load_dotenv @@ -41,7 +41,7 @@ EVAL_WEATHER_AND_RESTAURANT = EvalConfig( EVAL_ONLINE_SEARCH = EvalConfig( prompt="What's the current date in UTC?", - eval=f"Current date in UTC is {datetime.now(timezone.utc).strftime('%A, %B %d, %Y')}.", + eval=f"Current date in UTC is {datetime.now(UTC).strftime('%A, %B %d, %Y')}.", ) EVAL_SWITCH_LANGUAGE = EvalConfig( diff --git a/scripts/evals/utils.py b/scripts/evals/utils.py index 8111d404f..7ce9dfe84 100644 --- a/scripts/evals/utils.py +++ b/scripts/evals/utils.py @@ -6,9 +6,9 @@ import importlib.util import os +from collections.abc import Sequence from dataclasses import dataclass from pathlib import Path -from typing import Sequence GREEN = "\033[92m" RED = "\033[91m" diff --git a/scripts/krisp/audio_file_utils.py b/scripts/krisp/audio_file_utils.py index 620ffe6d1..d7b412173 100644 --- a/scripts/krisp/audio_file_utils.py +++ b/scripts/krisp/audio_file_utils.py @@ -5,13 +5,12 @@ handling format detection and conversion to int16 PCM format. 
""" import sys -from typing import Tuple import numpy as np import soundfile as sf -def read_audio_file(input_path: str, verbose: bool = False) -> Tuple[np.ndarray, int]: +def read_audio_file(input_path: str, verbose: bool = False) -> tuple[np.ndarray, int]: """Read an audio file and convert to int16 mono format. This function: diff --git a/src/pipecat/adapters/base_llm_adapter.py b/src/pipecat/adapters/base_llm_adapter.py index 9c6747766..5d1063c54 100644 --- a/src/pipecat/adapters/base_llm_adapter.py +++ b/src/pipecat/adapters/base_llm_adapter.py @@ -12,7 +12,7 @@ adapters that handle tool format conversion and standardization. import warnings from abc import ABC, abstractmethod -from typing import Any, Dict, Generic, List, Optional, TypeVar +from typing import Any, Generic, TypeVar from loguru import logger @@ -50,10 +50,10 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]): def __init__(self): """Initialize the adapter.""" self._warned_system_instruction = False - self._builtin_tools: Dict[str, FunctionSchema] = {} + self._builtin_tools: dict[str, FunctionSchema] = {} @property - def builtin_tools(self) -> Dict[str, FunctionSchema]: + def builtin_tools(self) -> dict[str, FunctionSchema]: """Built-in tools automatically merged into every inference request. Keyed by tool name for O(1) lookup, insertion, and removal. The @@ -90,7 +90,7 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]): pass @abstractmethod - def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Any]: + def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[Any]: """Convert tools schema to the provider's specific format. 
Args: @@ -102,7 +102,7 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]): pass @abstractmethod - def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]: + def get_messages_for_logging(self, context: LLMContext) -> list[dict[str, Any]]: """Get messages from a universal LLM context in a format ready for logging about this provider. Args: @@ -127,7 +127,7 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]): def get_messages( self, context: LLMContext, *, truncate_large_values: bool = False - ) -> List[LLMContextMessage]: + ) -> list[LLMContextMessage]: """Get messages from the LLM context, including standard and LLM-specific messages. Args: @@ -142,7 +142,7 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]): self.id_for_llm_specific_messages, truncate_large_values=truncate_large_values ) - def from_standard_tools(self, tools: Any) -> List[Any] | NotGiven: + def from_standard_tools(self, tools: Any) -> list[Any] | NotGiven: """Convert tools from standard format to provider format. Built-in tools are automatically merged into the schema before conversion so that every @@ -188,8 +188,8 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]): self, messages: list, *, - system_instruction: Optional[str] = None, - ) -> Optional[str]: + system_instruction: str | None = None, + ) -> str | None: """Extract an initial ``"system"`` message for use as a system instruction. Only useful for services that expect the system instruction as a @@ -247,11 +247,11 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]): def _resolve_system_instruction( self, - system_from_context: Optional[str], - system_instruction: Optional[str], + system_from_context: str | None, + system_instruction: str | None, *, discard_context_system: bool, - ) -> Optional[str]: + ) -> str | None: """Resolve conflict between ``system_instruction`` and an extracted context system message. 
Args: diff --git a/src/pipecat/adapters/schemas/direct_function.py b/src/pipecat/adapters/schemas/direct_function.py index 62eb9f76d..47afaa897 100644 --- a/src/pipecat/adapters/schemas/direct_function.py +++ b/src/pipecat/adapters/schemas/direct_function.py @@ -15,16 +15,11 @@ formats). import inspect import types +from collections.abc import Callable, Mapping from typing import ( TYPE_CHECKING, Any, - Callable, - Dict, - List, - Mapping, Protocol, - Set, - Tuple, Union, get_args, get_origin, @@ -144,8 +139,8 @@ class BaseDirectFunctionWrapper: # TODO: maybe to better support things like enums, check if each type is a pydantic type and use its convert-to-jsonschema function def _get_parameters_as_jsonschema( - self, func: Callable, docstring_params: List[docstring_parser.DocstringParam] - ) -> Tuple[Dict[str, Any], List[str]]: + self, func: Callable, docstring_params: list[docstring_parser.DocstringParam] + ) -> tuple[dict[str, Any], list[str]]: """Get function parameters as a dictionary of JSON schemas and a list of required parameters. Ignore the first parameter, as it's expected to be the "special" one. @@ -193,7 +188,7 @@ class BaseDirectFunctionWrapper: return properties, required - def _typehint_to_jsonschema(self, type_hint: Any) -> Dict[str, Any]: + def _typehint_to_jsonschema(self, type_hint: Any) -> dict[str, Any]: """Convert a Python type hint to a JSON Schema. 
Args: @@ -216,9 +211,9 @@ class BaseDirectFunctionWrapper: return {"type": "number"} elif type_hint is bool: return {"type": "boolean"} - elif type_hint is dict or type_hint is Dict: + elif type_hint is dict or type_hint is dict: return {"type": "object"} - elif type_hint is list or type_hint is List: + elif type_hint is list or type_hint is list: return {"type": "array"} # Get origin and arguments for complex types @@ -230,11 +225,11 @@ class BaseDirectFunctionWrapper: return {"anyOf": [self._typehint_to_jsonschema(arg) for arg in args]} # Handle List, Tuple, Set with specific item types - if origin in (list, List, tuple, Tuple, set, Set) and args: + if origin in (list, list, tuple, tuple, set, set) and args: return {"type": "array", "items": self._typehint_to_jsonschema(args[0])} # Handle Dict with specific key/value types - if origin in (dict, Dict) and len(args) == 2: + if origin in (dict, dict) and len(args) == 2: # For JSON Schema, keys must be strings return {"type": "object", "additionalProperties": self._typehint_to_jsonschema(args[1])} diff --git a/src/pipecat/adapters/schemas/function_schema.py b/src/pipecat/adapters/schemas/function_schema.py index 5fba8dd57..046ac04f9 100644 --- a/src/pipecat/adapters/schemas/function_schema.py +++ b/src/pipecat/adapters/schemas/function_schema.py @@ -11,7 +11,7 @@ tools and functions used with AI models, ensuring consistent formatting across different AI service providers. """ -from typing import Any, Dict, List +from typing import Any class FunctionSchema: @@ -23,7 +23,7 @@ class FunctionSchema: """ def __init__( - self, name: str, description: str, properties: Dict[str, Any], required: List[str] + self, name: str, description: str, properties: dict[str, Any], required: list[str] ) -> None: """Initialize the function schema. 
@@ -38,7 +38,7 @@ class FunctionSchema: self._properties = properties self._required = required - def to_default_dict(self) -> Dict[str, Any]: + def to_default_dict(self) -> dict[str, Any]: """Converts the function schema to a dictionary. Returns: @@ -73,7 +73,7 @@ class FunctionSchema: return self._description @property - def properties(self) -> Dict[str, Any]: + def properties(self) -> dict[str, Any]: """Get the function properties. Returns: @@ -82,7 +82,7 @@ class FunctionSchema: return self._properties @property - def required(self) -> List[str]: + def required(self) -> list[str]: """Get the required parameters. Returns: diff --git a/src/pipecat/adapters/schemas/tools_schema.py b/src/pipecat/adapters/schemas/tools_schema.py index 1c1ba0dd3..28c2b9b88 100644 --- a/src/pipecat/adapters/schemas/tools_schema.py +++ b/src/pipecat/adapters/schemas/tools_schema.py @@ -11,7 +11,7 @@ and custom adapter-specific tools in the Pipecat framework. """ from enum import Enum -from typing import Any, Dict, List, Optional +from typing import Any from pipecat.adapters.schemas.direct_function import DirectFunction, DirectFunctionWrapper from pipecat.adapters.schemas.function_schema import FunctionSchema @@ -39,8 +39,8 @@ class ToolsSchema: def __init__( self, - standard_tools: List[FunctionSchema | DirectFunction], - custom_tools: Optional[Dict[AdapterType, List[Dict[str, Any]]]] = None, + standard_tools: list[FunctionSchema | DirectFunction], + custom_tools: dict[AdapterType, list[dict[str, Any]]] | None = None, ) -> None: """Initialize the tools schema. @@ -66,7 +66,7 @@ class ToolsSchema: self._custom_tools = custom_tools @property - def standard_tools(self) -> List[FunctionSchema]: + def standard_tools(self) -> list[FunctionSchema]: """Get the list of standard function schema tools. 
Returns: @@ -75,7 +75,7 @@ class ToolsSchema: return self._standard_tools @property - def custom_tools(self) -> Dict[AdapterType, List[Dict[str, Any]]]: + def custom_tools(self) -> dict[AdapterType, list[dict[str, Any]]]: """Get the custom tools dictionary. Returns: @@ -84,7 +84,7 @@ class ToolsSchema: return self._custom_tools @custom_tools.setter - def custom_tools(self, value: Dict[AdapterType, List[Dict[str, Any]]]) -> None: + def custom_tools(self, value: dict[AdapterType, list[dict[str, Any]]]) -> None: """Set the custom tools dictionary. Args: diff --git a/src/pipecat/adapters/services/anthropic_adapter.py b/src/pipecat/adapters/services/anthropic_adapter.py index 9617dadeb..067d1eb22 100644 --- a/src/pipecat/adapters/services/anthropic_adapter.py +++ b/src/pipecat/adapters/services/anthropic_adapter.py @@ -9,7 +9,7 @@ import copy import json from dataclasses import dataclass -from typing import Any, Dict, List, Optional, TypedDict +from typing import Any, TypedDict from anthropic import NOT_GIVEN, NotGiven from anthropic.types.message_param import MessageParam @@ -31,8 +31,8 @@ class AnthropicLLMInvocationParams(TypedDict): """Context-based parameters for invoking Anthropic's LLM API.""" system: str | NotGiven - messages: List[MessageParam] - tools: List[ToolUnionParam] + messages: list[MessageParam] + tools: list[ToolUnionParam] class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]): @@ -51,7 +51,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]): self, context: LLMContext, enable_prompt_caching: bool, - system_instruction: Optional[str] = None, + system_instruction: str | None = None, ) -> AnthropicLLMInvocationParams: """Get Anthropic-specific LLM invocation parameters from a universal LLM context. 
@@ -83,7 +83,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]): "tools": self.from_standard_tools(context.tools) or [], } - def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]: + def get_messages_for_logging(self, context: LLMContext) -> list[dict[str, Any]]: """Get messages from a universal LLM context in a format ready for logging about Anthropic. Removes or truncates sensitive data like image content for safe logging. @@ -115,14 +115,14 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]): class ConvertedMessages: """Container for Anthropic-formatted messages converted from universal context.""" - messages: List[MessageParam] + messages: list[MessageParam] system: str | NotGiven def _from_universal_context_messages( self, - universal_context_messages: List[LLMContextMessage], + universal_context_messages: list[LLMContextMessage], *, - system_instruction: Optional[str] = None, + system_instruction: str | None = None, ) -> ConvertedMessages: system = NOT_GIVEN @@ -333,7 +333,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]): return message - def _with_cache_control_markers(self, messages: List[MessageParam]) -> List[MessageParam]: + def _with_cache_control_markers(self, messages: list[MessageParam]) -> list[MessageParam]: """Add cache control markers to messages for prompt caching. Args: @@ -381,7 +381,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]): return messages_with_markers @staticmethod - def _to_anthropic_function_format(function: FunctionSchema) -> Dict[str, Any]: + def _to_anthropic_function_format(function: FunctionSchema) -> dict[str, Any]: """Convert a single function schema to Anthropic's format. 
Args: @@ -400,7 +400,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]): }, } - def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]: + def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]: """Convert function schemas to Anthropic's function-calling format. Args: diff --git a/src/pipecat/adapters/services/aws_nova_sonic_adapter.py b/src/pipecat/adapters/services/aws_nova_sonic_adapter.py index d0dca53c0..e38fe901b 100644 --- a/src/pipecat/adapters/services/aws_nova_sonic_adapter.py +++ b/src/pipecat/adapters/services/aws_nova_sonic_adapter.py @@ -10,7 +10,7 @@ import copy import json from dataclasses import dataclass from enum import Enum -from typing import Any, Dict, List, Optional, TypedDict +from typing import Any, TypedDict from loguru import logger @@ -55,9 +55,9 @@ class AWSNovaSonicLLMInvocationParams(TypedDict): This is a placeholder until support for universal LLMContext machinery is added for AWS Nova Sonic. """ - system_instruction: Optional[str] - messages: List[AWSNovaSonicConversationHistoryMessage] - tools: List[Dict[str, Any]] + system_instruction: str | None + messages: list[AWSNovaSonicConversationHistoryMessage] + tools: list[dict[str, Any]] class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]): @@ -73,7 +73,7 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]): return "aws-nova-sonic" def get_llm_invocation_params( - self, context: LLMContext, *, system_instruction: Optional[str] = None + self, context: LLMContext, *, system_instruction: str | None = None ) -> AWSNovaSonicLLMInvocationParams: """Get AWS Nova Sonic-specific LLM invocation parameters from a universal LLM context. 
@@ -97,7 +97,7 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]): "tools": self.from_standard_tools(context.tools) or [], } - def get_messages_for_logging(self, context) -> List[Dict[str, Any]]: + def get_messages_for_logging(self, context) -> list[dict[str, Any]]: """Get messages from a universal LLM context in a format ready for logging about AWS Nova Sonic. Removes or truncates sensitive data like image content for safe logging. @@ -116,11 +116,11 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]): class ConvertedMessages: """Container for Google-formatted messages converted from universal context.""" - messages: List[AWSNovaSonicConversationHistoryMessage] - system_instruction: Optional[str] = None + messages: list[AWSNovaSonicConversationHistoryMessage] + system_instruction: str | None = None def _from_universal_context_messages( - self, universal_context_messages: List[LLMContextMessage] + self, universal_context_messages: list[LLMContextMessage] ) -> ConvertedMessages: system_instruction = None messages = [] @@ -187,7 +187,7 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]): # Sonic conversation history @staticmethod - def _to_aws_nova_sonic_function_format(function: FunctionSchema) -> Dict[str, Any]: + def _to_aws_nova_sonic_function_format(function: FunctionSchema) -> dict[str, Any]: """Convert a function schema to AWS Nova Sonic format. Args: @@ -212,7 +212,7 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]): } } - def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]: + def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]: """Convert tools schema to AWS Nova Sonic function-calling format. 
Args: diff --git a/src/pipecat/adapters/services/bedrock_adapter.py b/src/pipecat/adapters/services/bedrock_adapter.py index 3150e6458..bb1223880 100644 --- a/src/pipecat/adapters/services/bedrock_adapter.py +++ b/src/pipecat/adapters/services/bedrock_adapter.py @@ -10,7 +10,7 @@ import base64 import copy import json from dataclasses import dataclass -from typing import Any, Dict, List, Optional, TypedDict +from typing import Any, TypedDict from loguru import logger @@ -29,9 +29,9 @@ from pipecat.processors.aggregators.llm_context import ( class AWSBedrockLLMInvocationParams(TypedDict): """Context-based parameters for invoking AWS Bedrock's LLM API.""" - system: Optional[List[dict[str, Any]]] # [{"text": "system message"}] - messages: List[dict[str, Any]] - tools: List[dict[str, Any]] + system: list[dict[str, Any]] | None # [{"text": "system message"}] + messages: list[dict[str, Any]] + tools: list[dict[str, Any]] tool_choice: LLMContextToolChoice @@ -48,7 +48,7 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]): return "aws" def get_llm_invocation_params( - self, context: LLMContext, *, system_instruction: Optional[str] = None + self, context: LLMContext, *, system_instruction: str | None = None ) -> AWSBedrockLLMInvocationParams: """Get AWS Bedrock-specific LLM invocation parameters from a universal LLM context. @@ -79,7 +79,7 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]): "tool_choice": context.tool_choice, } - def get_messages_for_logging(self, context) -> List[Dict[str, Any]]: + def get_messages_for_logging(self, context) -> list[dict[str, Any]]: """Get messages from a universal LLM context in a format ready for logging about AWS Bedrock. Removes or truncates sensitive data like image content for safe logging. 
@@ -109,14 +109,14 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]): class ConvertedMessages: """Container for Bedrock-formatted messages converted from universal context.""" - messages: List[dict[str, Any]] - system: Optional[str] + messages: list[dict[str, Any]] + system: str | None def _from_universal_context_messages( self, - universal_context_messages: List[LLMContextMessage], + universal_context_messages: list[LLMContextMessage], *, - system_instruction: Optional[str] = None, + system_instruction: str | None = None, ) -> ConvertedMessages: system = None @@ -305,7 +305,7 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]): return message @staticmethod - def _to_bedrock_function_format(function: FunctionSchema) -> Dict[str, Any]: + def _to_bedrock_function_format(function: FunctionSchema) -> dict[str, Any]: """Convert a function schema to Bedrock's tool format. Args: @@ -328,7 +328,7 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]): } } - def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]: + def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]: """Convert function schemas to Bedrock's function-calling format. 
Args: diff --git a/src/pipecat/adapters/services/gemini_adapter.py b/src/pipecat/adapters/services/gemini_adapter.py index 565d0d0b8..aede18e7c 100644 --- a/src/pipecat/adapters/services/gemini_adapter.py +++ b/src/pipecat/adapters/services/gemini_adapter.py @@ -9,7 +9,7 @@ import base64 import json from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, TypedDict +from typing import Any, TypedDict from loguru import logger from openai import NotGiven @@ -34,9 +34,9 @@ except ModuleNotFoundError as e: class GeminiLLMInvocationParams(TypedDict): """Context-based parameters for invoking Gemini LLM.""" - system_instruction: Optional[str] - messages: List[Content] - tools: List[Any] | NotGiven + system_instruction: str | None + messages: list[Content] + tools: list[Any] | NotGiven class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): @@ -54,7 +54,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): return "google" def get_llm_invocation_params( - self, context: LLMContext, *, system_instruction: Optional[str] = None + self, context: LLMContext, *, system_instruction: str | None = None ) -> GeminiLLMInvocationParams: """Get Gemini-specific LLM invocation parameters from a universal LLM context. @@ -81,7 +81,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): "tools": self.from_standard_tools(context.tools), } - def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]: + def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]: """Convert tool schemas to Gemini's function-calling format. Args: @@ -92,7 +92,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): Includes both converted standard tools and any custom Gemini-specific tools. 
""" - def _strip_additional_properties(schema: Dict[str, Any]) -> Dict[str, Any]: + def _strip_additional_properties(schema: dict[str, Any]) -> dict[str, Any]: """Recursively remove "additionalProperties" fields from JSON schema, as they're not supported by Gemini. Args: @@ -139,7 +139,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): return formatted_standard_tools + custom_gemini_tools - def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]: + def get_messages_for_logging(self, context: LLMContext) -> list[dict[str, Any]]: """Get messages from a universal LLM context in a format ready for logging about Gemini. Removes or truncates sensitive data like image content for safe logging. @@ -173,8 +173,8 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): class ConvertedMessages: """Container for Google-formatted messages converted from universal context.""" - messages: List[Content] - system_instruction: Optional[str] = None + messages: list[Content] + system_instruction: str | None = None @dataclass class MessageConversionResult: @@ -184,20 +184,20 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): for any tool calls discovered in the message. 
""" - content: Optional[Content] = None - tool_call_id_to_name_mapping: Dict[str, str] = field(default_factory=dict) + content: Content | None = None + tool_call_id_to_name_mapping: dict[str, str] = field(default_factory=dict) @dataclass class MessageConversionParams: """Parameters for converting a single universal context message to Google format.""" - tool_call_id_to_name_mapping: Dict[str, str] + tool_call_id_to_name_mapping: dict[str, str] def _from_universal_context_messages( self, - universal_context_messages: List[LLMContextMessage], + universal_context_messages: list[LLMContextMessage], *, - system_instruction: Optional[str] = None, + system_instruction: str | None = None, ) -> ConvertedMessages: """Restructures messages to ensure proper Google format and message ordering. @@ -443,8 +443,8 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): ) def _merge_parallel_tool_calls_for_thinking( - self, thought_signature_dicts: List[dict], messages: List[Content] - ) -> List[Content]: + self, thought_signature_dicts: list[dict], messages: list[Content] + ) -> list[Content]: """Merge parallel tool calls into single Content objects when thinking is enabled. Gemini expects parallel tool calls (multiple function calls made @@ -540,7 +540,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]): return merged_messages def _apply_thought_signatures_to_messages( - self, thought_signature_dicts: List[dict], messages: List[Content] + self, thought_signature_dicts: list[dict], messages: list[Content] ) -> None: """Apply thought signatures to corresponding assistant messages. diff --git a/src/pipecat/adapters/services/grok_realtime_adapter.py b/src/pipecat/adapters/services/grok_realtime_adapter.py index cc98887f8..75ca61030 100644 --- a/src/pipecat/adapters/services/grok_realtime_adapter.py +++ b/src/pipecat/adapters/services/grok_realtime_adapter.py @@ -13,7 +13,7 @@ Grok's Voice Agent API. 
import copy import json from dataclasses import dataclass -from typing import Any, Dict, List, Optional, TypedDict +from typing import Any, TypedDict from loguru import logger @@ -33,9 +33,9 @@ class GrokRealtimeLLMInvocationParams(TypedDict): tools: List of tool definitions (function, web_search, x_search, file_search). """ - system_instruction: Optional[str] - messages: List[events.ConversationItem] - tools: List[Dict[str, Any]] + system_instruction: str | None + messages: list[events.ConversationItem] + tools: list[dict[str, Any]] class GrokRealtimeLLMAdapter(BaseLLMAdapter): @@ -51,7 +51,7 @@ class GrokRealtimeLLMAdapter(BaseLLMAdapter): return "grok-realtime" def get_llm_invocation_params( - self, context: LLMContext, *, system_instruction: Optional[str] = None + self, context: LLMContext, *, system_instruction: str | None = None ) -> GrokRealtimeLLMInvocationParams: """Get Grok Realtime-specific LLM invocation parameters from a universal LLM context. @@ -74,7 +74,7 @@ class GrokRealtimeLLMAdapter(BaseLLMAdapter): "tools": self.from_standard_tools(context.tools) or [], } - def get_messages_for_logging(self, context) -> List[Dict[str, Any]]: + def get_messages_for_logging(self, context) -> list[dict[str, Any]]: """Get messages from context in a format safe for logging. Binary data (images, audio) is replaced with short placeholders. @@ -91,11 +91,11 @@ class GrokRealtimeLLMAdapter(BaseLLMAdapter): class ConvertedMessages: """Container for Grok-formatted messages converted from universal context.""" - messages: List[events.ConversationItem] - system_instruction: Optional[str] = None + messages: list[events.ConversationItem] + system_instruction: str | None = None def _from_universal_context_messages( - self, universal_context_messages: List[LLMContextMessage] + self, universal_context_messages: list[LLMContextMessage] ) -> ConvertedMessages: """Convert universal context messages to Grok Realtime format. 
@@ -211,7 +211,7 @@ class GrokRealtimeLLMAdapter(BaseLLMAdapter): logger.error(f"Unhandled message type in _from_universal_context_message: {message}") @staticmethod - def _to_grok_function_format(function: FunctionSchema) -> Dict[str, Any]: + def _to_grok_function_format(function: FunctionSchema) -> dict[str, Any]: """Convert a function schema to Grok Realtime function format. Args: @@ -231,7 +231,7 @@ class GrokRealtimeLLMAdapter(BaseLLMAdapter): }, } - def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]: + def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]: """Convert tool schemas to Grok Realtime format. Supports both standard function tools and Grok-specific tools diff --git a/src/pipecat/adapters/services/inworld_realtime_adapter.py b/src/pipecat/adapters/services/inworld_realtime_adapter.py index b022afe6b..db07256f5 100644 --- a/src/pipecat/adapters/services/inworld_realtime_adapter.py +++ b/src/pipecat/adapters/services/inworld_realtime_adapter.py @@ -13,7 +13,7 @@ Inworld's Realtime API. import copy import json from dataclasses import dataclass -from typing import Any, Dict, List, Optional, TypedDict +from typing import Any, TypedDict from loguru import logger @@ -33,9 +33,9 @@ class InworldRealtimeLLMInvocationParams(TypedDict): tools: List of tool definitions. 
""" - system_instruction: Optional[str] - messages: List[events.ConversationItem] - tools: List[Dict[str, Any]] + system_instruction: str | None + messages: list[events.ConversationItem] + tools: list[dict[str, Any]] class InworldRealtimeLLMAdapter(BaseLLMAdapter): @@ -51,7 +51,7 @@ class InworldRealtimeLLMAdapter(BaseLLMAdapter): return "inworld-realtime" def get_llm_invocation_params( - self, context: LLMContext, *, system_instruction: Optional[str] = None + self, context: LLMContext, *, system_instruction: str | None = None ) -> InworldRealtimeLLMInvocationParams: """Get Inworld Realtime-specific LLM invocation parameters from a universal LLM context. @@ -74,7 +74,7 @@ class InworldRealtimeLLMAdapter(BaseLLMAdapter): "tools": self.from_standard_tools(context.tools) or [], } - def get_messages_for_logging(self, context) -> List[Dict[str, Any]]: + def get_messages_for_logging(self, context) -> list[dict[str, Any]]: """Get messages from context in a format safe for logging. Binary data (images, audio) is replaced with short placeholders. @@ -91,11 +91,11 @@ class InworldRealtimeLLMAdapter(BaseLLMAdapter): class ConvertedMessages: """Container for Inworld-formatted messages converted from universal context.""" - messages: List[events.ConversationItem] - system_instruction: Optional[str] = None + messages: list[events.ConversationItem] + system_instruction: str | None = None def _from_universal_context_messages( - self, universal_context_messages: List[LLMContextMessage] + self, universal_context_messages: list[LLMContextMessage] ) -> ConvertedMessages: """Convert universal context messages to Inworld Realtime format. 
@@ -211,7 +211,7 @@ class InworldRealtimeLLMAdapter(BaseLLMAdapter): logger.error(f"Unhandled message type in _from_universal_context_message: {message}") @staticmethod - def _to_inworld_function_format(function: FunctionSchema) -> Dict[str, Any]: + def _to_inworld_function_format(function: FunctionSchema) -> dict[str, Any]: """Convert a function schema to Inworld Realtime function format. Args: @@ -231,7 +231,7 @@ class InworldRealtimeLLMAdapter(BaseLLMAdapter): }, } - def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]: + def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]: """Convert tool schemas to Inworld Realtime format. Args: diff --git a/src/pipecat/adapters/services/open_ai_adapter.py b/src/pipecat/adapters/services/open_ai_adapter.py index a52fb84a6..335843d16 100644 --- a/src/pipecat/adapters/services/open_ai_adapter.py +++ b/src/pipecat/adapters/services/open_ai_adapter.py @@ -6,7 +6,7 @@ """OpenAI LLM adapter for Pipecat.""" -from typing import Any, Dict, List, Optional, TypedDict +from typing import Any, TypedDict from openai._types import NotGiven as OpenAINotGiven from openai.types.chat import ( @@ -29,8 +29,8 @@ from pipecat.processors.aggregators.llm_context import ( class OpenAILLMInvocationParams(TypedDict): """Context-based parameters for invoking OpenAI ChatCompletion API.""" - messages: List[ChatCompletionMessageParam] - tools: List[ChatCompletionToolParam] | OpenAINotGiven + messages: list[ChatCompletionMessageParam] + tools: list[ChatCompletionToolParam] | OpenAINotGiven tool_choice: ChatCompletionToolChoiceOptionParam | OpenAINotGiven @@ -54,7 +54,7 @@ class OpenAILLMAdapter(BaseLLMAdapter[OpenAILLMInvocationParams]): self, context: LLMContext, *, - system_instruction: Optional[str] = None, + system_instruction: str | None = None, convert_developer_to_user: bool, ) -> OpenAILLMInvocationParams: """Get OpenAI-specific LLM invocation parameters from a universal LLM context. 
@@ -95,7 +95,7 @@ class OpenAILLMAdapter(BaseLLMAdapter[OpenAILLMInvocationParams]): "tool_choice": context.tool_choice, } - def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[ChatCompletionToolParam]: + def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[ChatCompletionToolParam]: """Convert function schemas to OpenAI's function-calling format. Args: @@ -115,7 +115,7 @@ class OpenAILLMAdapter(BaseLLMAdapter[OpenAILLMInvocationParams]): custom_openai_tools = tools_schema.custom_tools.get(AdapterType.OPENAI, []) return formatted_standard_tools + custom_openai_tools - def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]: + def get_messages_for_logging(self, context: LLMContext) -> list[dict[str, Any]]: """Get messages from a universal LLM context in a format ready for logging about OpenAI. Binary data (images, audio) is replaced with short placeholders. @@ -130,10 +130,10 @@ class OpenAILLMAdapter(BaseLLMAdapter[OpenAILLMInvocationParams]): def _from_universal_context_messages( self, - messages: List[LLMContextMessage], + messages: list[LLMContextMessage], *, convert_developer_to_user: bool, - ) -> List[ChatCompletionMessageParam]: + ) -> list[ChatCompletionMessageParam]: result = [] for message in messages: if isinstance(message, LLMSpecificMessage): diff --git a/src/pipecat/adapters/services/open_ai_realtime_adapter.py b/src/pipecat/adapters/services/open_ai_realtime_adapter.py index 41f3ce89d..7df7e45c5 100644 --- a/src/pipecat/adapters/services/open_ai_realtime_adapter.py +++ b/src/pipecat/adapters/services/open_ai_realtime_adapter.py @@ -9,7 +9,7 @@ import copy import json from dataclasses import dataclass -from typing import Any, Dict, List, Optional, TypedDict +from typing import Any, TypedDict from loguru import logger @@ -26,9 +26,9 @@ class OpenAIRealtimeLLMInvocationParams(TypedDict): This is a placeholder until support for universal LLMContext machinery is added for OpenAI Realtime. 
""" - system_instruction: Optional[str] - messages: List[events.ConversationItem] - tools: List[Dict[str, Any]] + system_instruction: str | None + messages: list[events.ConversationItem] + tools: list[dict[str, Any]] class OpenAIRealtimeLLMAdapter(BaseLLMAdapter): @@ -44,7 +44,7 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter): return "openai-realtime" def get_llm_invocation_params( - self, context: LLMContext, *, system_instruction: Optional[str] = None + self, context: LLMContext, *, system_instruction: str | None = None ) -> OpenAIRealtimeLLMInvocationParams: """Get OpenAI Realtime-specific LLM invocation parameters from a universal LLM context. @@ -68,7 +68,7 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter): "tools": self.from_standard_tools(context.tools) or [], } - def get_messages_for_logging(self, context) -> List[Dict[str, Any]]: + def get_messages_for_logging(self, context) -> list[dict[str, Any]]: """Get messages from a universal LLM context in a format ready for logging about OpenAI Realtime. Binary data (images, audio) is replaced with short placeholders. @@ -87,11 +87,11 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter): class ConvertedMessages: """Container for OpenAI-formatted messages converted from universal context.""" - messages: List[events.ConversationItem] - system_instruction: Optional[str] = None + messages: list[events.ConversationItem] + system_instruction: str | None = None def _from_universal_context_messages( - self, universal_context_messages: List[LLMContextMessage] + self, universal_context_messages: list[LLMContextMessage] ) -> ConvertedMessages: # We can't load a long conversation history into the openai realtime api yet. (The API/model # forgets that it can do audio, if you do a series of `conversation.item.create` calls.) 
So @@ -188,7 +188,7 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter): logger.error(f"Unhandled message type in _from_universal_context_message: {message}") @staticmethod - def _to_openai_realtime_function_format(function: FunctionSchema) -> Dict[str, Any]: + def _to_openai_realtime_function_format(function: FunctionSchema) -> dict[str, Any]: """Convert a function schema to OpenAI Realtime format. Args: @@ -208,7 +208,7 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter): }, } - def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]: + def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]: """Convert tool schemas to OpenAI Realtime function-calling format. Args: diff --git a/src/pipecat/adapters/services/open_ai_responses_adapter.py b/src/pipecat/adapters/services/open_ai_responses_adapter.py index f3dd67e03..c5c6cbc7a 100644 --- a/src/pipecat/adapters/services/open_ai_responses_adapter.py +++ b/src/pipecat/adapters/services/open_ai_responses_adapter.py @@ -6,7 +6,7 @@ """OpenAI Responses API adapter for Pipecat.""" -from typing import Any, Dict, List, Optional, TypedDict +from typing import Any, TypedDict from openai._types import NotGiven as OpenAINotGiven from openai.types.responses import FunctionToolParam, ResponseInputItemParam, ToolParam @@ -23,8 +23,8 @@ from pipecat.processors.aggregators.llm_context import ( class OpenAIResponsesLLMInvocationParams(TypedDict, total=False): """Context-based parameters for invoking OpenAI Responses API.""" - input: List[ResponseInputItemParam] - tools: List[ToolParam] | OpenAINotGiven + input: list[ResponseInputItemParam] + tools: list[ToolParam] | OpenAINotGiven instructions: str @@ -47,7 +47,7 @@ class OpenAIResponsesLLMAdapter(BaseLLMAdapter[OpenAIResponsesLLMInvocationParam self, context: LLMContext, *, - system_instruction: Optional[str] = None, + system_instruction: str | None = None, ) -> OpenAIResponsesLLMInvocationParams: """Get Responses API invocation 
parameters from a universal LLM context. @@ -105,7 +105,7 @@ class OpenAIResponsesLLMAdapter(BaseLLMAdapter[OpenAIResponsesLLMInvocationParam return params - def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[ToolParam]: + def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[ToolParam]: """Convert function schemas to Responses API function tool format. Args: @@ -132,7 +132,7 @@ class OpenAIResponsesLLMAdapter(BaseLLMAdapter[OpenAIResponsesLLMInvocationParam custom_openai_tools = tools_schema.custom_tools.get(AdapterType.OPENAI, []) return result + custom_openai_tools - def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]: + def get_messages_for_logging(self, context: LLMContext) -> list[dict[str, Any]]: """Get messages from context in a format ready for logging. Binary data (images, audio) is replaced with short placeholders. @@ -146,8 +146,8 @@ class OpenAIResponsesLLMAdapter(BaseLLMAdapter[OpenAIResponsesLLMInvocationParam return self.get_messages(context, truncate_large_values=True) def _convert_messages_to_input( - self, messages: List[LLMContextMessage] - ) -> List[ResponseInputItemParam]: + self, messages: list[LLMContextMessage] + ) -> list[ResponseInputItemParam]: """Convert LLMContext messages to Responses API input items. Args: @@ -156,7 +156,7 @@ class OpenAIResponsesLLMAdapter(BaseLLMAdapter[OpenAIResponsesLLMInvocationParam Returns: List of Responses API input items. """ - result: List[ResponseInputItemParam] = [] + result: list[ResponseInputItemParam] = [] for message in messages: if isinstance(message, LLMSpecificMessage): diff --git a/src/pipecat/adapters/services/perplexity_adapter.py b/src/pipecat/adapters/services/perplexity_adapter.py index 18ebea648..188092b78 100644 --- a/src/pipecat/adapters/services/perplexity_adapter.py +++ b/src/pipecat/adapters/services/perplexity_adapter.py @@ -28,7 +28,6 @@ the messages are sent to Perplexity's API. 
""" import copy -from typing import List, Optional from openai.types.chat import ChatCompletionMessageParam @@ -53,7 +52,7 @@ class PerplexityLLMAdapter(OpenAILLMAdapter): self, context: LLMContext, *, - system_instruction: Optional[str] = None, + system_instruction: str | None = None, convert_developer_to_user: bool, ) -> OpenAILLMInvocationParams: """Get OpenAI-compatible invocation parameters with Perplexity message fixes applied. @@ -78,8 +77,8 @@ class PerplexityLLMAdapter(OpenAILLMAdapter): return params def _transform_messages( - self, messages: List[ChatCompletionMessageParam] - ) -> List[ChatCompletionMessageParam]: + self, messages: list[ChatCompletionMessageParam] + ) -> list[ChatCompletionMessageParam]: """Transform messages to satisfy Perplexity's API constraints. Applies three transformation steps in order: diff --git a/src/pipecat/audio/dtmf/types.py b/src/pipecat/audio/dtmf/types.py index 1b6eea7a0..aaec06f8c 100644 --- a/src/pipecat/audio/dtmf/types.py +++ b/src/pipecat/audio/dtmf/types.py @@ -11,10 +11,10 @@ key on the telephone keypad, facilitating the handling of input in telecommunication applications. """ -from enum import Enum +from enum import StrEnum -class KeypadEntry(str, Enum): +class KeypadEntry(StrEnum): """DTMF keypad entries for phone system integration. 
Parameters: diff --git a/src/pipecat/audio/dtmf/utils.py b/src/pipecat/audio/dtmf/utils.py index eff2aa12f..22026759e 100644 --- a/src/pipecat/audio/dtmf/utils.py +++ b/src/pipecat/audio/dtmf/utils.py @@ -15,7 +15,6 @@ import asyncio import io import wave from importlib.resources import files -from typing import Dict, Optional import aiofiles @@ -24,8 +23,8 @@ from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler from pipecat.audio.utils import create_file_resampler __DTMF_LOCK__ = asyncio.Lock() -__DTMF_AUDIO__: Dict[KeypadEntry, bytes] = {} -__DTMF_RESAMPLER__: Optional[BaseAudioResampler] = None +__DTMF_AUDIO__: dict[KeypadEntry, bytes] = {} +__DTMF_RESAMPLER__: BaseAudioResampler | None = None __DTMF_FILE_NAME = { KeypadEntry.POUND: "dtmf-pound.wav", diff --git a/src/pipecat/audio/filters/aic_filter.py b/src/pipecat/audio/filters/aic_filter.py index 752f6f3fa..1bdf723ae 100644 --- a/src/pipecat/audio/filters/aic_filter.py +++ b/src/pipecat/audio/filters/aic_filter.py @@ -18,7 +18,6 @@ Classes: import asyncio from pathlib import Path from threading import Lock -from typing import List, Optional, Tuple import numpy as np from aic_sdk import ( @@ -44,14 +43,14 @@ class AICModelManager: acquires on first use and releases when the last reference is dropped. """ - _cache: dict[str, Tuple[Model, int]] = {} # key -> (model, ref_count) + _cache: dict[str, tuple[Model, int]] = {} # key -> (model, ref_count) _lock = Lock() _loading: dict[ str, asyncio.Task[Model] ] = {} # key -> load task (deduplicates concurrent loads) @classmethod - def _increment_reference(cls, cache_key: str, entry: Tuple[Model, int]) -> Tuple[Model, str]: + def _increment_reference(cls, cache_key: str, entry: tuple[Model, int]) -> tuple[Model, str]: """Increment reference count for cached entry. 
Caller must hold _lock.""" cached_model, ref_count = entry cls._cache[cache_key] = (cached_model, ref_count + 1) @@ -59,7 +58,7 @@ class AICModelManager: return cached_model, cache_key @classmethod - def _store_new_reference(cls, cache_key: str, model: Model) -> Tuple[Model, str]: + def _store_new_reference(cls, cache_key: str, model: Model) -> tuple[Model, str]: """Store new model in cache with ref count 1. Caller must hold _lock.""" cls._cache[cache_key] = (model, 1) logger.debug(f"AIC model cached key={cache_key!r} ref_count=1") @@ -70,9 +69,9 @@ class AICModelManager: cls, cache_key: str, *, - model_path: Optional[Path] = None, - model_id: Optional[str] = None, - model_download_dir: Optional[Path] = None, + model_path: Path | None = None, + model_id: str | None = None, + model_download_dir: Path | None = None, ) -> Model: """Run the actual load (file or download). Separate to allow create_task and deduplication.""" if model_path is not None: @@ -94,9 +93,9 @@ class AICModelManager: @staticmethod def _get_cache_key( *, - model_path: Optional[Path] = None, - model_id: Optional[str] = None, - model_download_dir: Optional[Path] = None, + model_path: Path | None = None, + model_id: str | None = None, + model_download_dir: Path | None = None, ) -> str: """Build a stable cache key for the model. @@ -120,10 +119,10 @@ class AICModelManager: async def acquire( cls, *, - model_path: Optional[Path] = None, - model_id: Optional[str] = None, - model_download_dir: Optional[Path] = None, - ) -> Tuple[Model, str]: + model_path: Path | None = None, + model_id: str | None = None, + model_download_dir: Path | None = None, + ) -> tuple[Model, str]: """Get or load a Model and increment its reference count. Call this when starting a filter. 
Store the returned key and pass it @@ -218,10 +217,10 @@ class AICFilter(BaseAudioFilter): self, *, license_key: str, - model_id: Optional[str] = None, - model_path: Optional[Path] = None, - model_download_dir: Optional[Path] = None, - enhancement_level: Optional[float] = None, + model_id: str | None = None, + model_path: Path | None = None, + model_download_dir: Path | None = None, + enhancement_level: float | None = None, ) -> None: """Initialize the AIC filter. @@ -274,7 +273,7 @@ class AICFilter(BaseAudioFilter): ) # AIC SDK objects; model is shared via AICModelManager - self._model_cache_key: Optional[str] = None + self._model_cache_key: str | None = None self._model = None self._processor = None self._processor_ctx = None @@ -298,9 +297,9 @@ class AICFilter(BaseAudioFilter): def create_vad_analyzer( self, *, - speech_hold_duration: Optional[float] = None, - minimum_speech_duration: Optional[float] = None, - sensitivity: Optional[float] = None, + speech_hold_duration: float | None = None, + minimum_speech_duration: float | None = None, + sensitivity: float | None = None, ): """Return an analyzer that will lazily instantiate the AIC VAD when ready. @@ -491,7 +490,7 @@ class AICFilter(BaseAudioFilter): blocks_data = bytes(self._audio_buffer[:total_size]) self._audio_buffer = self._audio_buffer[total_size:] - filtered_chunks: List[bytes] = [] + filtered_chunks: list[bytes] = [] for i in range(num_blocks): start = i * block_size diff --git a/src/pipecat/audio/filters/koala_filter.py b/src/pipecat/audio/filters/koala_filter.py index cd5525f98..51dd5871d 100644 --- a/src/pipecat/audio/filters/koala_filter.py +++ b/src/pipecat/audio/filters/koala_filter.py @@ -10,7 +10,7 @@ This module provides an audio filter implementation using PicoVoice's Koala Noise Suppression engine to reduce background noise in audio streams. 
""" -from typing import Sequence +from collections.abc import Sequence import numpy as np from loguru import logger diff --git a/src/pipecat/audio/mixers/soundfile_mixer.py b/src/pipecat/audio/mixers/soundfile_mixer.py index 846e845e6..6d00c1c3f 100644 --- a/src/pipecat/audio/mixers/soundfile_mixer.py +++ b/src/pipecat/audio/mixers/soundfile_mixer.py @@ -12,7 +12,8 @@ runtime configuration changes. """ import asyncio -from typing import Any, Dict, Mapping +from collections.abc import Mapping +from typing import Any import numpy as np from loguru import logger @@ -70,7 +71,7 @@ class SoundfileMixer(BaseAudioMixer): self._sample_rate = 0 self._sound_pos = 0 - self._sounds: Dict[str, Any] = {} + self._sounds: dict[str, Any] = {} self._current_sound = default_sound self._mixing = mixing self._loop = loop diff --git a/src/pipecat/audio/turn/base_turn_analyzer.py b/src/pipecat/audio/turn/base_turn_analyzer.py index e8f6b9d13..6bf9f5dcf 100644 --- a/src/pipecat/audio/turn/base_turn_analyzer.py +++ b/src/pipecat/audio/turn/base_turn_analyzer.py @@ -12,7 +12,6 @@ when a user has finished speaking in a conversation. from abc import ABC, abstractmethod from enum import Enum -from typing import Optional, Tuple from pydantic import BaseModel @@ -44,7 +43,7 @@ class BaseTurnAnalyzer(ABC): while still defining an abstract interface through abstract methods. """ - def __init__(self, *, sample_rate: Optional[int] = None): + def __init__(self, *, sample_rate: int | None = None): """Initialize the turn analyzer. Args: @@ -108,7 +107,7 @@ class BaseTurnAnalyzer(ABC): pass @abstractmethod - async def analyze_end_of_turn(self) -> Tuple[EndOfTurnState, Optional[MetricsData]]: + async def analyze_end_of_turn(self) -> tuple[EndOfTurnState, MetricsData | None]: """Analyzes if an end of turn has occurred based on the audio input. 
Returns: diff --git a/src/pipecat/audio/turn/krisp_viva_turn.py b/src/pipecat/audio/turn/krisp_viva_turn.py index 3aa540491..5235a94be 100644 --- a/src/pipecat/audio/turn/krisp_viva_turn.py +++ b/src/pipecat/audio/turn/krisp_viva_turn.py @@ -16,7 +16,6 @@ passed directly to the constructor. import os import time -from typing import Optional, Tuple import numpy as np from loguru import logger @@ -61,9 +60,9 @@ class KrispVivaTurn(BaseTurnAnalyzer): def __init__( self, *, - model_path: Optional[str] = None, - sample_rate: Optional[int] = None, - params: Optional[KrispTurnParams] = None, + model_path: str | None = None, + sample_rate: int | None = None, + params: KrispTurnParams | None = None, api_key: str = "", ) -> None: """Initialize the Krisp turn analyzer. @@ -119,9 +118,9 @@ class KrispVivaTurn(BaseTurnAnalyzer): self._last_probability = None self._frame_probabilities = [] self._last_state = EndOfTurnState.INCOMPLETE - self._speech_stopped_time: Optional[float] = None - self._e2e_processing_time_ms: Optional[float] = None - self._last_metrics: Optional[TurnMetricsData] = None + self._speech_stopped_time: float | None = None + self._e2e_processing_time_ms: float | None = None + self._last_metrics: TurnMetricsData | None = None # Create session with provided sample rate or default to 16000 Hz # This preloads the model to improve latency when set_sample_rate is called later @@ -214,7 +213,7 @@ class KrispVivaTurn(BaseTurnAnalyzer): return self._frame_probabilities @property - def last_probability(self) -> Optional[float]: + def last_probability(self) -> float | None: """Get the last turn probability value computed. Returns: @@ -348,7 +347,7 @@ class KrispVivaTurn(BaseTurnAnalyzer): self._last_state = error_state return error_state - async def analyze_end_of_turn(self) -> Tuple[EndOfTurnState, Optional[MetricsData]]: + async def analyze_end_of_turn(self) -> tuple[EndOfTurnState, MetricsData | None]: """Analyze the current audio state to determine if turn has ended. 
Returns: diff --git a/src/pipecat/audio/turn/smart_turn/base_smart_turn.py b/src/pipecat/audio/turn/smart_turn/base_smart_turn.py index fa652d884..c1f4e53a2 100644 --- a/src/pipecat/audio/turn/smart_turn/base_smart_turn.py +++ b/src/pipecat/audio/turn/smart_turn/base_smart_turn.py @@ -15,7 +15,7 @@ import asyncio import time from abc import abstractmethod from concurrent.futures import ThreadPoolExecutor -from typing import Any, Dict, Optional, Tuple +from typing import Any import numpy as np from loguru import logger @@ -57,9 +57,7 @@ class BaseSmartTurn(BaseTurnAnalyzer): implement the specific model prediction logic. """ - def __init__( - self, *, sample_rate: Optional[int] = None, params: Optional[SmartTurnParams] = None - ): + def __init__(self, *, sample_rate: int | None = None, params: SmartTurnParams | None = None): """Initialize the smart turn analyzer. Args: @@ -146,7 +144,7 @@ class BaseSmartTurn(BaseTurnAnalyzer): return state - async def analyze_end_of_turn(self) -> Tuple[EndOfTurnState, Optional[MetricsData]]: + async def analyze_end_of_turn(self) -> tuple[EndOfTurnState, MetricsData | None]: """Analyze the current audio state to determine if turn has ended. 
Returns: @@ -178,7 +176,7 @@ class BaseSmartTurn(BaseTurnAnalyzer): self._speech_start_time = 0 self._silence_ms = 0 - def _process_speech_segment(self, audio_buffer) -> Tuple[EndOfTurnState, Optional[MetricsData]]: + def _process_speech_segment(self, audio_buffer) -> tuple[EndOfTurnState, MetricsData | None]: """Process accumulated audio segment using ML model.""" state = EndOfTurnState.INCOMPLETE @@ -248,6 +246,6 @@ class BaseSmartTurn(BaseTurnAnalyzer): return state, result_data @abstractmethod - def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]: + def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]: """Predict end-of-turn using ML model from audio data.""" pass diff --git a/src/pipecat/audio/turn/smart_turn/http_smart_turn.py b/src/pipecat/audio/turn/smart_turn/http_smart_turn.py index 6b5f4a84d..0d4b717b3 100644 --- a/src/pipecat/audio/turn/smart_turn/http_smart_turn.py +++ b/src/pipecat/audio/turn/smart_turn/http_smart_turn.py @@ -12,7 +12,7 @@ HTTP endpoints for ML-based end-of-turn detection. import asyncio import io -from typing import Any, Dict, Optional +from typing import Any import aiohttp import numpy as np @@ -33,7 +33,7 @@ class HttpSmartTurnAnalyzer(BaseSmartTurn): *, url: str, aiohttp_session: aiohttp.ClientSession, - headers: Optional[Dict[str, str]] = None, + headers: dict[str, str] | None = None, **kwargs, ): """Initialize the HTTP smart turn analyzer. 
@@ -58,7 +58,7 @@ class HttpSmartTurnAnalyzer(BaseSmartTurn): logger.trace(f"Serialized size: {len(serialized_bytes)} bytes") return serialized_bytes - async def _send_raw_request(self, data_bytes: bytes) -> Dict[str, Any]: + async def _send_raw_request(self, data_bytes: bytes) -> dict[str, Any]: """Send raw audio data to the HTTP endpoint for prediction.""" headers = {"Content-Type": "application/octet-stream"} headers.update(self._headers) @@ -97,14 +97,14 @@ class HttpSmartTurnAnalyzer(BaseSmartTurn): logger.trace(text) raise Exception(f"Non-JSON response: {text}") - except asyncio.TimeoutError: + except TimeoutError: logger.error(f"Request timed out after {self._params.stop_secs} seconds") raise SmartTurnTimeoutException(f"Request exceeded {self._params.stop_secs} seconds.") except aiohttp.ClientError as e: logger.error(f"Failed to send raw request to Daily Smart Turn: {e}") raise Exception("Failed to send raw request to Daily Smart Turn.") - def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]: + def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]: """Predict end-of-turn using remote HTTP ML service.""" try: serialized_array = self._serialize_array(audio_array) diff --git a/src/pipecat/audio/turn/smart_turn/local_coreml_smart_turn.py b/src/pipecat/audio/turn/smart_turn/local_coreml_smart_turn.py index 18310c386..f424ae9a2 100644 --- a/src/pipecat/audio/turn/smart_turn/local_coreml_smart_turn.py +++ b/src/pipecat/audio/turn/smart_turn/local_coreml_smart_turn.py @@ -11,7 +11,7 @@ local end-of-turn detection without requiring network connectivity. 
""" import warnings -from typing import Any, Dict +from typing import Any import numpy as np from loguru import logger @@ -76,7 +76,7 @@ class LocalCoreMLSmartTurnAnalyzer(BaseSmartTurn): self._turn_model = ct.models.MLModel(core_ml_model_path) logger.debug("Loaded Local Smart Turn") - async def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]: + async def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]: """Predict end-of-turn using local CoreML model.""" inputs = self._turn_processor( audio_array, diff --git a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v2.py b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v2.py index 8d584ecd2..2ef36b240 100644 --- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v2.py @@ -11,7 +11,7 @@ local end-of-turn detection without requiring network connectivity. """ import warnings -from typing import Any, Dict +from typing import Any import numpy as np from loguru import logger @@ -87,7 +87,7 @@ class LocalSmartTurnAnalyzerV2(BaseSmartTurn): self._turn_model.eval() logger.debug("Loaded Local Smart Turn v2") - def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]: + def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]: """Predict end-of-turn using local PyTorch model.""" inputs = self._turn_processor( audio_array, diff --git a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py index a8cc249fd..a94af41e6 100644 --- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py @@ -10,7 +10,7 @@ This module provides a smart turn analyzer that uses an ONNX model for local end-of-turn detection without requiring network connectivity. 
""" -from typing import Any, Dict, Optional +from typing import Any import numpy as np import onnxruntime as ort @@ -32,9 +32,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): enabling offline operation without network dependencies. """ - def __init__( - self, *, smart_turn_model_path: Optional[str] = None, cpu_count: int = 1, **kwargs - ): + def __init__(self, *, smart_turn_model_path: str | None = None, cpu_count: int = 1, **kwargs): """Initialize the local ONNX smart-turn-v3 analyzer. Args: @@ -138,7 +136,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): return soxr.resample(audio_array, actual_rate, _MODEL_SAMPLE_RATE, quality="VHQ") - def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]: + def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]: """Predict end-of-turn using local ONNX model.""" def truncate_audio_to_last_n_seconds( diff --git a/src/pipecat/audio/vad/aic_vad.py b/src/pipecat/audio/vad/aic_vad.py index 813029e2b..68f4fc67d 100644 --- a/src/pipecat/audio/vad/aic_vad.py +++ b/src/pipecat/audio/vad/aic_vad.py @@ -7,7 +7,8 @@ Classes: AICVADAnalyzer: For aic-sdk (uses 'aic_sdk' module) """ -from typing import Any, Callable, Optional +from collections.abc import Callable +from typing import Any from aic_sdk import VadParameter from loguru import logger @@ -46,10 +47,10 @@ class AICVADAnalyzer(VADAnalyzer): def __init__( self, *, - vad_context_factory: Optional[Callable[[], Any]] = None, - speech_hold_duration: Optional[float] = None, - minimum_speech_duration: Optional[float] = None, - sensitivity: Optional[float] = None, + vad_context_factory: Callable[[], Any] | None = None, + speech_hold_duration: float | None = None, + minimum_speech_duration: float | None = None, + sensitivity: float | None = None, ): """Create an AIC VAD analyzer. 
@@ -77,10 +78,10 @@ class AICVADAnalyzer(VADAnalyzer): super().__init__(sample_rate=None, params=fixed_params) self._vad_context_factory = vad_context_factory - self._vad_ctx: Optional[Any] = None - self._pending_speech_hold_duration: Optional[float] = speech_hold_duration - self._pending_minimum_speech_duration: Optional[float] = minimum_speech_duration - self._pending_sensitivity: Optional[float] = sensitivity + self._vad_ctx: Any | None = None + self._pending_speech_hold_duration: float | None = speech_hold_duration + self._pending_minimum_speech_duration: float | None = minimum_speech_duration + self._pending_sensitivity: float | None = sensitivity def bind_vad_context_factory(self, vad_context_factory: Callable[[], Any]): """Attach or replace the factory post-construction.""" diff --git a/src/pipecat/audio/vad/krisp_viva_vad.py b/src/pipecat/audio/vad/krisp_viva_vad.py index 2bcc13ab2..42e787bc8 100644 --- a/src/pipecat/audio/vad/krisp_viva_vad.py +++ b/src/pipecat/audio/vad/krisp_viva_vad.py @@ -12,7 +12,6 @@ Supports 8kHz, 16kHz, 32kHz, 44.1kHz and 48kHz sample rates. """ import os -from typing import Optional import numpy as np from loguru import logger @@ -38,10 +37,10 @@ class KrispVivaVadAnalyzer(VADAnalyzer): def __init__( self, *, - model_path: Optional[str] = None, + model_path: str | None = None, frame_duration: int = 10, - sample_rate: Optional[int] = None, - params: Optional[VADParams] = None, + sample_rate: int | None = None, + params: VADParams | None = None, ): """Initialize the Krisp VIVA VAD analyzer. diff --git a/src/pipecat/audio/vad/silero.py b/src/pipecat/audio/vad/silero.py index c15ba5b90..2b3d3d629 100644 --- a/src/pipecat/audio/vad/silero.py +++ b/src/pipecat/audio/vad/silero.py @@ -12,7 +12,6 @@ Supports 8kHz and 16kHz sample rates. 
""" import time -from typing import Optional import numpy as np from loguru import logger @@ -135,7 +134,7 @@ class SileroVADAnalyzer(VADAnalyzer): with automatic model state management and periodic resets. """ - def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None): + def __init__(self, *, sample_rate: int | None = None, params: VADParams | None = None): """Initialize the Silero VAD analyzer. Args: diff --git a/src/pipecat/audio/vad/vad_analyzer.py b/src/pipecat/audio/vad/vad_analyzer.py index c519b0861..32b879957 100644 --- a/src/pipecat/audio/vad/vad_analyzer.py +++ b/src/pipecat/audio/vad/vad_analyzer.py @@ -15,7 +15,6 @@ import asyncio from abc import ABC, abstractmethod from concurrent.futures import ThreadPoolExecutor from enum import Enum -from typing import Optional from loguru import logger from pydantic import BaseModel @@ -68,7 +67,7 @@ class VADAnalyzer(ABC): Subclasses must implement the core voice confidence calculation. """ - def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None): + def __init__(self, *, sample_rate: int | None = None, params: VADParams | None = None): """Initialize the VAD analyzer. Args: diff --git a/src/pipecat/audio/vad/vad_controller.py b/src/pipecat/audio/vad/vad_controller.py index fefe3bec1..2f16a0c62 100644 --- a/src/pipecat/audio/vad/vad_controller.py +++ b/src/pipecat/audio/vad/vad_controller.py @@ -12,7 +12,6 @@ and emit events when speech starts, stops, or is actively detected. import asyncio import time -from typing import Optional, Type from loguru import logger @@ -90,7 +89,7 @@ class VADController(BaseObject): self._vad_analyzer = vad_analyzer self._vad_state: VADState = VADState.QUIET - self._task_manager: Optional[BaseTaskManager] = None + self._task_manager: BaseTaskManager | None = None # Last time a on_speech_activity was triggered. 
self._speech_activity_time = 0 @@ -102,7 +101,7 @@ class VADController(BaseObject): # while in SPEAKING state (e.g. user mutes mic mid-speech). self._last_audio_time: float = 0.0 self._audio_idle_timeout = audio_idle_timeout - self._audio_idle_task: Optional[asyncio.Task] = None + self._audio_idle_task: asyncio.Task | None = None self._register_event_handler("on_speech_started", sync=True) self._register_event_handler("on_speech_stopped", sync=True) @@ -234,7 +233,7 @@ class VADController(BaseObject): """ await self._call_event_handler("on_push_frame", frame, direction) - async def broadcast_frame(self, frame_cls: Type[Frame], **kwargs): + async def broadcast_frame(self, frame_cls: type[Frame], **kwargs): """Request a frame to be broadcast upstream and downstream. This emits an on_broadcast_frame event that must be handled by a processor diff --git a/src/pipecat/extensions/ivr/ivr_navigator.py b/src/pipecat/extensions/ivr/ivr_navigator.py index a2ff0cde5..f6b36655b 100644 --- a/src/pipecat/extensions/ivr/ivr_navigator.py +++ b/src/pipecat/extensions/ivr/ivr_navigator.py @@ -11,7 +11,6 @@ using LLM-based decision making and DTMF tone generation. """ from enum import Enum -from typing import List, Optional from loguru import logger @@ -72,7 +71,7 @@ class IVRProcessor(FrameProcessor): *, classifier_prompt: str, ivr_prompt: str, - ivr_vad_params: Optional[VADParams] = None, + ivr_vad_params: VADParams | None = None, ): """Initialize the IVR processor. 
@@ -88,7 +87,7 @@ class IVRProcessor(FrameProcessor): self._classifier_prompt = classifier_prompt # Store saved context messages - self._saved_messages: List[dict] = [] + self._saved_messages: list[dict] = [] # XML pattern aggregation self._aggregator = PatternPairAggregator() @@ -98,7 +97,7 @@ class IVRProcessor(FrameProcessor): self._register_event_handler("on_conversation_detected") self._register_event_handler("on_ivr_status_changed") - def update_saved_messages(self, messages: List[dict]) -> None: + def update_saved_messages(self, messages: list[dict]) -> None: """Update the saved context messages. Sets the messages that are saved when switching between @@ -109,7 +108,7 @@ class IVRProcessor(FrameProcessor): """ self._saved_messages = messages - def _get_conversation_history(self) -> List[dict]: + def _get_conversation_history(self) -> list[dict]: """Get saved context messages without the system message. Returns: @@ -409,7 +408,7 @@ Remember: Respond with `NUMBER` (single or multiple for sequences), *, llm: LLMService, ivr_prompt: str, - ivr_vad_params: Optional[VADParams] = None, + ivr_vad_params: VADParams | None = None, ): """Initialize the IVR navigator. diff --git a/src/pipecat/extensions/voicemail/voicemail_detector.py b/src/pipecat/extensions/voicemail/voicemail_detector.py index 470f5dd54..3ab7f2f7d 100644 --- a/src/pipecat/extensions/voicemail/voicemail_detector.py +++ b/src/pipecat/extensions/voicemail/voicemail_detector.py @@ -16,7 +16,6 @@ Note: """ import asyncio -from typing import List, Optional from loguru import logger @@ -71,7 +70,7 @@ class NotifierGate(FrameProcessor): self._notifier = notifier self._task_name = task_name self._gate_opened = True - self._gate_task: Optional[asyncio.Task] = None + self._gate_task: asyncio.Task | None = None async def setup(self, setup: FrameProcessorSetup): """Set up the processor with required components. 
@@ -143,7 +142,7 @@ class ClassifierGate(NotifierGate): super().__init__(gate_notifier, task_name="classifier_gate") self._conversation_notifier = conversation_notifier self._conversation_detected = False - self._conversation_task: Optional[asyncio.Task] = None + self._conversation_task: asyncio.Task | None = None async def setup(self, setup: FrameProcessorSetup): """Set up the processor with required components. @@ -267,7 +266,7 @@ class ClassificationProcessor(FrameProcessor): # Voicemail timing state self._voicemail_detected = False - self._voicemail_task: Optional[asyncio.Task] = None + self._voicemail_task: asyncio.Task | None = None self._voicemail_event = asyncio.Event() self._voicemail_event.set() @@ -390,7 +389,7 @@ class ClassificationProcessor(FrameProcessor): self._voicemail_event.wait(), timeout=self._voicemail_response_delay ) await asyncio.sleep(0.1) - except asyncio.TimeoutError: + except TimeoutError: await self._call_event_handler("on_voicemail_detected") break @@ -423,10 +422,10 @@ class TTSGate(FrameProcessor): super().__init__() self._conversation_notifier = conversation_notifier self._voicemail_notifier = voicemail_notifier - self._frame_buffer: List[tuple[Frame, FrameDirection]] = [] + self._frame_buffer: list[tuple[Frame, FrameDirection]] = [] self._gating_active = True - self._conversation_task: Optional[asyncio.Task] = None - self._voicemail_task: Optional[asyncio.Task] = None + self._conversation_task: asyncio.Task | None = None + self._voicemail_task: asyncio.Task | None = None async def setup(self, setup: FrameProcessorSetup): """Set up the processor with required components. @@ -591,7 +590,7 @@ VOICEMAIL SYSTEM (respond "VOICEMAIL"): *, llm: LLMService, voicemail_response_delay: float = 2.0, - custom_system_prompt: Optional[str] = None, + custom_system_prompt: str | None = None, ): """Initialize the voicemail detector with classification and buffering components. 
diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 21780d6cb..7fd215caf 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -11,20 +11,16 @@ including data frames, system frames, and control frames for audio, video, text, and LLM processing. """ +from __future__ import annotations + import time +from collections.abc import Awaitable, Callable, Mapping, Sequence from dataclasses import dataclass, field from typing import ( TYPE_CHECKING, Any, - Awaitable, - Callable, - Dict, - List, Literal, - Mapping, Optional, - Sequence, - Tuple, ) from pipecat.adapters.schemas.tools_schema import ToolsSchema @@ -45,7 +41,7 @@ if TYPE_CHECKING: from pipecat.utils.tracing.tracing_context import TracingContext -def format_pts(pts: Optional[int]): +def format_pts(pts: int | None): """Format presentation timestamp (PTS) in nanoseconds to a human-readable string. Converts a PTS value in nanoseconds to a string representation. @@ -77,20 +73,20 @@ class Frame: id: int = field(init=False) name: str = field(init=False) - pts: Optional[int] = field(init=False) - broadcast_sibling_id: Optional[int] = field(init=False) - metadata: Dict[str, Any] = field(init=False) - transport_source: Optional[str] = field(init=False) - transport_destination: Optional[str] = field(init=False) + pts: int | None = field(init=False) + broadcast_sibling_id: int | None = field(init=False) + metadata: dict[str, Any] = field(init=False) + transport_source: str | None = field(init=False) + transport_destination: str | None = field(init=False) def __post_init__(self): self.id: int = obj_id() self.name: str = f"{self.__class__.__name__}#{obj_count(self)}" - self.pts: Optional[int] = None - self.broadcast_sibling_id: Optional[int] = None - self.metadata: Dict[str, Any] = {} - self.transport_source: Optional[str] = None - self.transport_destination: Optional[str] = None + self.pts: int | None = None + self.broadcast_sibling_id: int | None = None + self.metadata: 
dict[str, Any] = {} + self.transport_source: str | None = None + self.transport_destination: str | None = None def __str__(self): return self.name @@ -183,8 +179,8 @@ class ImageRawFrame: """ image: bytes - size: Tuple[int, int] - format: Optional[str] + size: tuple[int, int] + format: str | None # @@ -242,7 +238,7 @@ class TTSAudioRawFrame(OutputAudioRawFrame): context_id: Unique identifier for the TTS context that generated this audio. """ - context_id: Optional[str] = None + context_id: str | None = None @dataclass @@ -268,7 +264,7 @@ class URLImageRawFrame(OutputImageRawFrame): url: URL where the image can be downloaded from. """ - url: Optional[str] = None + url: str | None = None def __str__(self): pts = format_pts(self.pts) @@ -287,7 +283,7 @@ class SpriteFrame(DataFrame): images: List of image frames that make up the sprite animation. """ - images: List[OutputImageRawFrame] + images: list[OutputImageRawFrame] def __str__(self): pts = format_pts(self.pts) @@ -312,7 +308,7 @@ class TextFrame(DataFrame): """ text: str - skip_tts: Optional[bool] = field(init=False) + skip_tts: bool | None = field(init=False) # Whether any necessary inter-frame (leading/trailing) spaces are already # included in the text. # NOTE: Ideally this would be available at init time with a default value, @@ -357,7 +353,7 @@ class AggregatedTextFrame(TextFrame): """ aggregated_by: AggregationType | str - context_id: Optional[str] = None + context_id: str | None = None @dataclass @@ -375,7 +371,7 @@ class TTSTextFrame(AggregatedTextFrame): context_id: Unique identifier for the TTS context that generated this text. 
""" - context_id: Optional[str] = None + context_id: str | None = None @dataclass @@ -396,8 +392,8 @@ class TranscriptionFrame(TextFrame): user_id: str timestamp: str - language: Optional[Language] = None - result: Optional[Any] = None + language: Language | None = None + result: Any | None = None finalized: bool = False def __str__(self): @@ -422,8 +418,8 @@ class InterimTranscriptionFrame(TextFrame): text: str user_id: str timestamp: str - language: Optional[Language] = None - result: Optional[Any] = None + language: Language | None = None + result: Any | None = None def __str__(self): return f"{self.name}(user: {self.user_id}, text: [{self.text}], language: {self.language}, timestamp: {self.timestamp})" @@ -444,7 +440,7 @@ class TranslationFrame(TextFrame): user_id: str timestamp: str - language: Optional[Language] = None + language: Language | None = None def __str__(self): return f"{self.name}(user: {self.user_id}, text: [{self.text}], language: {self.language}, timestamp: {self.timestamp})" @@ -472,7 +468,7 @@ class LLMContextFrame(Frame): context: The LLM context containing messages, tools, and configuration. """ - context: "LLMContext" + context: LLMContext @dataclass @@ -489,7 +485,7 @@ class LLMThoughtStartFrame(ControlFrame): """ append_to_context: bool = False - llm: Optional[str] = None + llm: str | None = None def __post_init__(self): super().__post_init__() @@ -567,8 +563,8 @@ class LLMMessagesAppendFrame(DataFrame): run_llm: Whether the context update should be sent to the LLM. """ - messages: List[dict] - run_llm: Optional[bool] = None + messages: list[dict] + run_llm: bool | None = None @dataclass @@ -583,8 +579,8 @@ class LLMMessagesUpdateFrame(DataFrame): run_llm: Whether the context update should be sent to the LLM. 
""" - messages: List[dict] - run_llm: Optional[bool] = None + messages: list[dict] + run_llm: bool | None = None @dataclass @@ -600,8 +596,8 @@ class LLMMessagesTransformFrame(DataFrame): run_llm: Whether the context update should be sent to the LLM. """ - transform: Callable[[List["LLMContextMessage"]], List["LLMContextMessage"]] - run_llm: Optional[bool] = None + transform: Callable[[list[LLMContextMessage]], list[LLMContextMessage]] + run_llm: bool | None = None @dataclass @@ -616,7 +612,7 @@ class LLMSetToolsFrame(DataFrame): tools: List of tool/function definitions for the LLM. """ - tools: List[dict] | ToolsSchema | "NotGiven" + tools: list[dict] | ToolsSchema | NotGiven @dataclass @@ -668,8 +664,8 @@ class FunctionCallResultProperties: Only meaningful for async function calls (``cancel_on_interruption=False``). """ - run_llm: Optional[bool] = None - on_context_updated: Optional[Callable[[], Awaitable[None]]] = None + run_llm: bool | None = None + on_context_updated: Callable[[], Awaitable[None]] | None = None is_final: bool = True @@ -694,8 +690,8 @@ class FunctionCallResultFrame(DataFrame, UninterruptibleFrame): tool_call_id: str arguments: Any result: Any - run_llm: Optional[bool] = None - properties: Optional[FunctionCallResultProperties] = None + run_llm: bool | None = None + properties: FunctionCallResultProperties | None = None @dataclass @@ -711,7 +707,7 @@ class TTSSpeakFrame(DataFrame): """ text: str - append_to_context: Optional[bool] = None + append_to_context: bool | None = None @dataclass @@ -752,8 +748,8 @@ class OutputDTMFFrame(DTMFFrame, DataFrame): :meth:`from_string` to build this from a string like ``"123#"``. 
""" - button: Optional[KeypadEntry] = None - buttons: Optional[List[KeypadEntry]] = None + button: KeypadEntry | None = None + buttons: list[KeypadEntry] | None = None def __post_init__(self): super().__post_init__() @@ -766,7 +762,7 @@ class OutputDTMFFrame(DTMFFrame, DataFrame): return f"{self.name}(buttons: {self.to_string()})" @classmethod - def from_string(cls, buttons: str, **kwargs) -> "OutputDTMFFrame": + def from_string(cls, buttons: str, **kwargs) -> OutputDTMFFrame: """Build an ``OutputDTMFFrame`` from a string of DTMF characters. Args: @@ -820,7 +816,7 @@ class StartFrame(SystemFrame): enable_tracing: bool = False enable_usage_metrics: bool = False report_only_initial_ttfb: bool = False - tracing_context: Optional["TracingContext"] = None + tracing_context: TracingContext | None = None @dataclass @@ -834,7 +830,7 @@ class CancelFrame(SystemFrame): reason: Optional reason for pushing a cancel frame. """ - reason: Optional[Any] = None + reason: Any | None = None def __str__(self): return f"{self.name}(reason: {self.reason})" @@ -857,8 +853,8 @@ class ErrorFrame(SystemFrame): error: str fatal: bool = False - processor: Optional["FrameProcessor"] = None - exception: Optional[Exception] = None + processor: FrameProcessor | None = None + exception: Exception | None = None def __str__(self): return f"{self.name}(error: {self.error}, fatal: {self.fatal})" @@ -891,7 +887,7 @@ class FrameProcessorPauseUrgentFrame(SystemFrame): processor: The frame processor to pause. """ - processor: "FrameProcessor" + processor: FrameProcessor @dataclass @@ -906,7 +902,7 @@ class FrameProcessorResumeUrgentFrame(SystemFrame): processor: The frame processor to resume. """ - processor: "FrameProcessor" + processor: FrameProcessor @dataclass @@ -1050,7 +1046,7 @@ class MetricsFrame(SystemFrame): data: List of metrics data collected by the processor. 
""" - data: List[MetricsData] + data: list[MetricsData] @dataclass @@ -1156,12 +1152,12 @@ class UserImageRequestFrame(SystemFrame): """ user_id: str - text: Optional[str] = None - append_to_context: Optional[bool] = None - video_source: Optional[str] = None - function_name: Optional[str] = None - tool_call_id: Optional[str] = None - result_callback: Optional[Any] = None + text: str | None = None + append_to_context: bool | None = None + video_source: str | None = None + function_name: str | None = None + tool_call_id: str | None = None + result_callback: Any | None = None def __str__(self): return f"{self.name}(user: {self.user_id}, text: {self.text}, append_to_context: {self.append_to_context}, {self.video_source})" @@ -1244,9 +1240,9 @@ class UserImageRawFrame(InputImageRawFrame): """ user_id: str = "" - text: Optional[str] = None - append_to_context: Optional[bool] = None - request: Optional[UserImageRequestFrame] = None + text: str | None = None + append_to_context: bool | None = None + request: UserImageRequestFrame | None = None def __str__(self): pts = format_pts(self.pts) @@ -1266,8 +1262,8 @@ class AssistantImageRawFrame(OutputImageRawFrame): original_mime_type: The MIME type of the original image data. """ - original_data: Optional[bytes] = None - original_mime_type: Optional[str] = None + original_data: bytes | None = None + original_mime_type: str | None = None @dataclass @@ -1296,8 +1292,8 @@ class OutputDTMFUrgentFrame(DTMFFrame, SystemFrame): :meth:`from_string` to build this from a string like ``"123#"``. 
""" - button: Optional[KeypadEntry] = None - buttons: Optional[List[KeypadEntry]] = None + button: KeypadEntry | None = None + buttons: list[KeypadEntry] | None = None def __post_init__(self): super().__post_init__() @@ -1310,7 +1306,7 @@ class OutputDTMFUrgentFrame(DTMFFrame, SystemFrame): return f"{self.name}(buttons: {self.to_string()})" @classmethod - def from_string(cls, buttons: str, **kwargs) -> "OutputDTMFUrgentFrame": + def from_string(cls, buttons: str, **kwargs) -> OutputDTMFUrgentFrame: """Build an ``OutputDTMFUrgentFrame`` from a string of DTMF characters. Args: @@ -1349,8 +1345,8 @@ class SpeechControlParamsFrame(SystemFrame): turn_params: Current turn-taking analysis parameters. """ - vad_params: Optional[VADParams] = None - turn_params: Optional[BaseTurnParams] = None + vad_params: VADParams | None = None + turn_params: BaseTurnParams | None = None @dataclass @@ -1396,7 +1392,7 @@ class ServiceSwitcherRequestMetadataFrame(ControlFrame): service: The target service that should re-emit its metadata. """ - service: "FrameProcessor" + service: FrameProcessor # @@ -1444,7 +1440,7 @@ class EndTaskFrame(TaskFrame, UninterruptibleFrame): reason: Optional reason for pushing an end frame. """ - reason: Optional[Any] = None + reason: Any | None = None def __str__(self): return f"{self.name}(reason: {self.reason})" @@ -1475,7 +1471,7 @@ class CancelTaskFrame(TaskSystemFrame): reason: Optional reason for pushing a cancel frame. """ - reason: Optional[Any] = None + reason: Any | None = None def __str__(self): return f"{self.name}(reason: {self.reason})" @@ -1516,7 +1512,7 @@ class EndFrame(ControlFrame, UninterruptibleFrame): reason: Optional reason for pushing an end frame. """ - reason: Optional[Any] = None + reason: Any | None = None def __str__(self): return f"{self.name}(reason: {self.reason})" @@ -1598,7 +1594,7 @@ class FrameProcessorPauseFrame(ControlFrame): processor: The frame processor to pause. 
""" - processor: "FrameProcessor" + processor: FrameProcessor @dataclass @@ -1613,7 +1609,7 @@ class FrameProcessorResumeFrame(ControlFrame): processor: The frame processor to resume. """ - processor: "FrameProcessor" + processor: FrameProcessor @dataclass @@ -1624,7 +1620,7 @@ class LLMFullResponseStartFrame(ControlFrame): more TextFrames and a final LLMFullResponseEndFrame. """ - skip_tts: Optional[bool] = field(init=False) + skip_tts: bool | None = field(init=False) def __post_init__(self): super().__post_init__() @@ -1635,7 +1631,7 @@ class LLMFullResponseStartFrame(ControlFrame): class LLMFullResponseEndFrame(ControlFrame): """Frame indicating the end of an LLM response.""" - skip_tts: Optional[bool] = field(init=False) + skip_tts: bool | None = field(init=False) def __post_init__(self): super().__post_init__() @@ -1665,7 +1661,7 @@ class LLMSummarizeContextFrame(ControlFrame): is used. """ - config: Optional["LLMContextSummaryConfig"] = None + config: LLMContextSummaryConfig | None = None @dataclass @@ -1692,11 +1688,11 @@ class LLMContextSummaryRequestFrame(ControlFrame): """ request_id: str - context: "LLMContext" + context: LLMContext min_messages_to_keep: int target_context_tokens: int summarization_prompt: str - summarization_timeout: Optional[float] = None + summarization_timeout: float | None = None @dataclass @@ -1718,7 +1714,7 @@ class LLMContextSummaryResultFrame(ControlFrame, UninterruptibleFrame): request_id: str summary: str last_summarized_index: int - error: Optional[str] = None + error: str | None = None @dataclass @@ -1745,7 +1741,7 @@ class FunctionCallInProgressFrame(ControlFrame, UninterruptibleFrame): tool_call_id: str arguments: Any cancel_on_interruption: bool = False - group_id: Optional[str] = None + group_id: str | None = None @dataclass @@ -1781,7 +1777,7 @@ class TTSStartedFrame(ControlFrame): context_id: Unique identifier for this TTS context. 
""" - context_id: Optional[str] = None + context_id: str | None = None @dataclass @@ -1792,7 +1788,7 @@ class TTSStoppedFrame(ControlFrame): context_id: Unique identifier for this TTS context. """ - context_id: Optional[str] = None + context_id: str | None = None @dataclass @@ -1817,8 +1813,8 @@ class ServiceUpdateSettingsFrame(ControlFrame, UninterruptibleFrame): """ settings: Mapping[str, Any] = field(default_factory=dict) - delta: Optional["ServiceSettings"] = None - service: Optional["FrameProcessor"] = None + delta: ServiceSettings | None = None + service: FrameProcessor | None = None @dataclass @@ -1942,4 +1938,4 @@ class ManuallySwitchServiceFrame(ServiceSwitcherFrame): Handled by ServiceSwitcherStrategyManual to switch the active service. """ - service: "FrameProcessor" + service: FrameProcessor diff --git a/src/pipecat/metrics/metrics.py b/src/pipecat/metrics/metrics.py index 2030306e5..5d0dbddc8 100644 --- a/src/pipecat/metrics/metrics.py +++ b/src/pipecat/metrics/metrics.py @@ -11,8 +11,6 @@ collected throughout the pipeline, including timing, token usage, and processing statistics. 
""" -from typing import Optional - from pydantic import BaseModel @@ -25,7 +23,7 @@ class MetricsData(BaseModel): """ processor: str - model: Optional[str] = None + model: str | None = None class TTFBMetricsData(MetricsData): @@ -62,9 +60,9 @@ class LLMTokenUsage(BaseModel): prompt_tokens: int completion_tokens: int total_tokens: int - cache_read_input_tokens: Optional[int] = None - cache_creation_input_tokens: Optional[int] = None - reasoning_tokens: Optional[int] = None + cache_read_input_tokens: int | None = None + cache_creation_input_tokens: int | None = None + reasoning_tokens: int | None = None class LLMUsageMetricsData(MetricsData): diff --git a/src/pipecat/observers/base_observer.py b/src/pipecat/observers/base_observer.py index 70c79224a..69d81f0fe 100644 --- a/src/pipecat/observers/base_observer.py +++ b/src/pipecat/observers/base_observer.py @@ -12,8 +12,7 @@ for logging, debugging, analytics, and monitoring pipeline behavior. """ from dataclasses import dataclass - -from typing_extensions import TYPE_CHECKING +from typing import TYPE_CHECKING from pipecat.frames.frames import Frame from pipecat.utils.base_object import BaseObject diff --git a/src/pipecat/observers/loggers/debug_log_observer.py b/src/pipecat/observers/loggers/debug_log_observer.py index c5704a33b..1267dd0bf 100644 --- a/src/pipecat/observers/loggers/debug_log_observer.py +++ b/src/pipecat/observers/loggers/debug_log_observer.py @@ -13,7 +13,6 @@ understanding frame flow between processors. from dataclasses import fields, is_dataclass from enum import Enum, auto -from typing import Dict, Optional, Set, Tuple, Type, Union from loguru import logger @@ -75,10 +74,10 @@ class DebugLogObserver(BaseObserver): def __init__( self, - frame_types: Optional[ - Union[Tuple[Type[Frame], ...], Dict[Type[Frame], Optional[Tuple[Type, FrameEndpoint]]]] - ] = None, - exclude_fields: Optional[Set[str]] = None, + frame_types: tuple[type[Frame], ...] 
+ | dict[type[Frame], tuple[type, FrameEndpoint] | None] + | None = None, + exclude_fields: set[str] | None = None, **kwargs, ): """Initialize the debug log observer. diff --git a/src/pipecat/observers/loggers/metrics_log_observer.py b/src/pipecat/observers/loggers/metrics_log_observer.py index 7f4c1635c..8b3072894 100644 --- a/src/pipecat/observers/loggers/metrics_log_observer.py +++ b/src/pipecat/observers/loggers/metrics_log_observer.py @@ -11,8 +11,6 @@ allowing developers to monitor performance metrics, token usage, and other statistics in real-time. """ -from typing import Optional, Set, Type - from loguru import logger from pipecat.frames.frames import MetricsFrame @@ -60,7 +58,7 @@ class MetricsLogObserver(BaseObserver): def __init__( self, - include_metrics: Optional[Set[Type[MetricsData]]] = None, + include_metrics: set[type[MetricsData]] | None = None, **kwargs, ): """Initialize the metrics log observer. diff --git a/src/pipecat/observers/startup_timing_observer.py b/src/pipecat/observers/startup_timing_observer.py index a1ea04d47..6c6eb8204 100644 --- a/src/pipecat/observers/startup_timing_observer.py +++ b/src/pipecat/observers/startup_timing_observer.py @@ -36,7 +36,6 @@ Example:: import time from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple, Type from pydantic import BaseModel, Field @@ -84,7 +83,7 @@ class StartupTimingReport(BaseModel): start_time: float total_duration_secs: float - processor_timings: List[ProcessorStartupTiming] = Field(default_factory=list) + processor_timings: list[ProcessorStartupTiming] = Field(default_factory=list) class TransportTimingReport(BaseModel): @@ -98,8 +97,8 @@ class TransportTimingReport(BaseModel): """ start_time: float - bot_connected_secs: Optional[float] = None - client_connected_secs: Optional[float] = None + bot_connected_secs: float | None = None + client_connected_secs: float | None = None class StartupTimingObserver(BaseObserver): @@ -157,7 +156,7 @@ class 
StartupTimingObserver(BaseObserver): def __init__( self, *, - processor_types: Optional[Tuple[Type[FrameProcessor], ...]] = None, + processor_types: tuple[type[FrameProcessor], ...] | None = None, **kwargs, ): """Initialize the startup timing observer. @@ -171,13 +170,13 @@ class StartupTimingObserver(BaseObserver): self._processor_types = processor_types # Map processor ID -> arrival info. - self._arrivals: Dict[int, _ArrivalInfo] = {} + self._arrivals: dict[int, _ArrivalInfo] = {} # Collected timings in pipeline order. - self._timings: List[ProcessorStartupTiming] = [] + self._timings: list[ProcessorStartupTiming] = [] # Lock onto the first StartFrame we see (by frame ID). - self._start_frame_id: Optional[str] = None + self._start_frame_id: str | None = None # Whether we've already emitted the startup timing report. self._startup_timing_reported = False @@ -186,13 +185,13 @@ class StartupTimingObserver(BaseObserver): self._transport_timing_reported = False # Timestamp (ns) when we first see a StartFrame arrive at a processor. - self._start_frame_arrival_ns: Optional[int] = None + self._start_frame_arrival_ns: int | None = None # Bot connected timing (stored for inclusion in the transport report). - self._bot_connected_secs: Optional[float] = None + self._bot_connected_secs: float | None = None # Wall clock time when the StartFrame was first seen. - self._start_wall_clock: Optional[float] = None + self._start_wall_clock: float | None = None self._register_event_handler("on_startup_timing_report") self._register_event_handler("on_transport_timing_report") diff --git a/src/pipecat/observers/user_bot_latency_observer.py b/src/pipecat/observers/user_bot_latency_observer.py index 0672b689c..5e4084406 100644 --- a/src/pipecat/observers/user_bot_latency_observer.py +++ b/src/pipecat/observers/user_bot_latency_observer.py @@ -14,7 +14,6 @@ is measured. 
Optionally collects per-service latency breakdown metrics import time from collections import deque -from typing import Dict, List, Optional from pydantic import BaseModel, Field @@ -48,7 +47,7 @@ class TTFBBreakdownMetrics(BaseModel): """ processor: str - model: Optional[str] = None + model: str | None = None start_time: float duration_secs: float @@ -105,13 +104,13 @@ class LatencyBreakdown(BaseModel): this cycle. Empty if no function calls occurred. """ - ttfb: List[TTFBBreakdownMetrics] = Field(default_factory=list) - text_aggregation: Optional[TextAggregationBreakdownMetrics] = None - user_turn_start_time: Optional[float] = None - user_turn_secs: Optional[float] = None - function_calls: List[FunctionCallMetrics] = Field(default_factory=list) + ttfb: list[TTFBBreakdownMetrics] = Field(default_factory=list) + text_aggregation: TextAggregationBreakdownMetrics | None = None + user_turn_start_time: float | None = None + user_turn_secs: float | None = None + function_calls: list[FunctionCallMetrics] = Field(default_factory=list) - def chronological_events(self) -> List[str]: + def chronological_events(self) -> list[str]: """Return human-readable event labels sorted by start time. Collects all sub-metrics into a flat list, sorts by ``start_time``, @@ -120,7 +119,7 @@ class LatencyBreakdown(BaseModel): Returns: List of formatted strings, one per event, in chronological order. """ - events: List[tuple] = [] + events: list[tuple] = [] if self.user_turn_start_time is not None and self.user_turn_secs is not None: events.append((self.user_turn_start_time, f"User turn: {self.user_turn_secs:.3f}s")) @@ -181,12 +180,12 @@ class UserBotLatencyObserver(BaseObserver): **kwargs: Additional arguments passed to parent class. 
""" super().__init__(**kwargs) - self._user_stopped_time: Optional[float] = None - self._user_turn_start_time: Optional[float] = None - self._user_turn: Optional[float] = None + self._user_stopped_time: float | None = None + self._user_turn_start_time: float | None = None + self._user_turn: float | None = None # First bot speech tracking - self._client_connected_time: Optional[float] = None + self._client_connected_time: float | None = None self._first_bot_speech_measured: bool = False # Frame deduplication (bounded deque + set pattern) @@ -194,10 +193,10 @@ class UserBotLatencyObserver(BaseObserver): self._frame_history: deque = deque(maxlen=max_frames) # Per-cycle metric accumulators - self._ttfb: List[TTFBBreakdownMetrics] = [] - self._text_aggregation: Optional[TextAggregationBreakdownMetrics] = None - self._function_call_starts: Dict[str, tuple[str, float]] = {} - self._function_call_metrics: List[FunctionCallMetrics] = [] + self._ttfb: list[TTFBBreakdownMetrics] = [] + self._text_aggregation: TextAggregationBreakdownMetrics | None = None + self._function_call_starts: dict[str, tuple[str, float]] = {} + self._function_call_metrics: list[FunctionCallMetrics] = [] self._register_event_handler("on_latency_measured") self._register_event_handler("on_latency_breakdown") diff --git a/src/pipecat/pipeline/base_task.py b/src/pipecat/pipeline/base_task.py index 788342482..b5ba06645 100644 --- a/src/pipecat/pipeline/base_task.py +++ b/src/pipecat/pipeline/base_task.py @@ -12,8 +12,8 @@ tasks that manage the lifecycle and execution of frame processing pipelines. 
import asyncio from abc import abstractmethod +from collections.abc import AsyncIterable, Iterable from dataclasses import dataclass -from typing import AsyncIterable, Iterable from pipecat.frames.frames import Frame from pipecat.utils.base_object import BaseObject diff --git a/src/pipecat/pipeline/llm_switcher.py b/src/pipecat/pipeline/llm_switcher.py index d65d50d08..71a7c7974 100644 --- a/src/pipecat/pipeline/llm_switcher.py +++ b/src/pipecat/pipeline/llm_switcher.py @@ -6,7 +6,7 @@ """LLM switcher for switching between different LLMs at runtime, with different switching strategies.""" -from typing import Any, List, Optional, Type +from typing import Any from pipecat.adapters.schemas.direct_function import DirectFunction from pipecat.pipeline.service_switcher import ( @@ -28,8 +28,8 @@ class LLMSwitcher(ServiceSwitcher[StrategyType]): def __init__( self, - llms: List[LLMService], - strategy_type: Type[StrategyType] = ServiceSwitcherStrategyManual, + llms: list[LLMService], + strategy_type: type[StrategyType] = ServiceSwitcherStrategyManual, ): """Initialize the service switcher with a list of LLMs and a switching strategy. @@ -41,7 +41,7 @@ class LLMSwitcher(ServiceSwitcher[StrategyType]): super().__init__(llms, strategy_type) @property - def llms(self) -> List[LLMService]: + def llms(self) -> list[LLMService]: """Get the list of LLMs managed by this switcher. Returns: @@ -58,7 +58,7 @@ class LLMSwitcher(ServiceSwitcher[StrategyType]): """ return self.strategy.active_service - async def run_inference(self, context: LLMContext, **kwargs) -> Optional[str]: + async def run_inference(self, context: LLMContext, **kwargs) -> str | None: """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context, using the currently active LLM. 
Args: @@ -75,11 +75,11 @@ class LLMSwitcher(ServiceSwitcher[StrategyType]): def register_function( self, - function_name: Optional[str], + function_name: str | None, handler: Any, *, cancel_on_interruption: bool = True, - timeout_secs: Optional[float] = None, + timeout_secs: float | None = None, ): """Register a function handler for LLM function calls, on all LLMs, active or not. @@ -105,7 +105,7 @@ class LLMSwitcher(ServiceSwitcher[StrategyType]): handler: DirectFunction, *, cancel_on_interruption: bool = True, - timeout_secs: Optional[float] = None, + timeout_secs: float | None = None, ): """Register a direct function handler for LLM function calls, on all LLMs, active or not. diff --git a/src/pipecat/pipeline/parallel_pipeline.py b/src/pipecat/pipeline/parallel_pipeline.py index 1e2e03a8f..d92fdada9 100644 --- a/src/pipecat/pipeline/parallel_pipeline.py +++ b/src/pipecat/pipeline/parallel_pipeline.py @@ -12,7 +12,6 @@ handling of pipeline lifecycle events. """ from itertools import chain -from typing import Dict, List from loguru import logger @@ -51,7 +50,7 @@ class ParallelPipeline(BasePipeline): self._pipelines = [] self._seen_ids = set() - self._frame_counter: Dict[int, int] = {} + self._frame_counter: dict[int, int] = {} self._synchronizing: bool = False self._buffered_frames: list[tuple[Frame, FrameDirection]] = [] @@ -93,7 +92,7 @@ class ParallelPipeline(BasePipeline): return self._pipelines @property - def entry_processors(self) -> List["FrameProcessor"]: + def entry_processors(self) -> list["FrameProcessor"]: """Return the list of entry processors for this processor. Entry processors are the first processors in a compound processor @@ -106,7 +105,7 @@ class ParallelPipeline(BasePipeline): """ return self._pipelines - def processors_with_metrics(self) -> List[FrameProcessor]: + def processors_with_metrics(self) -> list[FrameProcessor]: """Collect processors that can generate metrics from all parallel branches. 
Returns: diff --git a/src/pipecat/pipeline/pipeline.py b/src/pipecat/pipeline/pipeline.py index 9114b9b09..325cedb82 100644 --- a/src/pipecat/pipeline/pipeline.py +++ b/src/pipecat/pipeline/pipeline.py @@ -11,7 +11,7 @@ in sequence and manages frame flow between them, along with helper classes for pipeline source and sink operations. """ -from typing import Callable, Coroutine, List, Optional +from collections.abc import Callable, Coroutine from pipecat.frames.frames import Frame from pipecat.pipeline.base_pipeline import BasePipeline @@ -98,10 +98,10 @@ class Pipeline(BasePipeline): def __init__( self, - processors: List[FrameProcessor], + processors: list[FrameProcessor], *, - source: Optional[FrameProcessor] = None, - sink: Optional[FrameProcessor] = None, + source: FrameProcessor | None = None, + sink: FrameProcessor | None = None, ): """Initialize the pipeline with a list of processors. @@ -116,7 +116,7 @@ class Pipeline(BasePipeline): # downstream outside of the pipeline. self._source = source or PipelineSource(self.push_frame, name=f"{self}::Source") self._sink = sink or PipelineSink(self.push_frame, name=f"{self}::Sink") - self._processors: List[FrameProcessor] = [self._source] + processors + [self._sink] + self._processors: list[FrameProcessor] = [self._source] + processors + [self._sink] self._link_processors() @@ -137,7 +137,7 @@ class Pipeline(BasePipeline): return self._processors @property - def entry_processors(self) -> List["FrameProcessor"]: + def entry_processors(self) -> list["FrameProcessor"]: """Return the list of entry processors for this processor. Entry processors are the first processors in a compound processor diff --git a/src/pipecat/pipeline/runner.py b/src/pipecat/pipeline/runner.py index b584377c6..db1767866 100644 --- a/src/pipecat/pipeline/runner.py +++ b/src/pipecat/pipeline/runner.py @@ -14,7 +14,6 @@ management. 
import asyncio import gc import signal -from typing import Optional from loguru import logger @@ -34,11 +33,11 @@ class PipelineRunner(BaseObject): def __init__( self, *, - name: Optional[str] = None, + name: str | None = None, handle_sigint: bool = True, handle_sigterm: bool = False, force_gc: bool = False, - loop: Optional[asyncio.AbstractEventLoop] = None, + loop: asyncio.AbstractEventLoop | None = None, ): """Initialize the pipeline runner. diff --git a/src/pipecat/pipeline/service_switcher.py b/src/pipecat/pipeline/service_switcher.py index 9d2e2a56e..a7f98f9b1 100644 --- a/src/pipecat/pipeline/service_switcher.py +++ b/src/pipecat/pipeline/service_switcher.py @@ -6,7 +6,7 @@ """Service switcher for switching between different services at runtime, with different switching strategies.""" -from typing import Any, Generic, List, Optional, Type, TypeVar +from typing import Any, Generic, TypeVar from loguru import logger @@ -42,7 +42,7 @@ class ServiceSwitcherStrategy(BaseObject): ... """ - def __init__(self, services: List[FrameProcessor]): + def __init__(self, services: list[FrameProcessor]): """Initialize the service switcher strategy with a list of services. Note: @@ -62,7 +62,7 @@ class ServiceSwitcherStrategy(BaseObject): self._register_event_handler("on_service_switched") @property - def services(self) -> List[FrameProcessor]: + def services(self) -> list[FrameProcessor]: """Return the list of available services.""" return self._services @@ -73,7 +73,7 @@ class ServiceSwitcherStrategy(BaseObject): async def handle_frame( self, frame: ServiceSwitcherFrame, direction: FrameDirection - ) -> Optional[FrameProcessor]: + ) -> FrameProcessor | None: """Handle a frame that controls service switching. The base implementation returns ``None`` for all frames. 
Subclasses @@ -88,7 +88,7 @@ class ServiceSwitcherStrategy(BaseObject): """ return None - async def handle_error(self, error: ErrorFrame) -> Optional[FrameProcessor]: + async def handle_error(self, error: ErrorFrame) -> FrameProcessor | None: """Handle an error from the active service. Called by ``ServiceSwitcher`` when a non-fatal ``ErrorFrame`` is pushed @@ -103,7 +103,7 @@ class ServiceSwitcherStrategy(BaseObject): """ return None - async def _set_active_if_available(self, service: FrameProcessor) -> Optional[FrameProcessor]: + async def _set_active_if_available(self, service: FrameProcessor) -> FrameProcessor | None: """Set the active service to the given one, if it is in the list of available services. If it's not in the list, the request is ignored, as it may have been @@ -139,7 +139,7 @@ class ServiceSwitcherStrategyManual(ServiceSwitcherStrategy): async def handle_frame( self, frame: ServiceSwitcherFrame, direction: FrameDirection - ) -> Optional[FrameProcessor]: + ) -> FrameProcessor | None: """Handle a frame that controls service switching. Args: @@ -179,7 +179,7 @@ class ServiceSwitcherStrategyFailover(ServiceSwitcherStrategyManual): ... """ - async def handle_error(self, error: ErrorFrame) -> Optional[FrameProcessor]: + async def handle_error(self, error: ErrorFrame) -> FrameProcessor | None: """Handle an error from the active service by failing over. Switches to the next service in the list. The failed service remains @@ -223,8 +223,8 @@ class ServiceSwitcher(ParallelPipeline, Generic[StrategyType]): def __init__( self, - services: List[FrameProcessor], - strategy_type: Type[StrategyType] = ServiceSwitcherStrategyManual, + services: list[FrameProcessor], + strategy_type: type[StrategyType] = ServiceSwitcherStrategyManual, ): """Initialize the service switcher with a list of services and a switching strategy. 
@@ -244,14 +244,14 @@ class ServiceSwitcher(ParallelPipeline, Generic[StrategyType]): return self._strategy @property - def services(self) -> List[FrameProcessor]: + def services(self) -> list[FrameProcessor]: """Return the list of available services.""" return self._services @staticmethod def _make_pipeline_definitions( - services: List[FrameProcessor], strategy: ServiceSwitcherStrategy - ) -> List[Any]: + services: list[FrameProcessor], strategy: ServiceSwitcherStrategy + ) -> list[Any]: pipelines = [] for service in services: pipelines.append(ServiceSwitcher._make_pipeline_definition(service, strategy)) diff --git a/src/pipecat/pipeline/sync_parallel_pipeline.py b/src/pipecat/pipeline/sync_parallel_pipeline.py index 74cfdfdb9..b265e9eb3 100644 --- a/src/pipecat/pipeline/sync_parallel_pipeline.py +++ b/src/pipecat/pipeline/sync_parallel_pipeline.py @@ -20,7 +20,6 @@ import asyncio from dataclasses import dataclass from enum import Enum from itertools import chain -from typing import List from loguru import logger @@ -215,7 +214,7 @@ class SyncParallelPipeline(BasePipeline): return self._pipelines @property - def entry_processors(self) -> List["FrameProcessor"]: + def entry_processors(self) -> list["FrameProcessor"]: """Return the list of entry processors for this processor. Entry processors are the first processors in a compound processor @@ -228,7 +227,7 @@ class SyncParallelPipeline(BasePipeline): """ return [s["processor"] for s in self._sources] - def processors_with_metrics(self) -> List[FrameProcessor]: + def processors_with_metrics(self) -> list[FrameProcessor]: """Collect processors that can generate metrics from all parallel pipelines. Returns: diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py index 1f9de78a6..394b3d2e2 100644 --- a/src/pipecat/pipeline/task.py +++ b/src/pipecat/pipeline/task.py @@ -14,8 +14,9 @@ including heartbeats, idle detection, and observer integration. 
import asyncio import importlib.util import os +from collections.abc import AsyncIterable, Iterable from pathlib import Path -from typing import Any, AsyncIterable, Dict, Iterable, List, Optional, Set, Tuple, Type, TypeVar +from typing import Any, TypeVar from loguru import logger from pydantic import BaseModel, ConfigDict, Field @@ -74,7 +75,7 @@ class IdleFrameObserver(BaseObserver): """ - def __init__(self, *, idle_event: asyncio.Event, idle_timeout_frames: Tuple[Type[Frame], ...]): + def __init__(self, *, idle_event: asyncio.Event, idle_timeout_frames: tuple[type[Frame], ...]): """Initialize the observer. Args: @@ -134,7 +135,7 @@ class PipelineParams(BaseModel): heartbeats_monitor_secs: float = HEARTBEAT_MONITOR_SECS report_only_initial_ttfb: bool = False send_initial_empty_metrics: bool = True - start_metadata: Dict[str, Any] = Field(default_factory=dict) + start_metadata: dict[str, Any] = Field(default_factory=dict) class PipelineTask(BasePipelineTask): @@ -190,22 +191,22 @@ class PipelineTask(BasePipelineTask): self, pipeline: BasePipeline, *, - params: Optional[PipelineParams] = None, - additional_span_attributes: Optional[dict] = None, + params: PipelineParams | None = None, + additional_span_attributes: dict | None = None, cancel_on_idle_timeout: bool = True, cancel_timeout_secs: float = CANCEL_TIMEOUT_SECS, check_dangling_tasks: bool = True, - clock: Optional[BaseClock] = None, - conversation_id: Optional[str] = None, + clock: BaseClock | None = None, + conversation_id: str | None = None, enable_tracing: bool = False, enable_turn_tracking: bool = True, enable_rtvi: bool = True, - idle_timeout_frames: Tuple[Type[Frame], ...] 
= (BotSpeakingFrame, UserSpeakingFrame), - idle_timeout_secs: Optional[float] = IDLE_TIMEOUT_SECS, - observers: Optional[List[BaseObserver]] = None, - rtvi_processor: Optional[RTVIProcessor] = None, - rtvi_observer_params: Optional[RTVIObserverParams] = None, - task_manager: Optional[BaseTaskManager] = None, + idle_timeout_frames: tuple[type[Frame], ...] = (BotSpeakingFrame, UserSpeakingFrame), + idle_timeout_secs: float | None = IDLE_TIMEOUT_SECS, + observers: list[BaseObserver] | None = None, + rtvi_processor: RTVIProcessor | None = None, + rtvi_observer_params: RTVIObserverParams | None = None, + task_manager: BaseTaskManager | None = None, ): """Initialize the PipelineTask. @@ -246,10 +247,10 @@ class PipelineTask(BasePipelineTask): self._enable_turn_tracking = enable_turn_tracking self._idle_timeout_secs = idle_timeout_secs observers = observers or [] - self._turn_tracking_observer: Optional[TurnTrackingObserver] = None - self._user_bot_latency_observer: Optional[UserBotLatencyObserver] = None - self._turn_trace_observer: Optional[TurnTraceObserver] = None - self._tracing_context: Optional[TracingContext] = None + self._turn_tracking_observer: TurnTrackingObserver | None = None + self._user_bot_latency_observer: UserBotLatencyObserver | None = None + self._turn_trace_observer: TurnTraceObserver | None = None + self._tracing_context: TracingContext | None = None if self._enable_turn_tracking: self._turn_tracking_observer = TurnTrackingObserver() observers.append(self._turn_tracking_observer) @@ -278,13 +279,13 @@ class PipelineTask(BasePipelineTask): # This queue is the queue used to push frames to the pipeline. self._push_queue = asyncio.Queue() - self._process_push_task: Optional[asyncio.Task] = None + self._process_push_task: asyncio.Task | None = None # This is the heartbeat queue. When a heartbeat frame is received in the # down queue we add it to the heartbeat queue for processing. 
self._heartbeat_queue = asyncio.Queue() - self._heartbeat_push_task: Optional[asyncio.Task] = None - self._heartbeat_monitor_task: Optional[asyncio.Task] = None + self._heartbeat_push_task: asyncio.Task | None = None + self._heartbeat_monitor_task: asyncio.Task | None = None # RTVI support self._rtvi = None @@ -323,7 +324,7 @@ class PipelineTask(BasePipelineTask): # processor we consider the pipeline is not idle. We use an observer # which will be listening any part of the pipeline. self._idle_event = asyncio.Event() - self._idle_monitor_task: Optional[asyncio.Task] = None + self._idle_monitor_task: asyncio.Task | None = None if self._idle_timeout_secs: idle_frame_observer = IdleFrameObserver( idle_event=self._idle_event, @@ -365,8 +366,8 @@ class PipelineTask(BasePipelineTask): # in. This is mainly for efficiency reason because each event handler # creates a task and most likely you only care about one or two frame # types. - self._reached_upstream_types: Set[Type[Frame]] = set() - self._reached_downstream_types: Set[Type[Frame]] = set() + self._reached_upstream_types: set[type[Frame]] = set() + self._reached_downstream_types: set[type[Frame]] = set() self._register_event_handler("on_frame_reached_upstream") self._register_event_handler("on_frame_reached_downstream") self._register_event_handler("on_idle_timeout") @@ -395,7 +396,7 @@ class PipelineTask(BasePipelineTask): return self._pipeline @property - def turn_tracking_observer(self) -> Optional[TurnTrackingObserver]: + def turn_tracking_observer(self) -> TurnTrackingObserver | None: """Get the turn tracking observer if enabled. Returns: @@ -404,7 +405,7 @@ class PipelineTask(BasePipelineTask): return self._turn_tracking_observer @property - def turn_trace_observer(self) -> Optional[TurnTraceObserver]: + def turn_trace_observer(self) -> TurnTraceObserver | None: """Get the turn trace observer if enabled. 
Returns: @@ -424,7 +425,7 @@ class PipelineTask(BasePipelineTask): return self._rtvi @property - def reached_upstream_types(self) -> Tuple[Type[Frame], ...]: + def reached_upstream_types(self) -> tuple[type[Frame], ...]: """Get the currently configured upstream frame type filters. Returns: @@ -433,7 +434,7 @@ class PipelineTask(BasePipelineTask): return tuple(self._reached_upstream_types) @property - def reached_downstream_types(self) -> Tuple[Type[Frame], ...]: + def reached_downstream_types(self) -> tuple[type[Frame], ...]: """Get the currently configured downstream frame type filters. Returns: @@ -457,7 +458,7 @@ class PipelineTask(BasePipelineTask): """ await self._observer.remove_observer(observer) - def set_reached_upstream_filter(self, types: Tuple[Type[Frame], ...]): + def set_reached_upstream_filter(self, types: tuple[type[Frame], ...]): """Set which frame types trigger the on_frame_reached_upstream event. Args: @@ -465,7 +466,7 @@ class PipelineTask(BasePipelineTask): """ self._reached_upstream_types = set(types) - def set_reached_downstream_filter(self, types: Tuple[Type[Frame], ...]): + def set_reached_downstream_filter(self, types: tuple[type[Frame], ...]): """Set which frame types trigger the on_frame_reached_downstream event. Args: @@ -473,7 +474,7 @@ class PipelineTask(BasePipelineTask): """ self._reached_downstream_types = set(types) - def add_reached_upstream_filter(self, types: Tuple[Type[Frame], ...]): + def add_reached_upstream_filter(self, types: tuple[type[Frame], ...]): """Add frame types to trigger the on_frame_reached_upstream event. Args: @@ -481,7 +482,7 @@ class PipelineTask(BasePipelineTask): """ self._reached_upstream_types.update(types) - def add_reached_downstream_filter(self, types: Tuple[Type[Frame], ...]): + def add_reached_downstream_filter(self, types: tuple[type[Frame], ...]): """Add frame types to trigger the on_frame_reached_downstream event. 
Args: @@ -509,7 +510,7 @@ class PipelineTask(BasePipelineTask): logger.debug(f"Task {self} scheduled to stop when done") await self.queue_frame(EndFrame()) - async def cancel(self, *, reason: Optional[str] = None): + async def cancel(self, *, reason: str | None = None): """Request the running pipeline to cancel. Args: @@ -597,7 +598,7 @@ class PipelineTask(BasePipelineTask): for frame in frames: await self.queue_frame(frame, direction) - async def _cancel(self, *, reason: Optional[str] = None): + async def _cancel(self, *, reason: str | None = None): """Internal cancellation logic for the pipeline task. Args: @@ -685,7 +686,7 @@ class PipelineTask(BasePipelineTask): self._pipeline_end_event.wait(), timeout=self._cancel_timeout_secs ) logger.debug(f"{self}: {frame} reached the end of the pipeline.") - except asyncio.TimeoutError: + except TimeoutError: logger.warning( f"{self}: timeout waiting for {frame} to reach the end of the pipeline (being blocked somewhere?)." ) @@ -895,7 +896,7 @@ class PipelineTask(BasePipelineTask): process_time = (self._clock.get_time() - frame.timestamp) / 1_000_000_000 logger.trace(f"{self}: heartbeat frame processed in {process_time} seconds") self._heartbeat_queue.task_done() - except asyncio.TimeoutError: + except TimeoutError: logger.warning( f"{self}: heartbeat frame not received for more than {wait_time} seconds" ) @@ -913,7 +914,7 @@ class PipelineTask(BasePipelineTask): try: await asyncio.wait_for(self._idle_event.wait(), timeout=self._idle_timeout_secs) self._idle_event.clear() - except asyncio.TimeoutError: + except TimeoutError: running = await self._idle_timeout_detected() async def _idle_timeout_detected(self) -> bool: @@ -972,7 +973,7 @@ class PipelineTask(BasePipelineTask): if tasks: logger.warning(f"{self} dangling tasks detected: {tasks}") - def _create_start_metadata(self) -> Dict[str, Any]: + def _create_start_metadata(self) -> dict[str, Any]: """Build and return start metadata including user-provided values.""" 
start_metadata = {} @@ -981,7 +982,7 @@ class PipelineTask(BasePipelineTask): return start_metadata - def _find_processor(self, processor: FrameProcessor, processor_type: Type[T]) -> Optional[T]: + def _find_processor(self, processor: FrameProcessor, processor_type: type[T]) -> T | None: """Recursively find a processor of the given type in the pipeline.""" if isinstance(processor, processor_type): return processor diff --git a/src/pipecat/pipeline/task_observer.py b/src/pipecat/pipeline/task_observer.py index 44326d7c3..c6603c1d8 100644 --- a/src/pipecat/pipeline/task_observer.py +++ b/src/pipecat/pipeline/task_observer.py @@ -12,7 +12,7 @@ the main pipeline execution. """ import asyncio -from typing import Any, Dict, List, Optional +from typing import Any from attr import dataclass @@ -61,7 +61,7 @@ class TaskObserver(BaseObserver): def __init__( self, *, - observers: Optional[List[BaseObserver]] = None, + observers: list[BaseObserver] | None = None, task_manager: BaseTaskManager, **kwargs, ): @@ -75,7 +75,7 @@ class TaskObserver(BaseObserver): super().__init__(**kwargs) self._observers = observers or [] self._task_manager = task_manager - self._proxies: Optional[Dict[BaseObserver, Proxy]] = ( + self._proxies: dict[BaseObserver, Proxy] | None = ( None # Becomes a dict after start() is called ) @@ -164,7 +164,7 @@ class TaskObserver(BaseObserver): proxy = Proxy(queue=queue, task=task, observer=observer) return proxy - def _create_proxies(self, observers: List[BaseObserver]) -> Dict[BaseObserver, Proxy]: + def _create_proxies(self, observers: list[BaseObserver]) -> dict[BaseObserver, Proxy]: """Create proxies for all observers.""" proxies = {} for observer in observers: diff --git a/src/pipecat/processors/aggregators/dtmf_aggregator.py b/src/pipecat/processors/aggregators/dtmf_aggregator.py index ea56ba6fc..289aca085 100644 --- a/src/pipecat/processors/aggregators/dtmf_aggregator.py +++ b/src/pipecat/processors/aggregators/dtmf_aggregator.py @@ -12,7 +12,6 @@ for 
downstream processing by LLM context aggregators. """ import asyncio -from typing import Optional from pipecat.audio.dtmf.types import KeypadEntry from pipecat.frames.frames import ( @@ -62,7 +61,7 @@ class DTMFAggregator(FrameProcessor): self._prefix = prefix self._digit_event = asyncio.Event() - self._aggregation_task: Optional[asyncio.Task] = None + self._aggregation_task: asyncio.Task | None = None async def cleanup(self) -> None: """Clean up resources.""" @@ -130,7 +129,7 @@ class DTMFAggregator(FrameProcessor): try: await asyncio.wait_for(self._digit_event.wait(), timeout=self._idle_timeout) self._digit_event.clear() - except asyncio.TimeoutError: + except TimeoutError: if self._aggregation: await self._flush_aggregation() diff --git a/src/pipecat/processors/aggregators/gated.py b/src/pipecat/processors/aggregators/gated.py index 6a37fce66..5bb73e334 100644 --- a/src/pipecat/processors/aggregators/gated.py +++ b/src/pipecat/processors/aggregators/gated.py @@ -11,8 +11,6 @@ custom gate open/close functions, allowing for conditional frame buffering and release in frame processing pipelines. """ -from typing import List, Tuple - from loguru import logger from pipecat.frames.frames import Frame, SystemFrame @@ -48,7 +46,7 @@ class GatedAggregator(FrameProcessor): self._gate_close_fn = gate_close_fn self._gate_open = start_open self._direction = direction - self._accumulator: List[Tuple[Frame, FrameDirection]] = [] + self._accumulator: list[tuple[Frame, FrameDirection]] = [] async def process_frame(self, frame: Frame, direction: FrameDirection): """Process incoming frames with gated accumulation logic. 
diff --git a/src/pipecat/processors/aggregators/llm_context.py b/src/pipecat/processors/aggregators/llm_context.py index 7bcb68c86..b34dbfaec 100644 --- a/src/pipecat/processors/aggregators/llm_context.py +++ b/src/pipecat/processors/aggregators/llm_context.py @@ -19,8 +19,9 @@ import base64 import copy import io import wave +from collections.abc import Callable from dataclasses import dataclass -from typing import Any, Callable, List, Optional, TypeAlias, Union +from typing import Any, TypeAlias from loguru import logger from openai._types import NOT_GIVEN as OPEN_AI_NOT_GIVEN @@ -57,7 +58,7 @@ class LLMSpecificMessage: message: Any -LLMContextMessage: TypeAlias = Union[LLMStandardMessage, LLMSpecificMessage] +LLMContextMessage: TypeAlias = LLMStandardMessage | LLMSpecificMessage class LLMContext: @@ -70,7 +71,7 @@ class LLMContext: def __init__( self, - messages: Optional[List[LLMContextMessage]] = None, + messages: list[LLMContextMessage] | None = None, tools: ToolsSchema | NotGiven = NOT_GIVEN, tool_choice: LLMContextToolChoice | NotGiven = NOT_GIVEN, ): @@ -81,7 +82,7 @@ class LLMContext: tools: Available tools for the LLM to use. tool_choice: Tool selection strategy for the LLM. """ - self._messages: List[LLMContextMessage] = messages if messages else [] + self._messages: list[LLMContextMessage] = messages if messages else [] self._tools: ToolsSchema | NotGiven = LLMContext._normalize_and_validate_tools(tools) self._tool_choice: LLMContextToolChoice | NotGiven = tool_choice @@ -90,7 +91,7 @@ class LLMContext: *, role: str = "user", url: str, - text: Optional[str] = None, + text: str | None = None, ) -> LLMContextMessage: """Create a context message containing an image URL. @@ -114,7 +115,7 @@ class LLMContext: format: str, size: tuple[int, int], image: bytes, - text: Optional[str] = None, + text: str | None = None, ) -> LLMContextMessage: """Create a context message containing an image. 
@@ -187,7 +188,7 @@ class LLMContext: return {"role": role, "content": content} @property - def messages(self) -> List[LLMContextMessage]: + def messages(self) -> list[LLMContextMessage]: """Get the current messages list. NOTE: This is equivalent to calling `get_messages()` with no filter. If @@ -201,10 +202,10 @@ class LLMContext: def get_messages( self, - llm_specific_filter: Optional[str] = None, + llm_specific_filter: str | None = None, *, truncate_large_values: bool = False, - ) -> List[LLMContextMessage]: + ) -> list[LLMContextMessage]: """Get the current messages list. Args: @@ -242,8 +243,8 @@ class LLMContext: @staticmethod def _truncate_large_values_from_messages( - messages: List[LLMContextMessage], - ) -> List[LLMContextMessage]: + messages: list[LLMContextMessage], + ) -> list[LLMContextMessage]: """Return deep copies of messages with large values replaced by placeholders. For standard (universal-format) messages, the following known binary @@ -344,7 +345,7 @@ class LLMContext: """ self._messages.append(message) - def add_messages(self, messages: List[LLMContextMessage]): + def add_messages(self, messages: list[LLMContextMessage]): """Add multiple messages to the context. Args: @@ -352,7 +353,7 @@ class LLMContext: """ self._messages.extend(messages) - def set_messages(self, messages: List[LLMContextMessage]): + def set_messages(self, messages: list[LLMContextMessage]): """Replace all messages in the context. Args: @@ -361,7 +362,7 @@ class LLMContext: self._messages[:] = messages def transform_messages( - self, transform: Callable[[List[LLMContextMessage]], List[LLMContextMessage]] + self, transform: Callable[[list[LLMContextMessage]], list[LLMContextMessage]] ): """Transform the current messages using the provided function. @@ -393,7 +394,7 @@ class LLMContext: format: str, size: tuple[int, int], image: bytes, - text: Optional[str] = None, + text: str | None = None, role: str = "user", ): """Add a message containing an image frame. 
diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py index 40be383fa..516b6062f 100644 --- a/src/pipecat/processors/aggregators/llm_context_summarizer.py +++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py @@ -9,7 +9,7 @@ import asyncio import uuid from dataclasses import dataclass -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING from loguru import logger @@ -101,7 +101,7 @@ class LLMContextSummarizer(BaseObject): self, *, context: LLMContext, - config: Optional[LLMAutoContextSummarizationConfig] = None, + config: LLMAutoContextSummarizationConfig | None = None, auto_trigger: bool = True, ): """Initialize the context summarizer. @@ -122,10 +122,10 @@ class LLMContextSummarizer(BaseObject): self._auto_config = config or LLMAutoContextSummarizationConfig() self._auto_trigger = auto_trigger - self._task_manager: Optional[BaseTaskManager] = None + self._task_manager: BaseTaskManager | None = None self._summarization_in_progress = False - self._pending_summary_request_id: Optional[str] = None + self._pending_summary_request_id: str | None = None self._register_event_handler("on_request_summarization", sync=True) self._register_event_handler("on_summary_applied") @@ -269,9 +269,7 @@ class LLMContextSummarizer(BaseObject): logger.debug(f"{self}: ✓ Summarization needed - {', '.join(reason)}") return True - async def _request_summarization( - self, config_override: Optional[LLMContextSummaryConfig] = None - ): + async def _request_summarization(self, config_override: LLMContextSummaryConfig | None = None): """Request context summarization from LLM service. 
Creates a summarization request frame and either handles it directly @@ -338,7 +336,7 @@ class LLMContextSummarizer(BaseObject): summary=summary, last_summarized_index=last_index, ) - except asyncio.TimeoutError: + except TimeoutError: error = f"Context summarization timed out after {timeout}s" logger.error(f"{self}: {error}") result_frame = LLMContextSummaryResultFrame( diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index bc4129b43..bf910a0c4 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -15,8 +15,9 @@ import asyncio import json import warnings from abc import abstractmethod +from collections.abc import Callable from dataclasses import dataclass, field -from typing import Any, Callable, Dict, List, Literal, Optional, Set, Type +from typing import Any, Literal from loguru import logger @@ -119,14 +120,14 @@ class LLMUserAggregatorParams: filter_incomplete_user_turns is True. 
""" - user_turn_strategies: Optional[UserTurnStrategies] = None - user_mute_strategies: List[BaseUserMuteStrategy] = field(default_factory=list) + user_turn_strategies: UserTurnStrategies | None = None + user_mute_strategies: list[BaseUserMuteStrategy] = field(default_factory=list) user_turn_stop_timeout: float = 5.0 user_idle_timeout: float = 0 - vad_analyzer: Optional[VADAnalyzer] = None + vad_analyzer: VADAnalyzer | None = None audio_idle_timeout: float = 1.0 filter_incomplete_user_turns: bool = False - user_turn_completion_config: Optional[UserTurnCompletionConfig] = None + user_turn_completion_config: UserTurnCompletionConfig | None = None @dataclass @@ -145,14 +146,14 @@ class LLMAssistantAggregatorParams: """ enable_auto_context_summarization: bool = False - auto_context_summarization_config: Optional[LLMAutoContextSummarizationConfig] = None + auto_context_summarization_config: LLMAutoContextSummarizationConfig | None = None # --------------------------------------------------------------------------- # Deprecated field names — kept for backward compatibility. # Use enable_auto_context_summarization and auto_context_summarization_config instead. 
# --------------------------------------------------------------------------- - enable_context_summarization: Optional[bool] = None - context_summarization_config: Optional[LLMContextSummarizationConfig] = None + enable_context_summarization: bool | None = None + context_summarization_config: LLMContextSummarizationConfig | None = None def __post_init__(self): if self.enable_context_summarization is not None: @@ -198,7 +199,7 @@ class UserTurnStoppedMessage: content: str timestamp: str - user_id: Optional[str] = None + user_id: str | None = None @dataclass @@ -259,10 +260,10 @@ class LLMContextAggregator(FrameProcessor): self._context = context self._role = role - self._aggregation: List[TextPartForConcatenation] = [] + self._aggregation: list[TextPartForConcatenation] = [] @property - def messages(self) -> List[LLMContextMessage]: + def messages(self) -> list[LLMContextMessage]: """Get messages from the LLM context. Returns: @@ -322,7 +323,7 @@ class LLMContextAggregator(FrameProcessor): self._context.set_messages(messages) def transform_messages( - self, transform: Callable[[List[LLMContextMessage]], List[LLMContextMessage]] + self, transform: Callable[[list[LLMContextMessage]], list[LLMContextMessage]] ): """Transform the context messages using a provided function. @@ -423,7 +424,7 @@ class LLMUserAggregator(LLMContextAggregator): self, context: LLMContext, *, - params: Optional[LLMUserAggregatorParams] = None, + params: LLMUserAggregatorParams | None = None, **kwargs, ): """Initialize the user context aggregator. 
@@ -473,7 +474,7 @@ class LLMUserAggregator(LLMContextAggregator): self._user_idle_controller.add_event_handler("on_user_turn_idle", self._on_user_turn_idle) # VAD controller - self._vad_controller: Optional[VADController] = None + self._vad_controller: VADController | None = None if self._params.vad_analyzer: self._vad_controller = VADController( self._params.vad_analyzer, @@ -681,7 +682,7 @@ class LLMUserAggregator(LLMContextAggregator): ) ) - async def _queued_broadcast_frame(self, frame_cls: Type[Frame], **kwargs): + async def _queued_broadcast_frame(self, frame_cls: type[Frame], **kwargs): """Broadcasts a frame upstream and queues it for internal processing. Queues the frame so it flows through `process_frame` and is handled @@ -701,7 +702,7 @@ class LLMUserAggregator(LLMContextAggregator): ): await self.queue_frame(frame, direction) - async def _on_broadcast_frame(self, controller, frame_cls: Type[Frame], **kwargs): + async def _on_broadcast_frame(self, controller, frame_cls: type[Frame], **kwargs): await self._queued_broadcast_frame(frame_cls, **kwargs) async def _on_vad_speech_started(self, controller): @@ -768,7 +769,7 @@ class LLMUserAggregator(LLMContextAggregator): async def _maybe_emit_user_turn_stopped( self, - strategy: Optional[BaseUserTurnStopStrategy] = None, + strategy: BaseUserTurnStopStrategy | None = None, on_session_end: bool = False, ): """Maybe emit user turn stopped event. @@ -832,7 +833,7 @@ class LLMAssistantAggregator(LLMContextAggregator): self, context: LLMContext, *, - params: Optional[LLMAssistantAggregatorParams] = None, + params: LLMAssistantAggregatorParams | None = None, **kwargs, ): """Initialize the assistant context aggregator. 
@@ -845,9 +846,9 @@ class LLMAssistantAggregator(LLMContextAggregator): super().__init__(context=context, role="assistant", **kwargs) self._params = params or LLMAssistantAggregatorParams() - self._function_calls_in_progress: Dict[str, Optional[FunctionCallInProgressFrame]] = {} - self._function_calls_image_results: Dict[str, UserImageRawFrame] = {} - self._context_updated_tasks: Set[asyncio.Task] = set() + self._function_calls_in_progress: dict[str, FunctionCallInProgressFrame | None] = {} + self._function_calls_image_results: dict[str, UserImageRawFrame] = {} + self._context_updated_tasks: set[asyncio.Task] = set() self._user_speaking: bool = False self._bot_speaking: bool = False @@ -862,14 +863,14 @@ class LLMAssistantAggregator(LLMContextAggregator): self._thought_append_to_context = False self._thought_llm: str = "" - self._thought_aggregation: List[TextPartForConcatenation] = [] + self._thought_aggregation: list[TextPartForConcatenation] = [] self._thought_start_time: str = "" # Context summarization — always create the summarizer so that manually # pushed LLMSummarizeContextFrame frames are always handled. # Auto-triggering based on thresholds is only enabled when # enable_auto_context_summarization is True. - self._summarizer: Optional[LLMContextSummarizer] = LLMContextSummarizer( + self._summarizer: LLMContextSummarizer | None = LLMContextSummarizer( context=self._context, config=self._params.auto_context_summarization_config, auto_trigger=self._params.enable_auto_context_summarization, @@ -1475,8 +1476,8 @@ class LLMContextAggregatorPair: self, context: LLMContext, *, - user_params: Optional[LLMUserAggregatorParams] = None, - assistant_params: Optional[LLMAssistantAggregatorParams] = None, + user_params: LLMUserAggregatorParams | None = None, + assistant_params: LLMAssistantAggregatorParams | None = None, ): """Initialize the LLM context aggregator pair. 
diff --git a/src/pipecat/processors/aggregators/llm_text_processor.py b/src/pipecat/processors/aggregators/llm_text_processor.py index dce739656..862cf138b 100644 --- a/src/pipecat/processors/aggregators/llm_text_processor.py +++ b/src/pipecat/processors/aggregators/llm_text_processor.py @@ -13,8 +13,6 @@ components such as TTS services or context aggregators. It can be used to pre-ag and categorize, modify, or filter direct output tokens from the LLM. """ -from typing import Optional - from pipecat.frames.frames import ( AggregatedTextFrame, EndFrame, @@ -38,7 +36,7 @@ class LLMTextProcessor(FrameProcessor): output tokens from the LLM. """ - def __init__(self, *, text_aggregator: Optional[BaseTextAggregator] = None, **kwargs): + def __init__(self, *, text_aggregator: BaseTextAggregator | None = None, **kwargs): """Initialize the LLM text processor. Args: @@ -91,7 +89,7 @@ class LLMTextProcessor(FrameProcessor): out_frame.skip_tts = in_frame.skip_tts await self.push_frame(out_frame) - async def _handle_llm_end(self, skip_tts: Optional[bool] = None): + async def _handle_llm_end(self, skip_tts: bool | None = None): # Flush any remaining text remaining = await self._text_aggregator.flush() if remaining: diff --git a/src/pipecat/processors/async_generator.py b/src/pipecat/processors/async_generator.py index 4fac1a9d4..643b5eacc 100644 --- a/src/pipecat/processors/async_generator.py +++ b/src/pipecat/processors/async_generator.py @@ -7,7 +7,8 @@ """Async generator processor for frame serialization and streaming.""" import asyncio -from typing import Any, AsyncGenerator +from collections.abc import AsyncGenerator +from typing import Any from pipecat.frames.frames import ( CancelFrame, diff --git a/src/pipecat/processors/audio/audio_buffer_processor.py b/src/pipecat/processors/audio/audio_buffer_processor.py index 40a907224..21d6a4528 100644 --- a/src/pipecat/processors/audio/audio_buffer_processor.py +++ b/src/pipecat/processors/audio/audio_buffer_processor.py @@ -11,8 
+11,6 @@ of audio from both user input and bot output sources, with support for various a configurations and event-driven processing. """ -from typing import Optional - from pipecat.audio.utils import create_stream_resampler, interleave_stereo_audio, mix_audio from pipecat.frames.frames import ( BotStartedSpeakingFrame, @@ -55,7 +53,7 @@ class AudioBufferProcessor(FrameProcessor): def __init__( self, *, - sample_rate: Optional[int] = None, + sample_rate: int | None = None, num_channels: int = 1, buffer_size: int = 0, enable_turn_audio: bool = False, @@ -263,7 +261,7 @@ class AudioBufferProcessor(FrameProcessor): silence_needed = target_position - current_len buffer.extend(b"\x00" * silence_needed) - async def _process_turn_recording(self, frame: Frame, resampled_audio: Optional[bytes] = None): + async def _process_turn_recording(self, frame: Frame, resampled_audio: bytes | None = None): """Process frames for turn-based audio recording.""" # Speaking state (_user_speaking / _bot_speaking) is maintained by # _process_recording so it is always up-to-date here. diff --git a/src/pipecat/processors/audio/vad_processor.py b/src/pipecat/processors/audio/vad_processor.py index aaa769061..75bcaf1d9 100644 --- a/src/pipecat/processors/audio/vad_processor.py +++ b/src/pipecat/processors/audio/vad_processor.py @@ -10,8 +10,6 @@ This module provides a VADProcessor that wraps a VADController to process audio frames and push VAD-related frames into the pipeline. 
""" -from typing import Type - from loguru import logger from pipecat.audio.vad.vad_analyzer import VADAnalyzer @@ -94,7 +92,7 @@ class VADProcessor(FrameProcessor): await self.push_frame(frame, direction) @self._vad_controller.event_handler("on_broadcast_frame") - async def on_broadcast_frame(_controller, frame_cls: Type[Frame], **kwargs): + async def on_broadcast_frame(_controller, frame_cls: type[Frame], **kwargs): await self.broadcast_frame(frame_cls, **kwargs) async def cleanup(self): diff --git a/src/pipecat/processors/consumer_processor.py b/src/pipecat/processors/consumer_processor.py index f7349031a..8600a6ec4 100644 --- a/src/pipecat/processors/consumer_processor.py +++ b/src/pipecat/processors/consumer_processor.py @@ -7,7 +7,7 @@ """Consumer processor for consuming frames from ProducerProcessor queues.""" import asyncio -from typing import Awaitable, Callable, Optional +from collections.abc import Awaitable, Callable from pipecat.frames.frames import CancelFrame, EndFrame, Frame, StartFrame from pipecat.processors.frame_processor import FrameDirection, FrameProcessor @@ -42,7 +42,7 @@ class ConsumerProcessor(FrameProcessor): self._transformer = transformer self._direction = direction self._producer = producer - self._consumer_task: Optional[asyncio.Task] = None + self._consumer_task: asyncio.Task | None = None async def process_frame(self, frame: Frame, direction: FrameDirection): """Process incoming frames and handle lifecycle events. 
diff --git a/src/pipecat/processors/filters/frame_filter.py b/src/pipecat/processors/filters/frame_filter.py index 3784409a6..67dd5b7b4 100644 --- a/src/pipecat/processors/filters/frame_filter.py +++ b/src/pipecat/processors/filters/frame_filter.py @@ -6,8 +6,6 @@ """Frame filtering processor for the Pipecat framework.""" -from typing import Tuple, Type - from pipecat.frames.frames import EndFrame, Frame, SystemFrame from pipecat.processors.frame_processor import FrameDirection, FrameProcessor @@ -20,7 +18,7 @@ class FrameFilter(FrameProcessor): automatically allowed to pass through to maintain pipeline integrity. """ - def __init__(self, types: Tuple[Type[Frame], ...]): + def __init__(self, types: tuple[type[Frame], ...]): """Initialize the frame filter. Args: diff --git a/src/pipecat/processors/filters/function_filter.py b/src/pipecat/processors/filters/function_filter.py index 46b1945ce..d955b8f9e 100644 --- a/src/pipecat/processors/filters/function_filter.py +++ b/src/pipecat/processors/filters/function_filter.py @@ -10,7 +10,7 @@ This module provides a processor that filters frames based on a custom function, allowing for flexible frame filtering logic in processing pipelines. 
""" -from typing import Awaitable, Callable, Optional +from collections.abc import Awaitable, Callable from pipecat.frames.frames import CancelFrame, EndFrame, Frame, StartFrame, SystemFrame from pipecat.processors.frame_processor import FrameDirection, FrameProcessor @@ -29,7 +29,7 @@ class FunctionFilter(FrameProcessor): def __init__( self, filter: FilterType, - direction: Optional[FrameDirection] = FrameDirection.DOWNSTREAM, + direction: FrameDirection | None = FrameDirection.DOWNSTREAM, filter_system_frames: bool = False, **kwargs, ): diff --git a/src/pipecat/processors/filters/wake_check_filter.py b/src/pipecat/processors/filters/wake_check_filter.py index 6a9e524e6..e5159f4d5 100644 --- a/src/pipecat/processors/filters/wake_check_filter.py +++ b/src/pipecat/processors/filters/wake_check_filter.py @@ -18,7 +18,6 @@ import re import time import warnings from enum import Enum -from typing import List from loguru import logger @@ -71,7 +70,7 @@ class WakeCheckFilter(FrameProcessor): self.wake_timer = 0.0 self.accumulator = "" - def __init__(self, wake_phrases: List[str], keepalive_timeout: float = 3): + def __init__(self, wake_phrases: list[str], keepalive_timeout: float = 3): """Initialize the wake phrase filter. .. 
deprecated:: 0.0.106 diff --git a/src/pipecat/processors/filters/wake_notifier_filter.py b/src/pipecat/processors/filters/wake_notifier_filter.py index 91c9b5969..f244dff43 100644 --- a/src/pipecat/processors/filters/wake_notifier_filter.py +++ b/src/pipecat/processors/filters/wake_notifier_filter.py @@ -6,7 +6,7 @@ """Wake notifier filter for conditional frame-based notifications.""" -from typing import Awaitable, Callable, Tuple, Type +from collections.abc import Awaitable, Callable from pipecat.frames.frames import Frame from pipecat.processors.frame_processor import FrameDirection, FrameProcessor @@ -25,7 +25,7 @@ class WakeNotifierFilter(FrameProcessor): self, notifier: BaseNotifier, *, - types: Tuple[Type[Frame], ...], + types: tuple[type[Frame], ...], filter: Callable[[Frame], Awaitable[bool]], **kwargs, ): diff --git a/src/pipecat/processors/frame_processor.py b/src/pipecat/processors/frame_processor.py index 77a35fb29..92cf920ae 100644 --- a/src/pipecat/processors/frame_processor.py +++ b/src/pipecat/processors/frame_processor.py @@ -11,21 +11,17 @@ audio/video processing pipelines. It includes frame processors, pipeline management, and frame flow control mechanisms. 
""" +from __future__ import annotations + import asyncio import dataclasses import traceback +from collections.abc import Awaitable, Callable, Coroutine from dataclasses import dataclass from enum import Enum from typing import ( Any, - Awaitable, - Callable, - Coroutine, - List, Optional, - Tuple, - Type, - Union, ) from loguru import logger @@ -79,7 +75,7 @@ class FrameProcessorSetup: clock: BaseClock task_manager: BaseTaskManager - observer: Optional[BaseObserver] = None + observer: BaseObserver | None = None class FrameProcessorQueue(asyncio.PriorityQueue): @@ -100,7 +96,7 @@ class FrameProcessorQueue(asyncio.PriorityQueue): self.__high_counter = 0 self.__low_counter = 0 - async def put(self, item: Tuple[Frame, FrameDirection, FrameCallback]): + async def put(self, item: tuple[Frame, FrameDirection, FrameCallback]): """Put an item into the priority queue. System frames (`SystemFrame`) have higher priority than any other @@ -160,9 +156,9 @@ class FrameProcessor(BaseObject): def __init__( self, *, - name: Optional[str] = None, + name: str | None = None, enable_direct_mode: bool = False, - metrics: Optional[FrameProcessorMetrics] = None, + metrics: FrameProcessorMetrics | None = None, **kwargs, ): """Initialize the frame processor. @@ -174,20 +170,20 @@ class FrameProcessor(BaseObject): **kwargs: Additional arguments passed to parent class. """ super().__init__(name=name, **kwargs) - self._prev: Optional["FrameProcessor"] = None - self._next: Optional["FrameProcessor"] = None + self._prev: FrameProcessor | None = None + self._next: FrameProcessor | None = None # Enable direct mode to skip queues and process frames right away. 
self._enable_direct_mode = enable_direct_mode # Clock - self._clock: Optional[BaseClock] = None + self._clock: BaseClock | None = None # Task Manager - self._task_manager: Optional[BaseTaskManager] = None + self._task_manager: BaseTaskManager | None = None # Observer - self._observer: Optional[BaseObserver] = None + self._observer: BaseObserver | None = None # Other properties self._enable_metrics = False @@ -221,8 +217,8 @@ class FrameProcessor(BaseObject): # frames right away and queues non-system frames for later processing. self.__should_block_system_frames = False self.__input_queue = FrameProcessorQueue() - self.__input_event: Optional[asyncio.Event] = None - self.__input_frame_task: Optional[asyncio.Task] = None + self.__input_event: asyncio.Event | None = None + self.__input_frame_task: asyncio.Task | None = None # The process task processes non-system frames. Non-system frames will # be processed as soon as they are received by the processing task @@ -231,9 +227,9 @@ class FrameProcessor(BaseObject): # `resume_processing_frames()` which will wake up the event. self.__should_block_frames = False self.__process_queue = FrameQueue(frame_getter=lambda item: item[0]) - self.__process_event: Optional[asyncio.Event] = None - self.__process_frame_task: Optional[asyncio.Task] = None - self.__process_current_frame: Optional[Frame] = None + self.__process_event: asyncio.Event | None = None + self.__process_frame_task: asyncio.Task | None = None + self.__process_current_frame: Frame | None = None # Frame processor events. self._register_event_handler("on_before_process_frame", sync=True) @@ -261,7 +257,7 @@ class FrameProcessor(BaseObject): return self._name @property - def processors(self) -> List["FrameProcessor"]: + def processors(self) -> list[FrameProcessor]: """Return the list of sub-processors contained within this processor. Only compound processors (e.g. 
pipelines and parallel pipelines) have @@ -273,7 +269,7 @@ class FrameProcessor(BaseObject): return [] @property - def entry_processors(self) -> List["FrameProcessor"]: + def entry_processors(self) -> list[FrameProcessor]: """Return the list of entry processors for this processor. Entry processors are the first processors in a compound processor @@ -287,7 +283,7 @@ class FrameProcessor(BaseObject): return [] @property - def next(self) -> Optional["FrameProcessor"]: + def next(self) -> FrameProcessor | None: """Get the next processor. Returns: @@ -296,7 +292,7 @@ class FrameProcessor(BaseObject): return self._next @property - def previous(self) -> Optional["FrameProcessor"]: + def previous(self) -> FrameProcessor | None: """Get the previous processor. Returns: @@ -372,7 +368,7 @@ class FrameProcessor(BaseObject): """ self._metrics.set_core_metrics_data(data) - async def start_ttfb_metrics(self, *, start_time: Optional[float] = None): + async def start_ttfb_metrics(self, *, start_time: float | None = None): """Start time-to-first-byte metrics collection. Args: @@ -384,7 +380,7 @@ class FrameProcessor(BaseObject): start_time=start_time, report_only_initial_ttfb=self._report_only_initial_ttfb ) - async def stop_ttfb_metrics(self, *, end_time: Optional[float] = None): + async def stop_ttfb_metrics(self, *, end_time: float | None = None): """Stop time-to-first-byte metrics collection and push results. Args: @@ -396,7 +392,7 @@ class FrameProcessor(BaseObject): if frame: await self.push_frame(frame) - async def start_processing_metrics(self, *, start_time: Optional[float] = None): + async def start_processing_metrics(self, *, start_time: float | None = None): """Start processing metrics collection. 
Args: @@ -406,7 +402,7 @@ class FrameProcessor(BaseObject): if self.can_generate_metrics() and self.metrics_enabled: await self._metrics.start_processing_metrics(start_time=start_time) - async def stop_processing_metrics(self, *, end_time: Optional[float] = None): + async def stop_processing_metrics(self, *, end_time: float | None = None): """Stop processing metrics collection and push results. Args: @@ -458,7 +454,7 @@ class FrameProcessor(BaseObject): await self.stop_processing_metrics() await self.stop_text_aggregation_metrics() - def create_task(self, coroutine: Coroutine, name: Optional[str] = None) -> asyncio.Task: + def create_task(self, coroutine: Coroutine, name: str | None = None) -> asyncio.Task: """Create a new task managed by this processor. Args: @@ -474,7 +470,7 @@ class FrameProcessor(BaseObject): name = f"{self}::{coroutine.cr_code.co_name}" return self.task_manager.create_task(coroutine, name) - async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = 1.0): + async def cancel_task(self, task: asyncio.Task, timeout: float | None = 1.0): """Cancel a task managed by this processor. A default timeout if 1 second is used in order to avoid potential @@ -511,7 +507,7 @@ class FrameProcessor(BaseObject): if self._metrics is not None: await self._metrics.cleanup() - def link(self, processor: "FrameProcessor"): + def link(self, processor: FrameProcessor): """Link this processor to the next processor in the pipeline. Args: @@ -546,7 +542,7 @@ class FrameProcessor(BaseObject): self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM, - callback: Optional[FrameCallback] = None, + callback: FrameCallback | None = None, ): """Queue a frame for processing. @@ -622,7 +618,7 @@ class FrameProcessor(BaseObject): async def push_error( self, error_msg: str, - exception: Optional[Exception] = None, + exception: Exception | None = None, fatal: bool = False, ): """Creates and pushes an ErrorFrame upstream. 
@@ -720,7 +716,7 @@ class FrameProcessor(BaseObject): await self.broadcast_interruption() - async def broadcast_frame(self, frame_cls: Type[Frame], **kwargs): + async def broadcast_frame(self, frame_cls: type[Frame], **kwargs): """Broadcasts a frame of the specified class upstream and downstream. This method creates two instances of the given frame class using the @@ -929,7 +925,7 @@ class FrameProcessor(BaseObject): """Reset non-system frame processing queue.""" self.__process_queue.reset() - def has_queued_frame(self, frame_type: Union[Type[Frame], Type[UninterruptibleFrame]]) -> bool: + def has_queued_frame(self, frame_type: type[Frame] | type[UninterruptibleFrame]) -> bool: """Return True if a frame of the given type is waiting in the processing queue. Delegates to :meth:`FrameQueue.has_frame` so the check is O(distinct @@ -951,7 +947,7 @@ class FrameProcessor(BaseObject): self.__process_frame_task = None async def __process_frame( - self, frame: Frame, direction: FrameDirection, callback: Optional[FrameCallback] + self, frame: Frame, direction: FrameDirection, callback: FrameCallback | None ): try: await self._call_event_handler("on_before_process_frame", frame) diff --git a/src/pipecat/processors/frameworks/langchain.py b/src/pipecat/processors/frameworks/langchain.py index 165f749ea..4400327fc 100644 --- a/src/pipecat/processors/frameworks/langchain.py +++ b/src/pipecat/processors/frameworks/langchain.py @@ -6,8 +6,6 @@ """Langchain integration processor for Pipecat.""" -from typing import Optional, Union - from loguru import logger from pipecat.frames.frames import ( @@ -45,7 +43,7 @@ class LangchainProcessor(FrameProcessor): super().__init__() self._chain = chain self._transcript_key = transcript_key - self._participant_id: Optional[str] = None + self._participant_id: str | None = None def set_participant_id(self, participant_id: str): """Set the participant ID for session tracking. 
@@ -76,7 +74,7 @@ class LangchainProcessor(FrameProcessor): await self.push_frame(frame, direction) @staticmethod - def __get_token_value(text: Union[str, AIMessageChunk]) -> str: + def __get_token_value(text: str | AIMessageChunk) -> str: """Extract token value from various text types. Args: diff --git a/src/pipecat/processors/frameworks/rtvi/frames.py b/src/pipecat/processors/frameworks/rtvi/frames.py index 36070e6e8..092755510 100644 --- a/src/pipecat/processors/frameworks/rtvi/frames.py +++ b/src/pipecat/processors/frameworks/rtvi/frames.py @@ -7,7 +7,7 @@ """RTVI pipeline frame definitions.""" from dataclasses import dataclass -from typing import Any, Optional +from typing import Any from pipecat.frames.frames import SystemFrame @@ -37,7 +37,7 @@ class RTVIClientMessageFrame(SystemFrame): msg_id: str type: str - data: Optional[Any] = None + data: Any | None = None @dataclass @@ -53,5 +53,5 @@ class RTVIServerResponseFrame(SystemFrame): """ client_msg: RTVIClientMessageFrame - data: Optional[Any] = None - error: Optional[str] = None + data: Any | None = None + error: str | None = None diff --git a/src/pipecat/processors/frameworks/rtvi/models.py b/src/pipecat/processors/frameworks/rtvi/models.py index 9e54dd227..81c1b2aae 100644 --- a/src/pipecat/processors/frameworks/rtvi/models.py +++ b/src/pipecat/processors/frameworks/rtvi/models.py @@ -14,12 +14,10 @@ Import this module under the ``RTVI`` alias to use as a namespace:: msg = RTVI.BotReady(id="1", data=RTVI.BotReadyData(version=RTVI.PROTOCOL_VERSION)) """ +from collections.abc import Mapping from typing import ( Any, - Dict, Literal, - Mapping, - Optional, ) from pydantic import BaseModel @@ -46,7 +44,7 @@ class Message(BaseModel): label: MessageLiteral = MESSAGE_LABEL type: str id: str - data: Optional[Dict[str, Any]] = None + data: dict[str, Any] | None = None # -- Client -> Pipecat messages. 
@@ -56,7 +54,7 @@ class RawClientMessageData(BaseModel): """Data structure expected from client messages sent to the RTVI server.""" t: str - d: Optional[Any] = None + d: Any | None = None class ClientMessage(BaseModel): @@ -64,14 +62,14 @@ class ClientMessage(BaseModel): msg_id: str type: str - data: Optional[Any] = None + data: Any | None = None class RawServerResponseData(BaseModel): """Data structure for server responses to client messages.""" t: str - d: Optional[Any] = None + d: Any | None = None class ServerResponse(BaseModel): @@ -94,10 +92,10 @@ class AboutClientData(BaseModel): """ library: str - library_version: Optional[str] = None - platform: Optional[str] = None - platform_version: Optional[str] = None - platform_details: Optional[Any] = None + library_version: str | None = None + platform: str | None = None + platform_version: str | None = None + platform_details: Any | None = None class ClientReadyData(BaseModel): @@ -165,7 +163,7 @@ class BotReadyData(BaseModel): """ version: str - about: Optional[Mapping[str, Any]] = None + about: Mapping[str, Any] | None = None class BotReady(BaseModel): @@ -226,7 +224,7 @@ class SendTextData(BaseModel): """ content: str - options: Optional[SendTextOptions] = None + options: SendTextOptions | None = None class LLMFunctionCallStartMessageData(BaseModel): @@ -236,7 +234,7 @@ class LLMFunctionCallStartMessageData(BaseModel): the configured function_call_report_level for security. 
""" - function_name: Optional[str] = None + function_name: str | None = None class LLMFunctionCallStartMessage(BaseModel): @@ -270,8 +268,8 @@ class LLMFunctionCallInProgressMessageData(BaseModel): """ tool_call_id: str - function_name: Optional[str] = None - arguments: Optional[Mapping[str, Any]] = None + function_name: str | None = None + arguments: Mapping[str, Any] | None = None class LLMFunctionCallInProgressMessage(BaseModel): @@ -295,8 +293,8 @@ class LLMFunctionCallStoppedMessageData(BaseModel): tool_call_id: str cancelled: bool - function_name: Optional[str] = None - result: Optional[Any] = None + function_name: str | None = None + result: Any | None = None class LLMFunctionCallStoppedMessage(BaseModel): diff --git a/src/pipecat/processors/frameworks/rtvi/observer.py b/src/pipecat/processors/frameworks/rtvi/observer.py index 958ba8841..7b952530d 100644 --- a/src/pipecat/processors/frameworks/rtvi/observer.py +++ b/src/pipecat/processors/frameworks/rtvi/observer.py @@ -7,17 +7,12 @@ """RTVI observer for converting pipeline frames to outgoing RTVI messages.""" import time +from collections.abc import Awaitable, Callable from dataclasses import dataclass, field -from enum import Enum +from enum import Enum, StrEnum from typing import ( TYPE_CHECKING, - Awaitable, - Callable, - Dict, - List, Optional, - Set, - Tuple, ) from loguru import logger @@ -71,7 +66,7 @@ if TYPE_CHECKING: from pipecat.processors.frameworks.rtvi.processor import RTVIProcessor -class RTVIFunctionCallReportLevel(str, Enum): +class RTVIFunctionCallReportLevel(StrEnum): """Level of detail to include in function call RTVI events. Controls what information is exposed in function call events for security. 
@@ -148,18 +143,14 @@ class RTVIObserverParams: user_audio_level_enabled: bool = False metrics_enabled: bool = True system_logs_enabled: bool = False - ignored_sources: List[FrameProcessor] = field(default_factory=list) - skip_aggregator_types: Optional[List[AggregationType | str]] = None - bot_output_transforms: Optional[ - List[ - Tuple[ - AggregationType | str, - Callable[[str, AggregationType | str], Awaitable[str]], - ] - ] - ] = None + ignored_sources: list[FrameProcessor] = field(default_factory=list) + skip_aggregator_types: list[AggregationType | str] | None = None + bot_output_transforms: ( + list[tuple[AggregationType | str, Callable[[str, AggregationType | str], Awaitable[str]]]] + | None + ) = None audio_level_period_secs: float = 0.15 - function_call_report_level: Dict[str, RTVIFunctionCallReportLevel] = field( + function_call_report_level: dict[str, RTVIFunctionCallReportLevel] = field( default_factory=lambda: {"*": RTVIFunctionCallReportLevel.NONE} ) @@ -180,7 +171,7 @@ class RTVIObserver(BaseObserver): self, rtvi: Optional["RTVIProcessor"] = None, *, - params: Optional[RTVIObserverParams] = None, + params: RTVIObserverParams | None = None, **kwargs, ): """Initialize the RTVI observer. 
@@ -194,7 +185,7 @@ class RTVIObserver(BaseObserver): self._rtvi = rtvi self._params = params or RTVIObserverParams() - self._ignored_sources: Set[FrameProcessor] = set(self._params.ignored_sources) + self._ignored_sources: set[FrameProcessor] = set(self._params.ignored_sources) self._frames_seen = set() self._bot_transcription = "" @@ -203,13 +194,13 @@ class RTVIObserver(BaseObserver): # Track bot speaking state for queuing aggregated text frames self._bot_is_speaking = False - self._queued_aggregated_text_frames: List[AggregatedTextFrame] = [] + self._queued_aggregated_text_frames: list[AggregatedTextFrame] = [] if self._params.system_logs_enabled: self._system_logger_id = logger.add(self._logger_sink) - self._aggregation_transforms: List[ - Tuple[AggregationType | str, Callable[[str, AggregationType | str], Awaitable[str]]] + self._aggregation_transforms: list[ + tuple[AggregationType | str, Callable[[str, AggregationType | str], Awaitable[str]]] ] = self._params.bot_output_transforms or [] def add_bot_output_transformer( diff --git a/src/pipecat/processors/frameworks/rtvi/processor.py b/src/pipecat/processors/frameworks/rtvi/processor.py index 0a05560c0..5586ec8ae 100644 --- a/src/pipecat/processors/frameworks/rtvi/processor.py +++ b/src/pipecat/processors/frameworks/rtvi/processor.py @@ -8,7 +8,8 @@ import asyncio import base64 -from typing import Any, Mapping, Optional +from collections.abc import Mapping +from typing import Any from loguru import logger from pydantic import BaseModel, ValidationError @@ -51,7 +52,7 @@ class RTVIProcessor(FrameProcessor): def __init__( self, *, - transport: Optional[BaseTransport] = None, + transport: BaseTransport | None = None, **kwargs, ): """Initialize the RTVI processor. @@ -70,7 +71,7 @@ class RTVIProcessor(FrameProcessor): self._llm_skip_tts: bool = False # Keep in sync with llm_service.py's configuration. # A task to process incoming transport messages. 
- self._message_task: Optional[asyncio.Task] = None + self._message_task: asyncio.Task | None = None self._register_event_handler("on_bot_started") self._register_event_handler("on_client_ready") @@ -84,7 +85,7 @@ class RTVIProcessor(FrameProcessor): self._input_transport = input_transport self._input_transport.enable_audio_in_stream_on_start(False) - def create_rtvi_observer(self, *, params: Optional[RTVIObserverParams] = None, **kwargs): + def create_rtvi_observer(self, *, params: RTVIObserverParams | None = None, **kwargs): """Creates a new RTVI Observer. Args: diff --git a/src/pipecat/processors/frameworks/strands_agents.py b/src/pipecat/processors/frameworks/strands_agents.py index eb1edbfdc..7383cd089 100644 --- a/src/pipecat/processors/frameworks/strands_agents.py +++ b/src/pipecat/processors/frameworks/strands_agents.py @@ -4,8 +4,6 @@ This module provides integration with Strands Agents for handling conversational interactions. It supports both single agent and multi-agent graphs. """ -from typing import Optional - from loguru import logger from pipecat.frames.frames import ( @@ -38,9 +36,9 @@ class StrandsAgentsProcessor(FrameProcessor): def __init__( self, - agent: Optional[Agent] = None, - graph: Optional[Graph] = None, - graph_exit_node: Optional[str] = None, + agent: Agent | None = None, + graph: Graph | None = None, + graph_exit_node: str | None = None, ): """Initialize the Strands Agents processor. 
diff --git a/src/pipecat/processors/gstreamer/pipeline_source.py b/src/pipecat/processors/gstreamer/pipeline_source.py index ec7ef0ec7..c2a819f69 100644 --- a/src/pipecat/processors/gstreamer/pipeline_source.py +++ b/src/pipecat/processors/gstreamer/pipeline_source.py @@ -7,7 +7,6 @@ """GStreamer pipeline source integration for Pipecat.""" import asyncio -from typing import Optional from loguru import logger from pydantic import BaseModel @@ -58,11 +57,11 @@ class GStreamerPipelineSource(FrameProcessor): video_width: int = 1280 video_height: int = 720 - audio_sample_rate: Optional[int] = None + audio_sample_rate: int | None = None audio_channels: int = 1 clock_sync: bool = True - def __init__(self, *, pipeline: str, out_params: Optional[OutputParams] = None, **kwargs): + def __init__(self, *, pipeline: str, out_params: OutputParams | None = None, **kwargs): """Initialize the GStreamer pipeline source. Args: diff --git a/src/pipecat/processors/idle_frame_processor.py b/src/pipecat/processors/idle_frame_processor.py index 3a7f1b860..9426006be 100644 --- a/src/pipecat/processors/idle_frame_processor.py +++ b/src/pipecat/processors/idle_frame_processor.py @@ -7,7 +7,7 @@ """Idle frame processor for timeout-based callback execution.""" import asyncio -from typing import Awaitable, Callable, List, Optional +from collections.abc import Awaitable, Callable from pipecat.frames.frames import Frame, StartFrame from pipecat.processors.frame_processor import FrameDirection, FrameProcessor @@ -26,7 +26,7 @@ class IdleFrameProcessor(FrameProcessor): *, callback: Callable[["IdleFrameProcessor"], Awaitable[None]], timeout: float, - types: Optional[List[type]] = None, + types: list[type] | None = None, **kwargs, ): """Initialize the idle frame processor. 
@@ -86,5 +86,5 @@ class IdleFrameProcessor(FrameProcessor): try: await asyncio.wait_for(self._idle_event.wait(), timeout=self._timeout) self._idle_event.clear() - except asyncio.TimeoutError: + except TimeoutError: await self._callback(self) diff --git a/src/pipecat/processors/logger.py b/src/pipecat/processors/logger.py index 2e50c5a75..6cfea1b02 100644 --- a/src/pipecat/processors/logger.py +++ b/src/pipecat/processors/logger.py @@ -6,8 +6,6 @@ """Frame logging utilities for debugging and monitoring frame flow in Pipecat pipelines.""" -from typing import Optional, Tuple, Type - from loguru import logger from pipecat.frames.frames import ( @@ -33,8 +31,8 @@ class FrameLogger(FrameProcessor): def __init__( self, prefix="Frame", - color: Optional[str] = None, - ignored_frame_types: Tuple[Type[Frame], ...] = ( + color: str | None = None, + ignored_frame_types: tuple[type[Frame], ...] = ( BotSpeakingFrame, UserSpeakingFrame, InputAudioRawFrame, diff --git a/src/pipecat/processors/metrics/frame_processor_metrics.py b/src/pipecat/processors/metrics/frame_processor_metrics.py index 7a52895a2..18ef7f580 100644 --- a/src/pipecat/processors/metrics/frame_processor_metrics.py +++ b/src/pipecat/processors/metrics/frame_processor_metrics.py @@ -7,7 +7,6 @@ """Frame processor metrics collection and reporting.""" import time -from typing import Optional from loguru import logger @@ -70,7 +69,7 @@ class FrameProcessorMetrics(BaseObject): return self._task_manager @property - def ttfb(self) -> Optional[float]: + def ttfb(self) -> float | None: """Get the current TTFB value in seconds. Returns: @@ -110,7 +109,7 @@ class FrameProcessorMetrics(BaseObject): self._core_metrics_data = MetricsData(processor=name) async def start_ttfb_metrics( - self, *, start_time: Optional[float] = None, report_only_initial_ttfb: bool + self, *, start_time: float | None = None, report_only_initial_ttfb: bool ): """Start measuring time-to-first-byte (TTFB). 
@@ -124,7 +123,7 @@ class FrameProcessorMetrics(BaseObject): self._last_ttfb_time = 0 self._should_report_ttfb = not report_only_initial_ttfb - async def stop_ttfb_metrics(self, *, end_time: Optional[float] = None): + async def stop_ttfb_metrics(self, *, end_time: float | None = None): """Stop TTFB measurement and generate metrics frame. Args: @@ -147,7 +146,7 @@ class FrameProcessorMetrics(BaseObject): self._start_ttfb_time = 0 return MetricsFrame(data=[ttfb]) - async def start_processing_metrics(self, *, start_time: Optional[float] = None): + async def start_processing_metrics(self, *, start_time: float | None = None): """Start measuring processing time. Args: @@ -156,7 +155,7 @@ class FrameProcessorMetrics(BaseObject): """ self._start_processing_time = start_time or time.time() - async def stop_processing_metrics(self, *, end_time: Optional[float] = None): + async def stop_processing_metrics(self, *, end_time: float | None = None): """Stop processing time measurement and generate metrics frame. Args: diff --git a/src/pipecat/processors/metrics/sentry.py b/src/pipecat/processors/metrics/sentry.py index c865ee470..b043b9058 100644 --- a/src/pipecat/processors/metrics/sentry.py +++ b/src/pipecat/processors/metrics/sentry.py @@ -7,7 +7,6 @@ """Sentry integration for frame processor metrics.""" import asyncio -from typing import Optional from loguru import logger @@ -72,7 +71,7 @@ class SentryMetrics(FrameProcessorMetrics): sentry_sdk.flush(timeout=5.0) async def start_ttfb_metrics( - self, *, start_time: Optional[float] = None, report_only_initial_ttfb: bool + self, *, start_time: float | None = None, report_only_initial_ttfb: bool ): """Start tracking time-to-first-byte metrics. 
@@ -93,7 +92,7 @@ class SentryMetrics(FrameProcessorMetrics): f"{self} Sentry transaction started (ID: {self._ttfb_metrics_tx.span_id} Name: {self._ttfb_metrics_tx.name})" ) - async def stop_ttfb_metrics(self, *, end_time: Optional[float] = None): + async def stop_ttfb_metrics(self, *, end_time: float | None = None): """Stop tracking time-to-first-byte metrics. Args: @@ -105,7 +104,7 @@ class SentryMetrics(FrameProcessorMetrics): await self._sentry_queue.put(self._ttfb_metrics_tx) self._ttfb_metrics_tx = None - async def start_processing_metrics(self, *, start_time: Optional[float] = None): + async def start_processing_metrics(self, *, start_time: float | None = None): """Start tracking frame processing metrics. Args: @@ -122,7 +121,7 @@ class SentryMetrics(FrameProcessorMetrics): f"{self} Sentry transaction started (ID: {self._processing_metrics_tx.span_id} Name: {self._processing_metrics_tx.name})" ) - async def stop_processing_metrics(self, *, end_time: Optional[float] = None): + async def stop_processing_metrics(self, *, end_time: float | None = None): """Stop tracking frame processing metrics. Args: diff --git a/src/pipecat/processors/producer_processor.py b/src/pipecat/processors/producer_processor.py index 3de6efc40..c70c066f1 100644 --- a/src/pipecat/processors/producer_processor.py +++ b/src/pipecat/processors/producer_processor.py @@ -7,7 +7,7 @@ """Producer processor for frame filtering and distribution.""" import asyncio -from typing import Awaitable, Callable, List +from collections.abc import Awaitable, Callable from pipecat.frames.frames import Frame from pipecat.processors.frame_processor import FrameDirection, FrameProcessor @@ -55,7 +55,7 @@ class ProducerProcessor(FrameProcessor): self._filter = filter self._transformer = transformer self._passthrough = passthrough - self._consumers: List[asyncio.Queue] = [] + self._consumers: list[asyncio.Queue] = [] def add_consumer(self): """Add a new consumer and return its associated queue. 
diff --git a/src/pipecat/processors/text_transformer.py b/src/pipecat/processors/text_transformer.py index c97ef4e46..dffee7ac4 100644 --- a/src/pipecat/processors/text_transformer.py +++ b/src/pipecat/processors/text_transformer.py @@ -6,7 +6,7 @@ """Stateless text transformation processor for Pipecat.""" -from typing import Callable, Coroutine, Union +from collections.abc import Callable, Coroutine from pipecat.frames.frames import Frame, TextFrame from pipecat.processors.frame_processor import FrameDirection, FrameProcessor @@ -21,7 +21,7 @@ class StatelessTextTransformer(FrameProcessor): """ def __init__( - self, transform_fn: Union[Callable[[str], str], Callable[[str], Coroutine[None, None, str]]] + self, transform_fn: Callable[[str], str] | Callable[[str], Coroutine[None, None, str]] ): """Initialize the text transformer. diff --git a/src/pipecat/runner/daily.py b/src/pipecat/runner/daily.py index 082d7271c..bc80f0641 100644 --- a/src/pipecat/runner/daily.py +++ b/src/pipecat/runner/daily.py @@ -37,7 +37,6 @@ Example:: import os import time import uuid -from typing import Dict, List, Optional import aiohttp from loguru import logger @@ -64,7 +63,7 @@ class DailyRoomConfig(BaseModel): room_url: str token: str - sip_endpoint: Optional[str] = None + sip_endpoint: str | None = None def __iter__(self): """Enable tuple unpacking for backward compatibility. 
@@ -78,18 +77,18 @@ class DailyRoomConfig(BaseModel): async def configure( aiohttp_session: aiohttp.ClientSession, *, - api_key: Optional[str] = None, + api_key: str | None = None, room_exp_duration: float = 2.0, token_exp_duration: float = 2.0, - sip_caller_phone: Optional[str] = None, + sip_caller_phone: str | None = None, sip_enable_video: bool = False, sip_num_endpoints: int = 1, enable_dialout: bool = False, - sip_codecs: Optional[Dict[str, List[str]]] = None, - sip_provider: Optional[str] = None, - room_geo: Optional[str] = None, - room_properties: Optional[DailyRoomProperties] = None, - token_properties: Optional[DailyMeetingTokenProperties] = None, + sip_codecs: dict[str, list[str]] | None = None, + sip_provider: str | None = None, + room_geo: str | None = None, + room_properties: DailyRoomProperties | None = None, + token_properties: DailyMeetingTokenProperties | None = None, ) -> DailyRoomConfig: """Configure Daily room URL and token with optional SIP capabilities. diff --git a/src/pipecat/runner/livekit.py b/src/pipecat/runner/livekit.py index 4eef59207..1be30f3d0 100644 --- a/src/pipecat/runner/livekit.py +++ b/src/pipecat/runner/livekit.py @@ -30,7 +30,6 @@ Example:: import argparse import os -from typing import Optional from livekit import api from loguru import logger @@ -98,7 +97,7 @@ async def configure(): return (url, token, room_name) -async def configure_with_args(parser: Optional[argparse.ArgumentParser] = None): +async def configure_with_args(parser: argparse.ArgumentParser | None = None): """Configure LiveKit room with command-line argument parsing. 
Args: diff --git a/src/pipecat/runner/run.py b/src/pipecat/runner/run.py index 90f7b8ce1..c6d43fbbd 100644 --- a/src/pipecat/runner/run.py +++ b/src/pipecat/runner/run.py @@ -74,7 +74,7 @@ import uuid from contextlib import asynccontextmanager from http import HTTPMethod from pathlib import Path -from typing import Any, Dict, List, Optional, TypedDict, Union +from typing import Any, TypedDict import aiohttp from fastapi.responses import FileResponse, Response @@ -106,7 +106,7 @@ os.environ["ENV"] = "local" TELEPHONY_TRANSPORTS = ["twilio", "telnyx", "plivo", "exotel"] -RUNNER_DOWNLOADS_FOLDER: Optional[str] = None +RUNNER_DOWNLOADS_FOLDER: str | None = None RUNNER_HOST: str = "localhost" RUNNER_PORT: int = 7860 @@ -220,17 +220,17 @@ def _setup_webrtc_routes(app: FastAPI, args: argparse.Namespace): return class IceServer(TypedDict, total=False): - urls: Union[str, List[str]] + urls: str | list[str] class IceConfig(TypedDict): - iceServers: List[IceServer] + iceServers: list[IceServer] class StartBotResult(TypedDict, total=False): sessionId: str - iceConfig: Optional[IceConfig] + iceConfig: IceConfig | None # In-memory store of active sessions: session_id -> session info - active_sessions: Dict[str, Dict[str, Any]] = {} + active_sessions: dict[str, dict[str, Any]] = {} # Mount the frontend app.mount("/client", SmallWebRTCPrebuiltUI) @@ -418,7 +418,7 @@ def _setup_whatsapp_routes(app: FastAPI, args: argparse.Namespace): return # Global WhatsApp client instance - whatsapp_client: Optional[WhatsAppClient] = None + whatsapp_client: WhatsAppClient | None = None @app.get( "/whatsapp", @@ -857,7 +857,7 @@ def _validate_and_clean_proxy(proxy: str) -> str: return proxy -def runner_downloads_folder() -> Optional[str]: +def runner_downloads_folder() -> str | None: """Returns the folder where files are stored for later download.""" return RUNNER_DOWNLOADS_FOLDER @@ -872,7 +872,7 @@ def runner_port() -> int: return RUNNER_PORT -def main(parser: Optional[argparse.ArgumentParser] = 
None): +def main(parser: argparse.ArgumentParser | None = None): """Start the Pipecat development runner. Parses command-line arguments and starts a FastAPI server configured diff --git a/src/pipecat/runner/types.py b/src/pipecat/runner/types.py index e48f10a08..055824a22 100644 --- a/src/pipecat/runner/types.py +++ b/src/pipecat/runner/types.py @@ -12,7 +12,7 @@ information to bot functions. import argparse from dataclasses import dataclass, field -from typing import Any, Dict, Optional +from typing import Any from fastapi import WebSocket from pydantic import BaseModel @@ -34,9 +34,9 @@ class DialinSettings(BaseModel): call_id: str call_domain: str - To: Optional[str] = None - From: Optional[str] = None - sip_headers: Optional[Dict[str, str]] = None + To: str | None = None + From: str | None = None + sip_headers: dict[str, str] | None = None class DailyDialinRequest(BaseModel): @@ -64,8 +64,8 @@ class RunnerArguments: handle_sigint: bool = field(init=False, kw_only=True) handle_sigterm: bool = field(init=False, kw_only=True) pipeline_idle_timeout_secs: int = field(init=False, kw_only=True) - body: Optional[Any] = field(default_factory=dict, kw_only=True) - cli_args: Optional[argparse.Namespace] = field(default=None, init=False, kw_only=True) + body: Any | None = field(default_factory=dict, kw_only=True) + cli_args: argparse.Namespace | None = field(default=None, init=False, kw_only=True) def __post_init__(self): self.handle_sigint = False @@ -84,7 +84,7 @@ class DailyRunnerArguments(RunnerArguments): """ room_url: str - token: Optional[str] = None + token: str | None = None @dataclass @@ -122,4 +122,4 @@ class LiveKitRunnerArguments(RunnerArguments): room_name: str url: str - token: Optional[str] = None + token: str | None = None diff --git a/src/pipecat/runner/utils.py b/src/pipecat/runner/utils.py index d0bb44a88..7a4b3034c 100644 --- a/src/pipecat/runner/utils.py +++ b/src/pipecat/runner/utils.py @@ -32,7 +32,8 @@ Example:: import json import os import re 
-from typing import Any, Callable, Dict, Optional +from collections.abc import Callable +from typing import Any from fastapi import WebSocket from loguru import logger @@ -373,7 +374,7 @@ def _smallwebrtc_sdp_cleanup_fingerprints(text: str) -> str: return "\r\n".join(result) + "\r\n" -def smallwebrtc_sdp_munging(sdp: str, host: Optional[str]) -> str: +def smallwebrtc_sdp_munging(sdp: str, host: str | None) -> str: """Apply SDP modifications for SmallWebRTC compatibility. Args: @@ -389,7 +390,7 @@ def smallwebrtc_sdp_munging(sdp: str, host: Optional[str]) -> str: return sdp -def _get_transport_params(transport_key: str, transport_params: Dict[str, Callable]) -> Any: +def _get_transport_params(transport_key: str, transport_params: dict[str, Callable]) -> Any: """Get transport parameters from factory function. Args: @@ -415,7 +416,7 @@ def _get_transport_params(transport_key: str, transport_params: Dict[str, Callab async def _create_telephony_transport( websocket: WebSocket, - params: Optional[Any] = None, + params: Any | None = None, transport_type: str = None, call_data: dict = None, ) -> BaseTransport: @@ -488,7 +489,7 @@ async def _create_telephony_transport( async def create_transport( - runner_args: Any, transport_params: Dict[str, Callable] + runner_args: Any, transport_params: dict[str, Callable] ) -> BaseTransport: """Create a transport from runner arguments using factory functions. 
diff --git a/src/pipecat/serializers/base_serializer.py b/src/pipecat/serializers/base_serializer.py index d9414e43d..7c354d253 100644 --- a/src/pipecat/serializers/base_serializer.py +++ b/src/pipecat/serializers/base_serializer.py @@ -7,7 +7,6 @@ """Frame serialization interfaces for Pipecat.""" from abc import abstractmethod -from typing import Optional from pydantic import BaseModel @@ -39,7 +38,7 @@ class FrameSerializer(BaseObject): ignore_rtvi_messages: bool = True - def __init__(self, params: Optional[InputParams] = None, **kwargs): + def __init__(self, params: InputParams | None = None, **kwargs): """Initialize the FrameSerializer. Args: diff --git a/src/pipecat/serializers/exotel.py b/src/pipecat/serializers/exotel.py index abf170d65..ff2510f57 100644 --- a/src/pipecat/serializers/exotel.py +++ b/src/pipecat/serializers/exotel.py @@ -8,7 +8,6 @@ import base64 import json -from typing import Optional from loguru import logger @@ -48,10 +47,10 @@ class ExotelFrameSerializer(FrameSerializer): """ exotel_sample_rate: int = 8000 - sample_rate: Optional[int] = None + sample_rate: int | None = None def __init__( - self, stream_sid: str, call_sid: Optional[str] = None, params: Optional[InputParams] = None + self, stream_sid: str, call_sid: str | None = None, params: InputParams | None = None ): """Initialize the ExotelFrameSerializer. 
diff --git a/src/pipecat/serializers/genesys.py b/src/pipecat/serializers/genesys.py index 0cfdba22b..e52abc6aa 100644 --- a/src/pipecat/serializers/genesys.py +++ b/src/pipecat/serializers/genesys.py @@ -23,8 +23,8 @@ Audio Format: import json import uuid from datetime import timedelta -from enum import Enum -from typing import Any, Dict, List, Optional +from enum import StrEnum +from typing import Any from loguru import logger @@ -46,7 +46,7 @@ from pipecat.frames.frames import ( from pipecat.serializers.base_serializer import FrameSerializer -class AudioHookMessageType(str, Enum): +class AudioHookMessageType(StrEnum): """AudioHook protocol message types.""" OPEN = "open" @@ -63,7 +63,7 @@ class AudioHookMessageType(str, Enum): DISCONNECT = "disconnect" -class AudioHookChannel(str, Enum): +class AudioHookChannel(StrEnum): """AudioHook audio channel configuration.""" EXTERNAL = "external" # Customer audio only (mono) @@ -71,7 +71,7 @@ class AudioHookChannel(str, Enum): BOTH = "both" # Stereo: external=left, internal=right -class AudioHookMediaFormat(str, Enum): +class AudioHookMediaFormat(StrEnum): """Supported audio formats.""" PCMU = "PCMU" # μ-law, 8kHz @@ -146,18 +146,18 @@ class GenesysAudioHookSerializer(FrameSerializer): """ genesys_sample_rate: int = 8000 - sample_rate: Optional[int] = None + sample_rate: int | None = None channel: AudioHookChannel = AudioHookChannel.EXTERNAL media_format: AudioHookMediaFormat = AudioHookMediaFormat.PCMU process_external: bool = True process_internal: bool = False - supported_languages: Optional[List[str]] = None - selected_language: Optional[str] = None + supported_languages: list[str] | None = None + selected_language: str | None = None start_paused: bool = False def __init__( self, - params: Optional[InputParams] = None, + params: InputParams | None = None, **kwargs, ): """Initialize the GenesysAudioHookSerializer. 
@@ -185,12 +185,12 @@ class GenesysAudioHookSerializer(FrameSerializer): self._position = timedelta(0) # Session metadata - self._conversation_id: Optional[str] = None - self._participant: Optional[Dict[str, Any]] = None - self._custom_config: Optional[Dict[str, Any]] = None - self._media_info: Optional[List[Dict[str, Any]]] = None - self._input_variables: Optional[Dict[str, Any]] = None # Custom input from Genesys - self._output_variables: Optional[Dict[str, Any]] = None # Custom output to Genesys + self._conversation_id: str | None = None + self._participant: dict[str, Any] | None = None + self._custom_config: dict[str, Any] | None = None + self._media_info: list[dict[str, Any]] | None = None + self._input_variables: dict[str, Any] | None = None # Custom input from Genesys + self._output_variables: dict[str, Any] | None = None # Custom output to Genesys # Event handlers self._register_event_handler("on_open") @@ -207,7 +207,7 @@ class GenesysAudioHookSerializer(FrameSerializer): return self._session_id @property - def conversation_id(self) -> Optional[str]: + def conversation_id(self) -> str | None: """Get the Genesys conversation ID.""" return self._conversation_id @@ -222,21 +222,21 @@ class GenesysAudioHookSerializer(FrameSerializer): return self._is_paused @property - def participant(self) -> Optional[Dict[str, Any]]: + def participant(self) -> dict[str, Any] | None: """Get participant info (ani, dnis, etc.) 
from the open message.""" return self._participant @property - def input_variables(self) -> Optional[Dict[str, Any]]: + def input_variables(self) -> dict[str, Any] | None: """Get custom input variables from the open message.""" return self._input_variables @property - def output_variables(self) -> Optional[Dict[str, Any]]: + def output_variables(self) -> dict[str, Any] | None: """Get custom output variables to send back to Genesys.""" return self._output_variables - def set_output_variables(self, variables: Dict[str, Any]) -> None: + def set_output_variables(self, variables: dict[str, Any]) -> None: """Set custom output variables to send back to Genesys on close. These variables will be included in the 'closed' response when Genesys @@ -305,9 +305,9 @@ class GenesysAudioHookSerializer(FrameSerializer): def _create_message( self, msg_type: AudioHookMessageType, - parameters: Optional[Dict[str, Any]] = None, + parameters: dict[str, Any] | None = None, include_position: bool = True, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """Create a protocol message with common fields. Based on the Genesys AudioHook protocol, responses include: @@ -341,9 +341,9 @@ class GenesysAudioHookSerializer(FrameSerializer): def create_opened_response( self, start_paused: bool = False, - supported_languages: Optional[List[str]] = None, - selected_language: Optional[str] = None, - ) -> Dict[str, Any]: + supported_languages: list[str] | None = None, + selected_language: str | None = None, + ) -> dict[str, Any]: """Create an 'opened' response message for the client. This should be sent in response to an 'open' message from Genesys. @@ -397,8 +397,8 @@ class GenesysAudioHookSerializer(FrameSerializer): def create_closed_response( self, - output_variables: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + output_variables: dict[str, Any] | None = None, + ) -> dict[str, Any]: """Create a 'closed' response message. This should be sent in response to a 'close' message from Genesys. 
@@ -422,7 +422,7 @@ class GenesysAudioHookSerializer(FrameSerializer): } ) """ - parameters: Optional[Dict[str, Any]] = None + parameters: dict[str, Any] | None = None if output_variables: parameters = {"outputVariables": output_variables} @@ -437,7 +437,7 @@ class GenesysAudioHookSerializer(FrameSerializer): return msg - def create_pong_response(self) -> Dict[str, Any]: + def create_pong_response(self) -> dict[str, Any]: """Create a 'pong' response message. This should be sent in response to a 'ping' message from Genesys. @@ -448,7 +448,7 @@ class GenesysAudioHookSerializer(FrameSerializer): msg = self._create_message(AudioHookMessageType.PONG) return msg - def create_resumed_response(self) -> Dict[str, Any]: + def create_resumed_response(self) -> dict[str, Any]: """Create a 'resumed' response message. This should be sent in response to a 'pause' message when ready to resume. @@ -463,7 +463,7 @@ class GenesysAudioHookSerializer(FrameSerializer): return msg - def create_barge_in_event(self) -> Dict[str, Any]: + def create_barge_in_event(self) -> dict[str, Any]: """Create a barge-in event message. This notifies Genesys Cloud that the user has interrupted the bot's @@ -485,9 +485,9 @@ class GenesysAudioHookSerializer(FrameSerializer): self, reason: str = "completed", action: str = "transfer", - output_variables: Optional[Dict[str, Any]] = None, - info: Optional[str] = None, - ) -> Dict[str, Any]: + output_variables: dict[str, Any] | None = None, + info: str | None = None, + ) -> dict[str, Any]: """Create a 'disconnect' message to initiate session termination. Args: @@ -499,7 +499,7 @@ class GenesysAudioHookSerializer(FrameSerializer): Returns: Dictionary of the disconnect message. 
""" - parameters: Dict[str, Any] = {"reason": reason} + parameters: dict[str, Any] = {"reason": reason} # Build outputVariables out_vars = {"action": action} @@ -523,7 +523,7 @@ class GenesysAudioHookSerializer(FrameSerializer): code: int, message: str, retryable: bool = False, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """Create an 'error' message. Args: @@ -700,7 +700,7 @@ class GenesysAudioHookSerializer(FrameSerializer): return audio_frame - async def _handle_control_message(self, message: Dict[str, Any]) -> Frame | None: + async def _handle_control_message(self, message: dict[str, Any]) -> Frame | None: """Handle a JSON control message from Genesys. Args: @@ -748,7 +748,7 @@ class GenesysAudioHookSerializer(FrameSerializer): logger.warning(f"Unknown AudioHook message type: {msg_type}") return None - async def _handle_open(self, message: Dict[str, Any]) -> Frame | None: + async def _handle_open(self, message: dict[str, Any]) -> Frame | None: """Handle an 'open' message from Genesys. This initializes the session with metadata from Genesys Cloud and @@ -781,7 +781,7 @@ class GenesysAudioHookSerializer(FrameSerializer): # media is a list like: [{"type": "audio", "format": "PCMU", "channels": ["external"], "rate": 8000}] media_list = self._media_info if media_list and isinstance(media_list, list) and len(media_list) > 0: - audio_media: Dict[str, Any] = media_list[0] # Get first media entry + audio_media: dict[str, Any] = media_list[0] # Get first media entry channels = audio_media.get("channels", []) logger.debug( f"📡 Genesys audio config: format={audio_media.get('format')}, channels={channels}, rate={audio_media.get('rate')}" @@ -815,7 +815,7 @@ class GenesysAudioHookSerializer(FrameSerializer): ) ) - async def _handle_close(self, message: Dict[str, Any]) -> Frame | None: + async def _handle_close(self, message: dict[str, Any]) -> Frame | None: """Handle a 'close' message from Genesys. Automatically responds with a 'closed' message. 
If output_variables @@ -846,7 +846,7 @@ class GenesysAudioHookSerializer(FrameSerializer): message=self.create_closed_response(output_variables=self._output_variables) ) - async def _handle_ping(self, message: Dict[str, Any]) -> Frame | None: + async def _handle_ping(self, message: dict[str, Any]) -> Frame | None: """Handle a 'ping' message from Genesys. Automatically responds with a 'pong' message to maintain the connection. @@ -864,7 +864,7 @@ class GenesysAudioHookSerializer(FrameSerializer): # Return as urgent frame to be sent through pipeline immediately return OutputTransportMessageUrgentFrame(message=self.create_pong_response()) - async def _handle_pause(self, message: Dict[str, Any]) -> Frame | None: + async def _handle_pause(self, message: dict[str, Any]) -> Frame | None: """Handle a 'pause' message from Genesys. This is used when audio streaming is temporarily suspended @@ -888,7 +888,7 @@ class GenesysAudioHookSerializer(FrameSerializer): # Note: Application should call create_resumed_response() when ready return None - async def _handle_update(self, message: Dict[str, Any]) -> Frame | None: + async def _handle_update(self, message: dict[str, Any]) -> Frame | None: """Handle an 'update' message from Genesys. Updates may include changes to participants or configuration. @@ -910,7 +910,7 @@ class GenesysAudioHookSerializer(FrameSerializer): return None - async def _handle_error(self, message: Dict[str, Any]) -> Frame | None: + async def _handle_error(self, message: dict[str, Any]) -> Frame | None: """Handle an 'error' message from Genesys. Args: @@ -929,7 +929,7 @@ class GenesysAudioHookSerializer(FrameSerializer): return None - async def _handle_dtmf(self, message: Dict[str, Any]) -> Frame | None: + async def _handle_dtmf(self, message: dict[str, Any]) -> Frame | None: """Handle a 'dtmf' message from Genesys. 
DTMF (Dual-Tone Multi-Frequency) events are sent when the user diff --git a/src/pipecat/serializers/plivo.py b/src/pipecat/serializers/plivo.py index b6346d542..e86d18b8c 100644 --- a/src/pipecat/serializers/plivo.py +++ b/src/pipecat/serializers/plivo.py @@ -8,7 +8,6 @@ import base64 import json -from typing import Optional from loguru import logger @@ -52,16 +51,16 @@ class PlivoFrameSerializer(FrameSerializer): """ plivo_sample_rate: int = 8000 - sample_rate: Optional[int] = None + sample_rate: int | None = None auto_hang_up: bool = True def __init__( self, stream_id: str, - call_id: Optional[str] = None, - auth_id: Optional[str] = None, - auth_token: Optional[str] = None, - params: Optional[InputParams] = None, + call_id: str | None = None, + auth_id: str | None = None, + auth_token: str | None = None, + params: InputParams | None = None, ): """Initialize the PlivoFrameSerializer. diff --git a/src/pipecat/serializers/protobuf.py b/src/pipecat/serializers/protobuf.py index 66f4d0daa..78d20fa24 100644 --- a/src/pipecat/serializers/protobuf.py +++ b/src/pipecat/serializers/protobuf.py @@ -8,7 +8,6 @@ import dataclasses import json -from typing import Optional from loguru import logger @@ -61,7 +60,7 @@ class ProtobufFrameSerializer(FrameSerializer): } DESERIALIZABLE_FIELDS = {v: k for k, v in DESERIALIZABLE_TYPES.items()} - def __init__(self, params: Optional[FrameSerializer.InputParams] = None): + def __init__(self, params: FrameSerializer.InputParams | None = None): """Initialize the Protobuf frame serializer. 
Args: diff --git a/src/pipecat/serializers/telnyx.py b/src/pipecat/serializers/telnyx.py index 1c0405ade..0d74664ab 100644 --- a/src/pipecat/serializers/telnyx.py +++ b/src/pipecat/serializers/telnyx.py @@ -8,7 +8,6 @@ import base64 import json -from typing import Optional import aiohttp from loguru import logger @@ -59,7 +58,7 @@ class TelnyxFrameSerializer(FrameSerializer): """ telnyx_sample_rate: int = 8000 - sample_rate: Optional[int] = None + sample_rate: int | None = None inbound_encoding: str = "PCMU" outbound_encoding: str = "PCMU" auto_hang_up: bool = True @@ -69,9 +68,9 @@ class TelnyxFrameSerializer(FrameSerializer): stream_id: str, outbound_encoding: str, inbound_encoding: str, - call_control_id: Optional[str] = None, - api_key: Optional[str] = None, - params: Optional[InputParams] = None, + call_control_id: str | None = None, + api_key: str | None = None, + params: InputParams | None = None, ): """Initialize the TelnyxFrameSerializer. diff --git a/src/pipecat/serializers/twilio.py b/src/pipecat/serializers/twilio.py index 4d4b5344a..857610b4e 100644 --- a/src/pipecat/serializers/twilio.py +++ b/src/pipecat/serializers/twilio.py @@ -8,7 +8,6 @@ import base64 import json -from typing import Optional from loguru import logger @@ -52,18 +51,18 @@ class TwilioFrameSerializer(FrameSerializer): """ twilio_sample_rate: int = 8000 - sample_rate: Optional[int] = None + sample_rate: int | None = None auto_hang_up: bool = True def __init__( self, stream_sid: str, - call_sid: Optional[str] = None, - account_sid: Optional[str] = None, - auth_token: Optional[str] = None, - region: Optional[str] = None, - edge: Optional[str] = None, - params: Optional[InputParams] = None, + call_sid: str | None = None, + account_sid: str | None = None, + auth_token: str | None = None, + region: str | None = None, + edge: str | None = None, + params: InputParams | None = None, ): """Initialize the TwilioFrameSerializer. 
diff --git a/src/pipecat/serializers/vonage.py b/src/pipecat/serializers/vonage.py index c14ae4025..d778cf62c 100644 --- a/src/pipecat/serializers/vonage.py +++ b/src/pipecat/serializers/vonage.py @@ -7,7 +7,6 @@ """Vonage Audio Connector WebSocket serializer for Pipecat.""" import json -from typing import Optional from loguru import logger @@ -47,9 +46,9 @@ class VonageFrameSerializer(FrameSerializer): """ vonage_sample_rate: int = 16000 - sample_rate: Optional[int] = None + sample_rate: int | None = None - def __init__(self, params: Optional[InputParams] = None): + def __init__(self, params: InputParams | None = None): """Initialize the VonageFrameSerializer. Args: diff --git a/src/pipecat/services/ai_service.py b/src/pipecat/services/ai_service.py index dd9ef1dba..5d914dd00 100644 --- a/src/pipecat/services/ai_service.py +++ b/src/pipecat/services/ai_service.py @@ -11,7 +11,8 @@ model management, settings handling, and frame processing lifecycle methods. """ import warnings -from typing import Any, AsyncGenerator, Dict +from collections.abc import AsyncGenerator +from typing import Any from loguru import logger @@ -51,7 +52,7 @@ class AIService(FrameProcessor): or ServiceSettings() ) self._sync_model_name_to_metrics() - self._session_properties: Dict[str, Any] = {} + self._session_properties: dict[str, Any] = {} self._tracing_enabled: bool = False self._tracing_context = None @@ -104,7 +105,7 @@ class AIService(FrameProcessor): """ pass - async def _update_settings(self, delta: ServiceSettings) -> Dict[str, Any]: + async def _update_settings(self, delta: ServiceSettings) -> dict[str, Any]: """Apply a settings delta and return the changed fields. 
The delta is applied to ``_settings`` and a dict mapping each changed diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index f51e7927e..03286bee3 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -14,7 +14,7 @@ import asyncio import json import re from dataclasses import dataclass, field -from typing import Any, Dict, Literal, Optional, Union +from typing import Any, Literal, Optional, Union import httpx from loguru import logger @@ -66,7 +66,7 @@ class AnthropicThinkingConfig(BaseModel): # No client-side validation on budget_tokens — we let the server # enforce the rules so we stay forward-compatible if they change. - budget_tokens: Optional[int] = None + budget_tokens: int | None = None @dataclass @@ -133,26 +133,26 @@ class AnthropicLLMService(LLMService): extra: Additional parameters to pass to the API. """ - enable_prompt_caching: Optional[bool] = None - max_tokens: Optional[int] = Field(default_factory=lambda: 4096, ge=1) - temperature: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0) - top_k: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=0) - top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0) + enable_prompt_caching: bool | None = None + max_tokens: int | None = Field(default_factory=lambda: 4096, ge=1) + temperature: float | None = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0) + top_k: int | None = Field(default_factory=lambda: NOT_GIVEN, ge=0) + top_p: float | None = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0) thinking: Optional["AnthropicLLMService.ThinkingConfig"] = Field( default_factory=lambda: NOT_GIVEN ) - extra: Optional[Dict[str, Any]] = Field(default_factory=dict) + extra: dict[str, Any] | None = Field(default_factory=dict) def __init__( self, *, api_key: str, - model: Optional[str] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = 
None, + model: str | None = None, + params: InputParams | None = None, + settings: Settings | None = None, client=None, - retry_timeout_secs: Optional[float] = 5.0, - retry_on_timeout: Optional[bool] = False, + retry_timeout_secs: float | None = 5.0, + retry_on_timeout: bool | None = False, **kwargs, ): """Initialize the Anthropic LLM service. @@ -251,7 +251,7 @@ class AnthropicLLMService(LLMService): api_call(**params), timeout=self._retry_timeout_secs ) return response - except (APITimeoutError, asyncio.TimeoutError): + except (TimeoutError, APITimeoutError): # Retry, this time without a timeout so we get a response logger.debug(f"{self}: Retrying message creation due to timeout") response = await api_call(**params) @@ -263,9 +263,9 @@ class AnthropicLLMService(LLMService): async def run_inference( self, context: LLMContext, - max_tokens: Optional[int] = None, - system_instruction: Optional[str] = None, - ) -> Optional[str]: + max_tokens: int | None = None, + system_instruction: str | None = None, + ) -> str | None: """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context. Args: diff --git a/src/pipecat/services/assemblyai/models.py b/src/pipecat/services/assemblyai/models.py index cffebcf06..dd223e2be 100644 --- a/src/pipecat/services/assemblyai/models.py +++ b/src/pipecat/services/assemblyai/models.py @@ -10,7 +10,7 @@ This module defines Pydantic models for handling AssemblyAI's real-time transcription WebSocket messages and connection configuration. 
""" -from typing import List, Literal, Optional +from typing import Literal from loguru import logger from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -85,10 +85,10 @@ class TurnMessage(BaseMessage): end_of_turn: bool transcript: str end_of_turn_confidence: float - words: List[Word] - language_code: Optional[str] = None - language_confidence: Optional[float] = None - speaker: Optional[str] = Field(default=None, alias="speaker_label") + words: list[Word] + language_code: str | None = None + language_confidence: float | None = None + speaker: str | None = Field(default=None, alias="speaker_label") class SpeechStartedMessage(BaseMessage): @@ -158,19 +158,19 @@ class AssemblyAIConnectionParams(BaseModel): sample_rate: int = 16000 encoding: Literal["pcm_s16le", "pcm_mulaw"] = "pcm_s16le" - end_of_turn_confidence_threshold: Optional[float] = None - min_turn_silence: Optional[int] = None - min_end_of_turn_silence_when_confident: Optional[int] = None # Deprecated - max_turn_silence: Optional[int] = None - keyterms_prompt: Optional[List[str]] = None - prompt: Optional[str] = None + end_of_turn_confidence_threshold: float | None = None + min_turn_silence: int | None = None + min_end_of_turn_silence_when_confident: int | None = None # Deprecated + max_turn_silence: int | None = None + keyterms_prompt: list[str] | None = None + prompt: str | None = None speech_model: Literal[ "universal-streaming-english", "universal-streaming-multilingual", "u3-rt-pro" ] = "u3-rt-pro" - language_detection: Optional[bool] = None + language_detection: bool | None = None format_turns: bool = True - speaker_labels: Optional[bool] = None - vad_threshold: Optional[float] = None + speaker_labels: bool | None = None + vad_threshold: float | None = None @model_validator(mode="after") def handle_deprecated_param(self): diff --git a/src/pipecat/services/assemblyai/stt.py b/src/pipecat/services/assemblyai/stt.py index 8b273188f..dbaa61c86 100644 --- 
a/src/pipecat/services/assemblyai/stt.py +++ b/src/pipecat/services/assemblyai/stt.py @@ -12,8 +12,9 @@ WebSocket API for streaming audio transcription. import asyncio import json +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Dict, List, Optional +from typing import Any from urllib.parse import urlencode from loguru import logger @@ -114,7 +115,7 @@ class AssemblyAISTTSettings(STTSettings): ) min_turn_silence: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) max_turn_silence: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - keyterms_prompt: List[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + keyterms_prompt: list[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) prompt: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) language_detection: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) format_turns: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @@ -148,16 +149,16 @@ class AssemblyAISTTService(WebsocketSTTService): self, *, api_key: str, - language: Optional[Language] = None, + language: Language | None = None, api_endpoint_base_url: str = "wss://streaming.assemblyai.com/v3/ws", sample_rate: int = 16000, encoding: str = "pcm_s16le", - connection_params: Optional[AssemblyAIConnectionParams] = None, + connection_params: AssemblyAIConnectionParams | None = None, vad_force_turn_endpoint: bool = True, should_interrupt: bool = True, - speaker_format: Optional[str] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = ASSEMBLYAI_TTFS_P99, + speaker_format: str | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = ASSEMBLYAI_TTFS_P99, **kwargs, ): """Initialize the AssemblyAI STT service. 
@@ -543,7 +544,7 @@ class AssemblyAISTTService(WebsocketSTTService): try: await asyncio.wait_for(self._termination_event.wait(), timeout=5.0) - except asyncio.TimeoutError: + except TimeoutError: logger.warning("Timed out waiting for termination message from server") except Exception as e: @@ -625,7 +626,7 @@ class AssemblyAISTTService(WebsocketSTTService): except json.JSONDecodeError: logger.warning(f"Received non-JSON message: {message}") - def _parse_message(self, message: Dict[str, Any]) -> BaseMessage: + def _parse_message(self, message: dict[str, Any]) -> BaseMessage: """Parse a raw message into the appropriate message type.""" msg_type = message.get("type") @@ -640,7 +641,7 @@ class AssemblyAISTTService(WebsocketSTTService): else: raise ValueError(f"Unknown message type: {msg_type}") - async def _handle_message(self, message: Dict[str, Any]): + async def _handle_message(self, message: dict[str, Any]): """Handle AssemblyAI WebSocket messages.""" try: parsed_message = self._parse_message(message) diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 0f140281d..3d8750f0c 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -9,8 +9,9 @@ import asyncio import base64 import json +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import Any, AsyncGenerator, Optional +from typing import Any import aiohttp from loguru import logger @@ -41,7 +42,7 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -def language_to_async_language(language: Language) -> Optional[str]: +def language_to_async_language(language: Language) -> str | None: """Convert a Language enum to Async language code. Args: @@ -98,23 +99,23 @@ class AsyncAITTSService(WebsocketTTSService): language: Language to use for synthesis. 
""" - language: Optional[Language] = None + language: Language | None = None def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, + voice_id: str | None = None, version: str = "v1", url: str = "wss://api.async.com/text_to_speech/websocket/ws", - model: Optional[str] = None, - sample_rate: Optional[int] = None, + model: str | None = None, + sample_rate: int | None = None, encoding: str = "pcm_s16le", container: str = "raw", - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - aggregate_sentences: Optional[bool] = None, - text_aggregation_mode: Optional[TextAggregationMode] = None, + params: InputParams | None = None, + settings: Settings | None = None, + aggregate_sentences: bool | None = None, + text_aggregation_mode: TextAggregationMode | None = None, **kwargs, ): """Initialize the Async TTS service. @@ -222,7 +223,7 @@ class AsyncAITTSService(WebsocketTTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Async language format. Args: @@ -339,7 +340,7 @@ class AsyncAITTSService(WebsocketTTSService): return self._websocket raise Exception("Websocket not connected") - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio. Args: @@ -499,22 +500,22 @@ class AsyncAIHttpTTSService(TTSService): language: Language to use for synthesis. 
""" - language: Optional[Language] = None + language: Language | None = None def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, + voice_id: str | None = None, aiohttp_session: aiohttp.ClientSession, - model: Optional[str] = None, + model: str | None = None, url: str = "https://api.async.com", version: str = "v1", - sample_rate: Optional[int] = None, + sample_rate: int | None = None, encoding: str = "pcm_s16le", container: str = "raw", - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Async TTS service. @@ -598,7 +599,7 @@ class AsyncAIHttpTTSService(TTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Async language format. Args: diff --git a/src/pipecat/services/aws/agent_core.py b/src/pipecat/services/aws/agent_core.py index d66af9c5b..2f1560b7c 100644 --- a/src/pipecat/services/aws/agent_core.py +++ b/src/pipecat/services/aws/agent_core.py @@ -13,7 +13,7 @@ Amazon Bedrock AgentCore Runtime and streams their responses as LLMTextFrames. import asyncio import json import os -from typing import Callable, Optional +from collections.abc import Callable import aioboto3 from loguru import logger @@ -31,7 +31,7 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor def default_context_to_payload_transformer( context: LLMContext, -) -> Optional[str]: +) -> str | None: """Default transformer to create AgentCore payload from LLM context. Extracts the latest user or system message text and wraps it in {"prompt": ""}. 
@@ -68,7 +68,7 @@ def default_context_to_payload_transformer( return json.dumps({"prompt": prompt}) -def default_response_to_output_transformer(response_line: str) -> Optional[str]: +def default_response_to_output_transformer(response_line: str) -> str | None: """Default transformer to extract output text from AgentCore response. Expects responses with {"response": ""} format. @@ -110,12 +110,12 @@ class AWSAgentCoreProcessor(FrameProcessor): def __init__( self, agentArn: str, - aws_access_key: Optional[str] = None, - aws_secret_key: Optional[str] = None, - aws_session_token: Optional[str] = None, - aws_region: Optional[str] = None, - context_to_payload_transformer: Optional[Callable[[LLMContext], Optional[str]]] = None, - response_to_output_transformer: Optional[Callable[[str], Optional[str]]] = None, + aws_access_key: str | None = None, + aws_secret_key: str | None = None, + aws_session_token: str | None = None, + aws_region: str | None = None, + context_to_payload_transformer: Callable[[LLMContext], str | None] | None = None, + response_to_output_transformer: Callable[[str], str | None] | None = None, **kwargs, ): """Initialize the AWS AgentCore processor. 
@@ -157,8 +157,8 @@ class AWSAgentCoreProcessor(FrameProcessor): # State for managing output response bookends self._output_response_open = False - self._last_text_frame_time: Optional[float] = None - self._close_task: Optional[asyncio.Task] = None + self._last_text_frame_time: float | None = None + self._close_task: asyncio.Task | None = None self._output_response_timeout = 1.0 # seconds async def _close_output_response_after_timeout(self): diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py index ef726e39b..b146bd11a 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -16,7 +16,7 @@ import json import os import re from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional +from typing import Any from loguru import logger from pydantic import BaseModel, Field @@ -66,10 +66,10 @@ class AWSBedrockLLMSettings(LLMSettings): additional_model_request_fields: Additional model-specific parameters. """ - stop_sequences: List[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + stop_sequences: list[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) latency: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) enable_prompt_caching: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - additional_model_request_fields: Dict[str, Any] | _NotGiven = field( + additional_model_request_fields: dict[str, Any] | _NotGiven = field( default_factory=lambda: NOT_GIVEN ) @@ -104,27 +104,27 @@ class AWSBedrockLLMService(LLMService): additional_model_request_fields: Additional model-specific parameters. 
""" - max_tokens: Optional[int] = Field(default=None, ge=1) - temperature: Optional[float] = Field(default=None, ge=0.0, le=1.0) - top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - stop_sequences: Optional[List[str]] = Field(default_factory=lambda: []) - latency: Optional[str] = Field(default=None) - additional_model_request_fields: Optional[Dict[str, Any]] = Field(default_factory=dict) + max_tokens: int | None = Field(default=None, ge=1) + temperature: float | None = Field(default=None, ge=0.0, le=1.0) + top_p: float | None = Field(default=None, ge=0.0, le=1.0) + stop_sequences: list[str] | None = Field(default_factory=lambda: []) + latency: str | None = Field(default=None) + additional_model_request_fields: dict[str, Any] | None = Field(default_factory=dict) def __init__( self, *, - model: Optional[str] = None, - aws_access_key: Optional[str] = None, - aws_secret_key: Optional[str] = None, - aws_session_token: Optional[str] = None, - aws_region: Optional[str] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - stop_sequences: Optional[List[str]] = None, - client_config: Optional[Config] = None, - retry_timeout_secs: Optional[float] = 5.0, - retry_on_timeout: Optional[bool] = False, + model: str | None = None, + aws_access_key: str | None = None, + aws_secret_key: str | None = None, + aws_session_token: str | None = None, + aws_region: str | None = None, + params: InputParams | None = None, + settings: Settings | None = None, + stop_sequences: list[str] | None = None, + client_config: Config | None = None, + retry_timeout_secs: float | None = 5.0, + retry_on_timeout: bool | None = False, **kwargs, ): """Initialize the AWS Bedrock LLM service. @@ -239,7 +239,7 @@ class AWSBedrockLLMService(LLMService): """ return True - def _build_inference_config(self) -> Dict[str, Any]: + def _build_inference_config(self) -> dict[str, Any]: """Build inference config with only the parameters that are set. 
This prevents conflicts with models (e.g., Claude Sonnet 4.5) that don't @@ -262,9 +262,9 @@ class AWSBedrockLLMService(LLMService): async def run_inference( self, context: LLMContext, - max_tokens: Optional[int] = None, - system_instruction: Optional[str] = None, - ) -> Optional[str]: + max_tokens: int | None = None, + system_instruction: str | None = None, + ) -> str | None: """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context. Args: @@ -344,7 +344,7 @@ class AWSBedrockLLMService(LLMService): client.converse_stream(**request_params), timeout=self._retry_timeout_secs ) return response - except (ReadTimeoutError, asyncio.TimeoutError) as e: + except (TimeoutError, ReadTimeoutError) as e: # Retry, this time without a timeout so we get a response logger.debug(f"{self}: Retrying converse_stream due to timeout") response = await client.converse_stream(**request_params) @@ -553,7 +553,7 @@ class AWSBedrockLLMService(LLMService): # also get cancelled. use_completion_tokens_estimate = True raise - except (ReadTimeoutError, asyncio.TimeoutError): + except (TimeoutError, ReadTimeoutError): await self._call_event_handler("on_completion_timeout") except Exception as e: await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e) diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index fd35375c2..a99de2d8a 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -19,7 +19,7 @@ import wave from dataclasses import dataclass, field from enum import Enum from importlib.resources import files -from typing import Any, List, Optional +from typing import Any from loguru import logger from pydantic import BaseModel, Field @@ -163,22 +163,22 @@ class Params(BaseModel): """ # Audio input - input_sample_rate: Optional[int] = Field(default=16000) - input_sample_size: Optional[int] = Field(default=16) - input_channel_count: Optional[int] 
= Field(default=1) + input_sample_rate: int | None = Field(default=16000) + input_sample_size: int | None = Field(default=16) + input_channel_count: int | None = Field(default=1) # Audio output - output_sample_rate: Optional[int] = Field(default=24000) - output_sample_size: Optional[int] = Field(default=16) - output_channel_count: Optional[int] = Field(default=1) + output_sample_rate: int | None = Field(default=24000) + output_sample_size: int | None = Field(default=16) + output_channel_count: int | None = Field(default=1) # Inference - max_tokens: Optional[int] = Field(default=1024) - top_p: Optional[float] = Field(default=0.9) - temperature: Optional[float] = Field(default=0.7) + max_tokens: int | None = Field(default=1024) + top_p: float | None = Field(default=0.9) + temperature: float | None = Field(default=0.7) # Turn-taking - endpointing_sensitivity: Optional[str] = Field(default=None) + endpointing_sensitivity: str | None = Field(default=None) @property def audio_config(self) -> "AudioConfig": @@ -206,14 +206,14 @@ class AudioConfig(BaseModel): """ # Input - input_sample_rate: Optional[int] = Field(default=16000) - input_sample_size: Optional[int] = Field(default=16) - input_channel_count: Optional[int] = Field(default=1) + input_sample_rate: int | None = Field(default=16000) + input_sample_size: int | None = Field(default=16) + input_channel_count: int | None = Field(default=1) # Output - output_sample_rate: Optional[int] = Field(default=24000) - output_sample_size: Optional[int] = Field(default=16) - output_channel_count: Optional[int] = Field(default=1) + output_sample_rate: int | None = Field(default=24000) + output_sample_size: int | None = Field(default=16) + output_channel_count: int | None = Field(default=1) @dataclass @@ -248,15 +248,15 @@ class AWSNovaSonicLLMService(LLMService): *, secret_access_key: str, access_key_id: str, - session_token: Optional[str] = None, + session_token: str | None = None, region: str, model: str = 
"amazon.nova-2-sonic-v1:0", voice_id: str = "matthew", - params: Optional[Params] = None, - audio_config: Optional[AudioConfig] = None, - settings: Optional[Settings] = None, - system_instruction: Optional[str] = None, - tools: Optional[ToolsSchema] = None, + params: Params | None = None, + audio_config: AudioConfig | None = None, + settings: Settings | None = None, + system_instruction: str | None = None, + tools: ToolsSchema | None = None, **kwargs, ): """Initializes the AWS Nova Sonic LLM service. @@ -363,7 +363,7 @@ class AWSNovaSonicLLMService(LLMService): self._access_key_id = access_key_id self._session_token = session_token self._region = region - self._client: Optional[BedrockRuntimeClient] = None + self._client: BedrockRuntimeClient | None = None # Audio I/O config (hardware settings, not runtime-tunable) # Priority: audio_config > params (deprecated) > defaults @@ -383,29 +383,30 @@ class AWSNovaSonicLLMService(LLMService): ) self._settings.endpointing_sensitivity = None - self._context: Optional[LLMContext] = None - self._stream: Optional[ + self._context: LLMContext | None = None + self._stream: ( DuplexEventStream[ InvokeModelWithBidirectionalStreamInput, InvokeModelWithBidirectionalStreamOutput, InvokeModelWithBidirectionalStreamOperationOutput, ] - ] = None - self._receive_task: Optional[asyncio.Task] = None - self._prompt_name: Optional[str] = None - self._input_audio_content_name: Optional[str] = None - self._content_being_received: Optional[CurrentContent] = None + | None + ) = None + self._receive_task: asyncio.Task | None = None + self._prompt_name: str | None = None + self._input_audio_content_name: str | None = None + self._content_being_received: CurrentContent | None = None self._assistant_is_responding = False self._ready_to_send_context = False self._triggering_assistant_response = False self._waiting_for_trigger_transcription = False self._disconnecting = False - self._connected_time: Optional[float] = None + self._connected_time: float 
| None = None self._wants_connection = False self._user_text_buffer = "" self._completed_tool_calls = set() self._audio_input_started = False - self._pending_speculative_text: Optional[str] = None + self._pending_speculative_text: str | None = None file_path = files("pipecat.services.aws.nova_sonic").joinpath("ready.wav") with wave.open(file_path.open("rb"), "rb") as wav_file: @@ -762,7 +763,7 @@ class AWSNovaSonicLLMService(LLMService): """ await self._send_client_event(session_start) - async def _send_prompt_start_event(self, tools: List[Any]): + async def _send_prompt_start_event(self, tools: list[Any]): if not self._prompt_name: return diff --git a/src/pipecat/services/aws/sagemaker/bidi_client.py b/src/pipecat/services/aws/sagemaker/bidi_client.py index 10382acae..8d7bdeaa1 100644 --- a/src/pipecat/services/aws/sagemaker/bidi_client.py +++ b/src/pipecat/services/aws/sagemaker/bidi_client.py @@ -12,7 +12,6 @@ and JSON data to SageMaker model endpoints and receiving streaming responses. 
""" import os -from typing import Optional from loguru import logger @@ -80,10 +79,10 @@ class SageMakerBidiClient: self.model_invocation_path = model_invocation_path self.model_query_string = model_query_string self.bidi_endpoint = f"https://runtime.sagemaker.{region}.amazonaws.com:8443" - self._client: Optional[SageMakerRuntimeHTTP2Client] = None - self._stream: Optional[ - DuplexEventStream[RequestStreamEventPayloadPart, ResponseStreamEvent, any] - ] = None + self._client: SageMakerRuntimeHTTP2Client | None = None + self._stream: ( + DuplexEventStream[RequestStreamEventPayloadPart, ResponseStreamEvent, any] | None + ) = None self._output_stream = None self._is_active = False @@ -161,7 +160,7 @@ class SageMakerBidiClient: self._is_active = False raise RuntimeError(f"Failed to start SageMaker BiDi session: {e}") - async def send_data(self, data_bytes: bytes, data_type: Optional[str] = None): + async def send_data(self, data_bytes: bytes, data_type: str | None = None): """Send a chunk of data to the stream. Generic method for sending any type of data to the SageMaker endpoint. @@ -232,7 +231,7 @@ class SageMakerBidiClient: await self.send_data(json.dumps(data).encode("utf-8"), data_type="UTF8") - async def receive_response(self) -> Optional[ResponseStreamEvent]: + async def receive_response(self) -> ResponseStreamEvent | None: """Receive a response from the stream. Blocks until a response is available from the SageMaker endpoint. 
Returns diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index ace05090d..2c791bc97 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -14,8 +14,9 @@ import json import os import random import string +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import Any, AsyncGenerator, Optional +from typing import Any from loguru import logger @@ -66,14 +67,14 @@ class AWSTranscribeSTTService(WebsocketSTTService): def __init__( self, *, - api_key: Optional[str] = None, - aws_access_key_id: Optional[str] = None, - aws_session_token: Optional[str] = None, - region: Optional[str] = None, - sample_rate: Optional[int] = None, - language: Optional[Language] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = AWS_TRANSCRIBE_TTFS_P99, + api_key: str | None = None, + aws_access_key_id: str | None = None, + aws_session_token: str | None = None, + region: str | None = None, + sample_rate: int | None = None, + language: Language | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = AWS_TRANSCRIBE_TTFS_P99, **kwargs, ): """Initialize the AWS Transcribe STT service. @@ -496,7 +497,7 @@ class AWSTranscribeSTTService(WebsocketSTTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[str] = None + self, transcript: str, is_final: bool, language: str | None = None ): pass diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index f46539b9b..93919c860 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -11,8 +11,8 @@ supporting multiple languages, voices, and SSML features. 
""" import os +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import AsyncGenerator, List, Optional from loguru import logger from pydantic import BaseModel @@ -37,7 +37,7 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -def language_to_aws_language(language: Language) -> Optional[str]: +def language_to_aws_language(language: Language) -> str | None: """Convert a Language enum to AWS Polly language code. Args: @@ -137,7 +137,7 @@ class AWSPollyTTSSettings(TTSSettings): pitch: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) rate: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) volume: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - lexicon_names: List[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + lexicon_names: list[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class AWSPollyTTSService(TTSService): @@ -166,24 +166,24 @@ class AWSPollyTTSService(TTSService): lexicon_names: List of pronunciation lexicons to apply. 
""" - engine: Optional[str] = None - language: Optional[Language] = Language.EN - pitch: Optional[str] = None - rate: Optional[str] = None - volume: Optional[str] = None - lexicon_names: Optional[List[str]] = None + engine: str | None = None + language: Language | None = Language.EN + pitch: str | None = None + rate: str | None = None + volume: str | None = None + lexicon_names: list[str] | None = None def __init__( self, *, - api_key: Optional[str] = None, - aws_access_key_id: Optional[str] = None, - aws_session_token: Optional[str] = None, - region: Optional[str] = None, - voice_id: Optional[str] = None, - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + api_key: str | None = None, + aws_access_key_id: str | None = None, + aws_session_token: str | None = None, + region: str | None = None, + voice_id: str | None = None, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initializes the AWS Polly TTS service. @@ -268,7 +268,7 @@ class AWSPollyTTSService(TTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to AWS Polly language format. 
Args: diff --git a/src/pipecat/services/aws/utils.py b/src/pipecat/services/aws/utils.py index 4a4fc6db5..2b69cf035 100644 --- a/src/pipecat/services/aws/utils.py +++ b/src/pipecat/services/aws/utils.py @@ -17,21 +17,20 @@ import hmac import json import struct import urllib.parse -from typing import Dict, Optional def get_presigned_url( *, region: str, - credentials: Dict[str, Optional[str]], + credentials: dict[str, str | None], language_code: str, media_encoding: str = "pcm", sample_rate: int = 16000, number_of_channels: int = 1, enable_partial_results_stabilization: bool = True, partial_results_stability: str = "high", - vocabulary_name: Optional[str] = None, - vocabulary_filter_name: Optional[str] = None, + vocabulary_name: str | None = None, + vocabulary_filter_name: str | None = None, show_speaker_label: bool = False, enable_channel_identification: bool = False, ) -> str: @@ -199,7 +198,7 @@ class AWSTranscribePresignedURL: self.canonical_querystring += "&vocabulary-name=" + vocabulary_name # Create payload hash - self.payload_hash = hashlib.sha256("".encode("utf-8")).hexdigest() + self.payload_hash = hashlib.sha256(b"").hexdigest() # Create canonical request self.canonical_request = f"{self.method}\n{self.canonical_uri}\n{self.canonical_querystring}\n{self.canonical_headers}\n{self.signed_headers}\n{self.payload_hash}" @@ -213,7 +212,7 @@ class AWSTranscribePresignedURL: # Calculate signature k_date = hmac.new( - f"AWS4{self.secret_key}".encode("utf-8"), self.datestamp.encode("utf-8"), hashlib.sha256 + f"AWS4{self.secret_key}".encode(), self.datestamp.encode("utf-8"), hashlib.sha256 ).digest() k_region = hmac.new(k_date, self.region.encode("utf-8"), hashlib.sha256).digest() k_service = hmac.new(k_region, self.service.encode("utf-8"), hashlib.sha256).digest() diff --git a/src/pipecat/services/azure/common.py b/src/pipecat/services/azure/common.py index dc7aaa359..8bb48cd04 100644 --- a/src/pipecat/services/azure/common.py +++ 
b/src/pipecat/services/azure/common.py @@ -6,12 +6,10 @@ """Language conversion utilities for Azure services.""" -from typing import Optional - from pipecat.transcriptions.language import Language, resolve_language -def language_to_azure_language(language: Language) -> Optional[str]: +def language_to_azure_language(language: Language) -> str | None: """Convert a Language enum to Azure language code. Args: diff --git a/src/pipecat/services/azure/image.py b/src/pipecat/services/azure/image.py index fc50d710a..b7fa732a3 100644 --- a/src/pipecat/services/azure/image.py +++ b/src/pipecat/services/azure/image.py @@ -12,8 +12,8 @@ using REST endpoints for creating images from text prompts. import asyncio import io +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional import aiohttp from PIL import Image @@ -49,13 +49,13 @@ class AzureImageGenServiceREST(ImageGenService): def __init__( self, *, - image_size: Optional[str] = None, + image_size: str | None = None, api_key: str, endpoint: str, - model: Optional[str] = None, + model: str | None = None, aiohttp_session: aiohttp.ClientSession, api_version="2023-06-01-preview", - settings: Optional[Settings] = None, + settings: Settings | None = None, ): """Initialize the AzureImageGenServiceREST. 
diff --git a/src/pipecat/services/azure/llm.py b/src/pipecat/services/azure/llm.py index 8b5050e5b..f542dfe86 100644 --- a/src/pipecat/services/azure/llm.py +++ b/src/pipecat/services/azure/llm.py @@ -7,7 +7,6 @@ """Azure OpenAI service implementation for the Pipecat AI framework.""" from dataclasses import dataclass -from typing import Optional from loguru import logger from openai import AsyncAzureOpenAI @@ -37,9 +36,9 @@ class AzureLLMService(OpenAILLMService): *, api_key: str, endpoint: str, - model: Optional[str] = None, + model: str | None = None, api_version: str = "2024-09-01-preview", - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Azure LLM service. diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index 57306e06a..9b2247793 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -11,8 +11,9 @@ Speech SDK for real-time audio transcription. """ import asyncio +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import Any, AsyncGenerator, Optional +from typing import Any from loguru import logger @@ -73,13 +74,13 @@ class AzureSTTService(STTService): self, *, api_key: str, - region: Optional[str] = None, - language: Optional[Language] = Language.EN_US, - sample_rate: Optional[int] = None, - private_endpoint: Optional[str] = None, - endpoint_id: Optional[str] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = AZURE_TTFS_P99, + region: str | None = None, + language: Language | None = Language.EN_US, + sample_rate: int | None = None, + private_endpoint: str | None = None, + endpoint_id: str | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = AZURE_TTFS_P99, **kwargs, ): """Initialize the Azure STT service. 
@@ -165,7 +166,7 @@ class AzureSTTService(STTService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Azure service-specific language code. Args: @@ -272,7 +273,7 @@ class AzureSTTService(STTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing.""" await self.stop_processing_metrics() diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index 79dc8a2e1..dd54a1b41 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -7,8 +7,8 @@ """Azure Cognitive Services Text-to-Speech service implementations.""" import asyncio +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional from loguru import logger from pydantic import BaseModel @@ -127,14 +127,14 @@ class AzureBaseTTSService: volume: Volume level (e.g., "+20%", "loud", "x-soft"). 
""" - emphasis: Optional[str] = None - language: Optional[Language] = Language.EN_US - pitch: Optional[str] = None - rate: Optional[str] = None - role: Optional[str] = None - style: Optional[str] = None - style_degree: Optional[str] = None - volume: Optional[str] = None + emphasis: str | None = None + language: Language | None = Language.EN_US + pitch: str | None = None + rate: str | None = None + role: str | None = None + style: str | None = None + style_degree: str | None = None + volume: str | None = None def _init_azure_base( self, @@ -154,7 +154,7 @@ class AzureBaseTTSService: self._region = region self._speech_synthesizer = None - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Azure language format. Args: @@ -254,12 +254,12 @@ class AzureTTSService(TTSService, AzureBaseTTSService): *, api_key: str, region: str, - voice: Optional[str] = None, - sample_rate: Optional[int] = None, - params: Optional[AzureBaseTTSService.InputParams] = None, - settings: Optional[Settings] = None, - aggregate_sentences: Optional[bool] = None, - text_aggregation_mode: Optional[TextAggregationMode] = None, + voice: str | None = None, + sample_rate: int | None = None, + params: AzureBaseTTSService.InputParams | None = None, + settings: Settings | None = None, + aggregate_sentences: bool | None = None, + text_aggregation_mode: TextAggregationMode | None = None, **kwargs, ): """Initialize the Azure streaming TTS service. 
@@ -350,11 +350,9 @@ class AzureTTSService(TTSService, AzureBaseTTSService): self._current_sentence_max_word_offset: float = ( 0.0 # Max word boundary offset seen in current sentence (for 8kHz workaround) ) - self._last_word: Optional[str] = None # Track last word for punctuation merging - self._last_timestamp: Optional[float] = None # Track last timestamp - self._current_context_id: Optional[str] = ( - None # Track current context_id for word timestamps - ) + self._last_word: str | None = None # Track last word for punctuation merging + self._last_timestamp: float | None = None # Track last timestamp + self._current_context_id: str | None = None # Track current context_id for word timestamps def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -622,7 +620,7 @@ class AzureTTSService(TTSService, AzureBaseTTSService): self._last_timestamp = None self._current_context_id = None - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio data.""" logger.trace(f"{self}: flushing audio") @@ -753,10 +751,10 @@ class AzureHttpTTSService(TTSService, AzureBaseTTSService): *, api_key: str, region: str, - voice: Optional[str] = None, - sample_rate: Optional[int] = None, - params: Optional[AzureBaseTTSService.InputParams] = None, - settings: Optional[Settings] = None, + voice: str | None = None, + sample_rate: int | None = None, + params: AzureBaseTTSService.InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Azure HTTP TTS service. 
diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index b6b83a928..f2bc094b9 100644 --- a/src/pipecat/services/camb/tts.py +++ b/src/pipecat/services/camb/tts.py @@ -16,8 +16,9 @@ Features: - Model-specific sample rates: mars-pro (48kHz), mars-flash (22.05kHz) """ +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Dict, Optional +from typing import Any from camb import StreamTtsOutputConfiguration from camb.client import AsyncCambAI @@ -36,14 +37,14 @@ from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts # Model-specific sample rates -MODEL_SAMPLE_RATES: Dict[str, int] = { +MODEL_SAMPLE_RATES: dict[str, int] = { "mars-flash": 22050, # 22.05kHz "mars-pro": 48000, # 48kHz "mars-instruct": 22050, # 22.05kHz } -def language_to_camb_language(language: Language) -> Optional[str]: +def language_to_camb_language(language: Language) -> str | None: """Convert a Pipecat Language enum to Camb.ai language code. Args: @@ -193,8 +194,8 @@ class CambTTSService(TTSService): Ignored for other models. Max 1000 characters. """ - language: Optional[Language] = Language.EN - user_instructions: Optional[str] = Field( + language: Language | None = Language.EN + user_instructions: str | None = Field( default=None, max_length=1000, description="Custom instructions for mars-instruct model only. " @@ -205,12 +206,12 @@ class CambTTSService(TTSService): self, *, api_key: str, - voice_id: Optional[int] = None, - model: Optional[str] = None, + voice_id: int | None = None, + model: str | None = None, timeout: float = 60.0, - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Camb.ai TTS service. 
@@ -297,7 +298,7 @@ class CambTTSService(TTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Camb.ai language format. Args: @@ -342,7 +343,7 @@ class CambTTSService(TTSService): try: # Build SDK parameters - tts_kwargs: Dict[str, Any] = { + tts_kwargs: dict[str, Any] = { "text": text, "voice_id": self._settings.voice, "language": self._settings.language, diff --git a/src/pipecat/services/cartesia/stt.py b/src/pipecat/services/cartesia/stt.py index 2606e0d6d..8e66eb965 100644 --- a/src/pipecat/services/cartesia/stt.py +++ b/src/pipecat/services/cartesia/stt.py @@ -12,8 +12,9 @@ the Cartesia Live transcription API for real-time speech recognition. import json import urllib.parse +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import Any, AsyncGenerator, Optional +from typing import Any from loguru import logger @@ -155,10 +156,10 @@ class CartesiaSTTService(WebsocketSTTService): api_key: str, base_url: str = "", encoding: str = "pcm_s16le", - sample_rate: Optional[int] = None, - live_options: Optional[CartesiaLiveOptions] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = CARTESIA_TTFS_P99, + sample_rate: int | None = None, + live_options: CartesiaLiveOptions | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = CARTESIA_TTFS_P99, **kwargs, ): """Initialize CartesiaSTTService with API key and options. 
@@ -389,7 +390,7 @@ class CartesiaSTTService(WebsocketSTTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing.""" pass diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index e8e033ee9..222939104 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -8,9 +8,10 @@ import base64 import json +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from enum import Enum -from typing import Any, AsyncGenerator, List, Optional +from enum import StrEnum +from typing import Any import aiohttp from loguru import logger @@ -56,12 +57,12 @@ class GenerationConfig(BaseModel): and Marian. """ - volume: Optional[float] = None - speed: Optional[float] = None - emotion: Optional[str] = None + volume: float | None = None + speed: float | None = None + emotion: str | None = None -def language_to_cartesia_language(language: Language) -> Optional[str]: +def language_to_cartesia_language(language: Language) -> str | None: """Convert a Language enum to Cartesia language code. Args: @@ -118,7 +119,7 @@ def language_to_cartesia_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) -class CartesiaEmotion(str, Enum): +class CartesiaEmotion(StrEnum): """Predefined Emotions supported by Cartesia.""" # Primary emotions supported by Cartesia @@ -222,25 +223,25 @@ class CartesiaTTSService(WebsocketTTSService): pronunciation_dict_id: The ID of the pronunciation dictionary to use for custom pronunciations. 
""" - language: Optional[Language] = Language.EN - generation_config: Optional[GenerationConfig] = None - pronunciation_dict_id: Optional[str] = None + language: Language | None = Language.EN + generation_config: GenerationConfig | None = None + pronunciation_dict_id: str | None = None def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, + voice_id: str | None = None, cartesia_version: str = "2025-04-16", url: str = "wss://api.cartesia.ai/tts/websocket", - model: Optional[str] = None, - sample_rate: Optional[int] = None, + model: str | None = None, + sample_rate: int | None = None, encoding: str = "pcm_s16le", container: str = "raw", - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - text_aggregation_mode: Optional[TextAggregationMode] = None, - aggregate_sentences: Optional[bool] = None, + params: InputParams | None = None, + settings: Settings | None = None, + text_aggregation_mode: TextAggregationMode | None = None, + aggregate_sentences: bool | None = None, **kwargs, ): """Initialize the Cartesia TTS service. @@ -362,7 +363,7 @@ class CartesiaTTSService(WebsocketTTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Cartesia language format. Args: @@ -408,8 +409,8 @@ class CartesiaTTSService(WebsocketTTSService): return base_lang in cjk_languages def _process_word_timestamps_for_language( - self, words: List[str], starts: List[float] - ) -> List[tuple[str, float]]: + self, words: list[str], starts: list[float] + ) -> list[tuple[str, float]]: """Process word timestamps based on the current language. For CJK languages, Cartesia groups related characters in the same timestamp message. 
@@ -576,7 +577,7 @@ class CartesiaTTSService(WebsocketTTSService): """ await super().on_audio_context_completed(context_id) - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio and finalize the current context. Args: @@ -715,24 +716,24 @@ class CartesiaHttpTTSService(TTSService): pronunciation_dict_id: The ID of the pronunciation dictionary to use for custom pronunciations. """ - language: Optional[Language] = Language.EN - generation_config: Optional[GenerationConfig] = None - pronunciation_dict_id: Optional[str] = None + language: Language | None = Language.EN + generation_config: GenerationConfig | None = None + pronunciation_dict_id: str | None = None def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, - model: Optional[str] = None, + voice_id: str | None = None, + model: str | None = None, base_url: str = "https://api.cartesia.ai", cartesia_version: str = "2026-03-01", - aiohttp_session: Optional[aiohttp.ClientSession] = None, - sample_rate: Optional[int] = None, + aiohttp_session: aiohttp.ClientSession | None = None, + sample_rate: int | None = None, encoding: str = "pcm_s16le", container: str = "raw", - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Cartesia HTTP TTS service. @@ -825,7 +826,7 @@ class CartesiaHttpTTSService(TTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Cartesia language format. 
Args: diff --git a/src/pipecat/services/cerebras/llm.py b/src/pipecat/services/cerebras/llm.py index 5b883ecd3..0476cc120 100644 --- a/src/pipecat/services/cerebras/llm.py +++ b/src/pipecat/services/cerebras/llm.py @@ -7,7 +7,6 @@ """Cerebras LLM service implementation using OpenAI-compatible interface.""" from dataclasses import dataclass -from typing import Optional from loguru import logger @@ -42,8 +41,8 @@ class CerebrasLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://api.cerebras.ai/v1", - model: Optional[str] = None, - settings: Optional[Settings] = None, + model: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Cerebras LLM service. diff --git a/src/pipecat/services/deepgram/flux/base.py b/src/pipecat/services/deepgram/flux/base.py index cc58ad477..baefbd060 100644 --- a/src/pipecat/services/deepgram/flux/base.py +++ b/src/pipecat/services/deepgram/flux/base.py @@ -10,8 +10,8 @@ import asyncio import time from abc import abstractmethod from dataclasses import dataclass, field -from enum import Enum -from typing import Any, Dict, Optional +from enum import StrEnum +from typing import Any from urllib.parse import urlencode from loguru import logger @@ -32,7 +32,7 @@ from pipecat.utils.time import time_now_iso8601 from pipecat.utils.tracing.service_decorators import traced_stt -class FluxMessageType(str, Enum): +class FluxMessageType(StrEnum): """Deepgram Flux WebSocket message types. These are the top-level message types that can be received from the @@ -46,7 +46,7 @@ class FluxMessageType(str, Enum): CONFIGURE_FAILURE = "ConfigureFailure" -class FluxEventType(str, Enum): +class FluxEventType(StrEnum): """Deepgram Flux TurnInfo event types. 
These events are contained within TurnInfo messages and indicate @@ -99,8 +99,8 @@ class DeepgramFluxSTTBase(STTService): self, *, encoding: str = "linear16", - mip_opt_out: Optional[bool] = None, - tag: Optional[list] = None, + mip_opt_out: bool | None = None, + tag: list | None = None, should_interrupt: bool = True, settings: Settings, **kwargs, @@ -128,8 +128,8 @@ class DeepgramFluxSTTBase(STTService): self._connection_established_event = asyncio.Event() # Watchdog state — see _watchdog_task_handler for details - self._last_stt_time: Optional[float] = None - self._watchdog_task: Optional[asyncio.Task] = None + self._last_stt_time: float | None = None + self._watchdog_task: asyncio.Task | None = None self._user_is_speaking = False # Flux event handlers @@ -340,7 +340,7 @@ class DeepgramFluxSTTBase(STTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing.""" pass @@ -349,7 +349,7 @@ class DeepgramFluxSTTBase(STTService): # Message handling # ------------------------------------------------------------------ - def _validate_message(self, data: Dict[str, Any]) -> bool: + def _validate_message(self, data: dict[str, Any]) -> bool: """Validate basic message structure from Deepgram Flux. Ensures the received message has the expected structure before processing. @@ -370,7 +370,7 @@ class DeepgramFluxSTTBase(STTService): return True - async def _handle_message(self, data: Dict[str, Any]): + async def _handle_message(self, data: dict[str, Any]): """Handle a parsed message from Deepgram Flux. Routes messages to appropriate handlers based on their type. 
Validates @@ -416,7 +416,7 @@ class DeepgramFluxSTTBase(STTService): # Notify connection is established self._connection_established_event.set() - async def _handle_fatal_error(self, data: Dict[str, Any]): + async def _handle_fatal_error(self, data: dict[str, Any]): """Handle fatal error messages from Deepgram Flux. Fatal errors indicate unrecoverable issues with the connection or @@ -435,7 +435,7 @@ class DeepgramFluxSTTBase(STTService): # Error will be handled by the transport's receive loop error handler raise Exception(deepgram_error) - async def _handle_turn_info(self, data: Dict[str, Any]): + async def _handle_turn_info(self, data: dict[str, Any]): """Handle TurnInfo events from Deepgram Flux. TurnInfo messages contain various turn-based events that indicate @@ -504,7 +504,7 @@ class DeepgramFluxSTTBase(STTService): logger.trace(f"Received event TurnResumed: {event}") await self._call_event_handler("on_turn_resumed") - def _calculate_average_confidence(self, transcript_data) -> Optional[float]: + def _calculate_average_confidence(self, transcript_data) -> float | None: """Calculate the average confidence from transcript data. Return None if the data is missing or invalid. @@ -520,7 +520,7 @@ class DeepgramFluxSTTBase(STTService): return None return sum(confidences) / len(confidences) - async def _handle_end_of_turn(self, transcript: str, data: Dict[str, Any]): + async def _handle_end_of_turn(self, transcript: str, data: dict[str, Any]): """Handle EndOfTurn events from Deepgram Flux. EndOfTurn events are fired when Deepgram Flux determines that a speaking @@ -567,7 +567,7 @@ class DeepgramFluxSTTBase(STTService): await self.broadcast_frame(UserStoppedSpeakingFrame) await self._call_event_handler("on_end_of_turn", transcript) - async def _handle_eager_end_of_turn(self, transcript: str, data: Dict[str, Any]): + async def _handle_eager_end_of_turn(self, transcript: str, data: dict[str, Any]): """Handle EagerEndOfTurn events from Deepgram Flux. 
EagerEndOfTurn events are fired when the end-of-turn confidence reaches the diff --git a/src/pipecat/services/deepgram/flux/sagemaker/stt.py b/src/pipecat/services/deepgram/flux/sagemaker/stt.py index ffff24d04..da61b169a 100644 --- a/src/pipecat/services/deepgram/flux/sagemaker/stt.py +++ b/src/pipecat/services/deepgram/flux/sagemaker/stt.py @@ -9,8 +9,8 @@ import asyncio import json import time +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import AsyncGenerator, Optional from loguru import logger @@ -86,11 +86,11 @@ class DeepgramFluxSageMakerSTTService(DeepgramFluxSTTBase): endpoint_name: str, region: str, encoding: str = "linear16", - sample_rate: Optional[int] = None, - mip_opt_out: Optional[bool] = None, - tag: Optional[list] = None, + sample_rate: int | None = None, + mip_opt_out: bool | None = None, + tag: list | None = None, should_interrupt: bool = True, - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Deepgram Flux SageMaker STT service. 
@@ -137,8 +137,8 @@ class DeepgramFluxSageMakerSTTService(DeepgramFluxSTTBase): self._endpoint_name = endpoint_name self._region = region - self._client: Optional[SageMakerBidiClient] = None - self._response_task: Optional[asyncio.Task] = None + self._client: SageMakerBidiClient | None = None + self._response_task: asyncio.Task | None = None # ------------------------------------------------------------------ # Transport interface implementation diff --git a/src/pipecat/services/deepgram/flux/stt.py b/src/pipecat/services/deepgram/flux/stt.py index 32854f294..5b0b16472 100644 --- a/src/pipecat/services/deepgram/flux/stt.py +++ b/src/pipecat/services/deepgram/flux/stt.py @@ -8,7 +8,7 @@ import json import time -from typing import AsyncGenerator, Optional +from collections.abc import AsyncGenerator from loguru import logger from pydantic import BaseModel @@ -90,27 +90,27 @@ class DeepgramFluxSTTService(DeepgramFluxSTTBase, WebsocketService): min_confidence: Optional. Minimum confidence required confidence to create a TranscriptionFrame """ - eager_eot_threshold: Optional[float] = None - eot_threshold: Optional[float] = None - eot_timeout_ms: Optional[int] = None + eager_eot_threshold: float | None = None + eot_threshold: float | None = None + eot_timeout_ms: int | None = None keyterm: list = [] - mip_opt_out: Optional[bool] = None + mip_opt_out: bool | None = None tag: list = [] - min_confidence: Optional[float] = None # New parameter + min_confidence: float | None = None # New parameter def __init__( self, *, api_key: str, url: str = "wss://api.deepgram.com/v2/listen", - sample_rate: Optional[int] = None, - mip_opt_out: Optional[bool] = None, - model: Optional[str] = None, + sample_rate: int | None = None, + mip_opt_out: bool | None = None, + model: str | None = None, flux_encoding: str = "linear16", - tag: Optional[list] = None, - params: Optional[InputParams] = None, + tag: list | None = None, + params: InputParams | None = None, should_interrupt: bool = True, - 
settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Deepgram Flux STT service. diff --git a/src/pipecat/services/deepgram/sagemaker/stt.py b/src/pipecat/services/deepgram/sagemaker/stt.py index 1087b124f..c837dc30b 100644 --- a/src/pipecat/services/deepgram/sagemaker/stt.py +++ b/src/pipecat/services/deepgram/sagemaker/stt.py @@ -14,8 +14,9 @@ languages, and various Deepgram features. import asyncio import json +from collections.abc import AsyncGenerator from dataclasses import dataclass, fields -from typing import Any, AsyncGenerator, Optional +from typing import Any from loguru import logger @@ -89,11 +90,11 @@ class DeepgramSageMakerSTTService(STTService): encoding: str = "linear16", channels: int = 1, multichannel: bool = False, - sample_rate: Optional[int] = None, - mip_opt_out: Optional[bool] = None, - live_options: Optional[LiveOptions] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = DEEPGRAM_SAGEMAKER_TTFS_P99, + sample_rate: int | None = None, + mip_opt_out: bool | None = None, + live_options: LiveOptions | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = DEEPGRAM_SAGEMAKER_TTFS_P99, **kwargs, ): """Initialize the Deepgram SageMaker STT service. @@ -196,9 +197,9 @@ class DeepgramSageMakerSTTService(STTService): self._multichannel = multichannel self._mip_opt_out = mip_opt_out - self._client: Optional[SageMakerBidiClient] = None - self._response_task: Optional[asyncio.Task] = None - self._keepalive_task: Optional[asyncio.Task] = None + self._client: SageMakerBidiClient | None = None + self._response_task: asyncio.Task | None = None + self._keepalive_task: asyncio.Task | None = None def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -484,7 +485,7 @@ class DeepgramSageMakerSTTService(STTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing. diff --git a/src/pipecat/services/deepgram/sagemaker/tts.py b/src/pipecat/services/deepgram/sagemaker/tts.py index 6f43475dc..5585992b1 100644 --- a/src/pipecat/services/deepgram/sagemaker/tts.py +++ b/src/pipecat/services/deepgram/sagemaker/tts.py @@ -14,8 +14,9 @@ streaming audio output. import asyncio import json +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import Any, AsyncGenerator, Optional +from typing import Any from loguru import logger @@ -72,10 +73,10 @@ class DeepgramSageMakerTTSService(TTSService): *, endpoint_name: str, region: str, - voice: Optional[str] = None, - sample_rate: Optional[int] = None, + voice: str | None = None, + sample_rate: int | None = None, encoding: str = "linear16", - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Deepgram SageMaker TTS service. @@ -122,8 +123,8 @@ class DeepgramSageMakerTTSService(TTSService): self._region = region self._encoding = encoding - self._client: Optional[SageMakerBidiClient] = None - self._response_task: Optional[asyncio.Task] = None + self._client: SageMakerBidiClient | None = None + self._response_task: asyncio.Task | None = None def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -311,7 +312,7 @@ class DeepgramSageMakerTTSService(TTSService): logger.error(f"{self} error sending Clear message: {e}") await super().on_audio_context_interrupted(context_id) - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio synthesis by sending Flush command. 
This should be called when the LLM finishes a complete response to force diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index 3a7669707..66b1d70da 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -7,8 +7,9 @@ """Deepgram speech-to-text service implementation.""" import asyncio +from collections.abc import AsyncGenerator from dataclasses import dataclass, field, fields -from typing import Any, AsyncGenerator, Optional +from typing import Any from loguru import logger @@ -64,33 +65,33 @@ class LiveOptions: def __init__( self, *, - callback: Optional[str] = None, - callback_method: Optional[str] = None, - channels: Optional[int] = None, - detect_entities: Optional[bool] = None, - diarize: Optional[bool] = None, - dictation: Optional[bool] = None, - encoding: Optional[str] = None, - endpointing: Optional[Any] = None, - extra: Optional[Any] = None, - interim_results: Optional[bool] = None, - keyterm: Optional[Any] = None, - keywords: Optional[Any] = None, - language: Optional[str] = None, - mip_opt_out: Optional[bool] = None, - model: Optional[str] = None, - multichannel: Optional[bool] = None, - numerals: Optional[bool] = None, - profanity_filter: Optional[bool] = None, - punctuate: Optional[bool] = None, - redact: Optional[Any] = None, - replace: Optional[Any] = None, - sample_rate: Optional[int] = None, - search: Optional[Any] = None, - smart_format: Optional[bool] = None, - tag: Optional[Any] = None, - utterance_end_ms: Optional[int] = None, - version: Optional[str] = None, + callback: str | None = None, + callback_method: str | None = None, + channels: int | None = None, + detect_entities: bool | None = None, + diarize: bool | None = None, + dictation: bool | None = None, + encoding: str | None = None, + endpointing: Any | None = None, + extra: Any | None = None, + interim_results: bool | None = None, + keyterm: Any | None = None, + keywords: Any | None = None, + language: str 
| None = None, + mip_opt_out: bool | None = None, + model: str | None = None, + multichannel: bool | None = None, + numerals: bool | None = None, + profanity_filter: bool | None = None, + punctuate: bool | None = None, + redact: Any | None = None, + replace: Any | None = None, + sample_rate: int | None = None, + search: Any | None = None, + smart_format: bool | None = None, + tag: Any | None = None, + utterance_end_ms: int | None = None, + version: str | None = None, **kwargs, ): """Initialize live transcription options. @@ -298,15 +299,15 @@ class DeepgramSTTService(STTService): encoding: str = "linear16", channels: int = 1, multichannel: bool = False, - sample_rate: Optional[int] = None, - callback: Optional[str] = None, - callback_method: Optional[str] = None, - tag: Optional[Any] = None, - mip_opt_out: Optional[bool] = None, - live_options: Optional[LiveOptions] = None, - addons: Optional[dict] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = DEEPGRAM_TTFS_P99, + sample_rate: int | None = None, + callback: str | None = None, + callback_method: str | None = None, + tag: Any | None = None, + mip_opt_out: bool | None = None, + live_options: LiveOptions | None = None, + addons: dict | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = DEEPGRAM_TTFS_P99, **kwargs, ): """Initialize the Deepgram STT service. @@ -668,7 +669,7 @@ class DeepgramSTTService(STTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing.""" pass diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index 3b5d04202..cc7b88455 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -11,8 +11,9 @@ for generating speech from text using various voice models. 
""" import json +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import Any, AsyncGenerator, Optional +from typing import Any import aiohttp from loguru import logger @@ -65,11 +66,11 @@ class DeepgramTTSService(WebsocketTTSService): self, *, api_key: str, - voice: Optional[str] = None, + voice: str | None = None, base_url: str = "wss://api.deepgram.com", - sample_rate: Optional[int] = None, + sample_rate: int | None = None, encoding: str = "linear16", - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Deepgram WebSocket TTS service. @@ -315,7 +316,7 @@ class DeepgramTTSService(WebsocketTTSService): except json.JSONDecodeError: logger.error(f"Invalid JSON message: {message}") - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio synthesis by sending Flush command. This should be called when the LLM finishes a complete response to force @@ -374,12 +375,12 @@ class DeepgramHttpTTSService(TTSService): self, *, api_key: str, - voice: Optional[str] = None, + voice: str | None = None, aiohttp_session: aiohttp.ClientSession, base_url: str = "https://api.deepgram.com", - sample_rate: Optional[int] = None, + sample_rate: int | None = None, encoding: str = "linear16", - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Deepgram TTS service. 
diff --git a/src/pipecat/services/deepseek/llm.py b/src/pipecat/services/deepseek/llm.py index 177a87e63..7485168f9 100644 --- a/src/pipecat/services/deepseek/llm.py +++ b/src/pipecat/services/deepseek/llm.py @@ -7,7 +7,6 @@ """DeepSeek LLM service implementation using OpenAI-compatible interface.""" from dataclasses import dataclass -from typing import Optional from loguru import logger @@ -42,8 +41,8 @@ class DeepSeekLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://api.deepseek.com/v1", - model: Optional[str] = None, - settings: Optional[Settings] = None, + model: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the DeepSeek LLM service. diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index aa7fd0659..ac6c01876 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -15,9 +15,10 @@ import asyncio import base64 import io import json +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from enum import Enum -from typing import Any, AsyncGenerator, Optional +from enum import StrEnum +from typing import Any import aiohttp from loguru import logger @@ -53,7 +54,7 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -def language_to_elevenlabs_language(language: Language) -> Optional[str]: +def language_to_elevenlabs_language(language: Language) -> str | None: """Convert a Language enum to ElevenLabs language code. 
Source: @@ -170,7 +171,7 @@ def language_to_elevenlabs_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) -class CommitStrategy(str, Enum): +class CommitStrategy(StrEnum): """Commit strategies for transcript segmentation.""" MANUAL = "manual" @@ -230,7 +231,7 @@ class ElevenLabsSTTService(SegmentedSTTService): tag_audio_events: Whether to include audio events like (laughter), (coughing), in the transcription. """ - language: Optional[Language] = None + language: Language | None = None tag_audio_events: bool = True def __init__( @@ -239,11 +240,11 @@ class ElevenLabsSTTService(SegmentedSTTService): api_key: str, aiohttp_session: aiohttp.ClientSession, base_url: str = "https://api.elevenlabs.io", - model: Optional[str] = None, - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = ELEVENLABS_TTFS_P99, + model: str | None = None, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = ELEVENLABS_TTFS_P99, **kwargs, ): """Initialize the ElevenLabs STT service. @@ -312,7 +313,7 @@ class ElevenLabsSTTService(SegmentedSTTService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to ElevenLabs service-specific language code. 
Args: @@ -364,7 +365,7 @@ class ElevenLabsSTTService(SegmentedSTTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[str] = None + self, transcript: str, is_final: bool, language: str | None = None ): """Handle a transcription result with tracing.""" await self.stop_processing_metrics() @@ -474,12 +475,12 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): include_language_detection: Whether to include language detection in transcripts. """ - language_code: Optional[str] = None + language_code: str | None = None commit_strategy: CommitStrategy = CommitStrategy.MANUAL - vad_silence_threshold_secs: Optional[float] = None - vad_threshold: Optional[float] = None - min_speech_duration_ms: Optional[int] = None - min_silence_duration_ms: Optional[int] = None + vad_silence_threshold_secs: float | None = None + vad_threshold: float | None = None + min_speech_duration_ms: int | None = None + min_silence_duration_ms: int | None = None include_timestamps: bool = False enable_logging: bool = False include_language_detection: bool = False @@ -490,14 +491,14 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): api_key: str, base_url: str = "api.elevenlabs.io", commit_strategy: CommitStrategy = CommitStrategy.MANUAL, - model: Optional[str] = None, - sample_rate: Optional[int] = None, + model: str | None = None, + sample_rate: int | None = None, include_timestamps: bool = False, enable_logging: bool = False, include_language_detection: bool = False, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = ELEVENLABS_REALTIME_TTFS_P99, + params: InputParams | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = ELEVENLABS_REALTIME_TTFS_P99, **kwargs, ): """Initialize the ElevenLabs Realtime STT service. 
@@ -908,7 +909,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[str] = None + self, transcript: str, is_final: bool, language: str | None = None ): """Handle a transcription result with tracing.""" pass diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index d2cb3786d..02e6383ff 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -13,17 +13,12 @@ with support for streaming audio, word timestamps, and voice customization. import asyncio import base64 import json +from collections.abc import AsyncGenerator, Mapping from dataclasses import dataclass, field from typing import ( Any, - AsyncGenerator, ClassVar, - Dict, - List, Literal, - Mapping, - Optional, - Tuple, Union, ) @@ -74,7 +69,7 @@ ELEVENLABS_MULTILINGUAL_MODELS = { } -def language_to_elevenlabs_language(language: Language) -> Optional[str]: +def language_to_elevenlabs_language(language: Language) -> str | None: """Convert a Language enum to ElevenLabs language code. Args: @@ -152,8 +147,8 @@ def output_format_from_sample_rate(sample_rate: int) -> str: def build_elevenlabs_voice_settings( - settings: Union[Dict[str, Any], "TTSSettings"], -) -> Optional[Dict[str, Union[float, bool]]]: + settings: Union[dict[str, Any], "TTSSettings"], +) -> dict[str, float | bool] | None: """Build voice settings dictionary for ElevenLabs based on provided settings. Args: @@ -255,7 +250,7 @@ def calculate_word_times( cumulative_time: float, partial_word: str = "", partial_word_start_time: float = 0.0, -) -> tuple[List[Tuple[str, float]], str, float]: +) -> tuple[list[tuple[str, float]], str, float]: """Calculate word timestamps from character alignment information. 
Args: @@ -341,34 +336,34 @@ class ElevenLabsTTSService(WebsocketTTSService): pronunciation_dictionary_locators: List of pronunciation dictionary locators to use. """ - language: Optional[Language] = None - stability: Optional[float] = None - similarity_boost: Optional[float] = None - style: Optional[float] = None - use_speaker_boost: Optional[bool] = None - speed: Optional[float] = None - auto_mode: Optional[bool] = True - enable_ssml_parsing: Optional[bool] = None - enable_logging: Optional[bool] = None - apply_text_normalization: Optional[Literal["auto", "on", "off"]] = None - pronunciation_dictionary_locators: Optional[List[PronunciationDictionaryLocator]] = None + language: Language | None = None + stability: float | None = None + similarity_boost: float | None = None + style: float | None = None + use_speaker_boost: bool | None = None + speed: float | None = None + auto_mode: bool | None = True + enable_ssml_parsing: bool | None = None + enable_logging: bool | None = None + apply_text_normalization: Literal["auto", "on", "off"] | None = None + pronunciation_dictionary_locators: list[PronunciationDictionaryLocator] | None = None def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, - model: Optional[str] = None, + voice_id: str | None = None, + model: str | None = None, url: str = "wss://api.elevenlabs.io", - sample_rate: Optional[int] = None, - auto_mode: Optional[bool] = None, - enable_ssml_parsing: Optional[bool] = None, - enable_logging: Optional[bool] = None, - pronunciation_dictionary_locators: Optional[List[PronunciationDictionaryLocator]] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - text_aggregation_mode: Optional[TextAggregationMode] = None, - aggregate_sentences: Optional[bool] = None, + sample_rate: int | None = None, + auto_mode: bool | None = None, + enable_ssml_parsing: bool | None = None, + enable_logging: bool | None = None, + pronunciation_dictionary_locators: 
list[PronunciationDictionaryLocator] | None = None, + params: InputParams | None = None, + settings: Settings | None = None, + text_aggregation_mode: TextAggregationMode | None = None, + aggregate_sentences: bool | None = None, **kwargs, ): """Initialize the ElevenLabs TTS service. @@ -534,7 +529,7 @@ class ElevenLabsTTSService(WebsocketTTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to ElevenLabs language format. Args: @@ -625,7 +620,7 @@ class ElevenLabsTTSService(WebsocketTTSService): await super().cancel(frame) await self._disconnect() - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio and finalize the current context. Args: @@ -935,31 +930,31 @@ class ElevenLabsHttpTTSService(TTSService): pronunciation_dictionary_locators: List of pronunciation dictionary locators to use. 
""" - language: Optional[Language] = None - optimize_streaming_latency: Optional[int] = None - stability: Optional[float] = None - similarity_boost: Optional[float] = None - style: Optional[float] = None - use_speaker_boost: Optional[bool] = None - speed: Optional[float] = None - apply_text_normalization: Optional[Literal["auto", "on", "off"]] = None - pronunciation_dictionary_locators: Optional[List[PronunciationDictionaryLocator]] = None + language: Language | None = None + optimize_streaming_latency: int | None = None + stability: float | None = None + similarity_boost: float | None = None + style: float | None = None + use_speaker_boost: bool | None = None + speed: float | None = None + apply_text_normalization: Literal["auto", "on", "off"] | None = None + pronunciation_dictionary_locators: list[PronunciationDictionaryLocator] | None = None def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, + voice_id: str | None = None, aiohttp_session: aiohttp.ClientSession, - model: Optional[str] = None, + model: str | None = None, base_url: str = "https://api.elevenlabs.io", - sample_rate: Optional[int] = None, - enable_logging: Optional[bool] = None, - pronunciation_dictionary_locators: Optional[List[PronunciationDictionaryLocator]] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - text_aggregation_mode: Optional[TextAggregationMode] = None, - aggregate_sentences: Optional[bool] = None, + sample_rate: int | None = None, + enable_logging: bool | None = None, + pronunciation_dictionary_locators: list[PronunciationDictionaryLocator] | None = None, + params: InputParams | None = None, + settings: Settings | None = None, + text_aggregation_mode: TextAggregationMode | None = None, + aggregate_sentences: bool | None = None, **kwargs, ): """Initialize the ElevenLabs HTTP TTS service. 
@@ -1078,7 +1073,7 @@ class ElevenLabsHttpTTSService(TTSService): self._partial_word = "" self._partial_word_start_time = 0.0 - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert pipecat Language to ElevenLabs language code. Args: @@ -1147,7 +1142,7 @@ class ElevenLabsHttpTTSService(TTSService): # End of turn - reset previous text self._previous_text = "" - def calculate_word_times(self, alignment_info: Mapping[str, Any]) -> List[Tuple[str, float]]: + def calculate_word_times(self, alignment_info: Mapping[str, Any]) -> list[tuple[str, float]]: """Calculate word timing from character alignment data. This method handles partial words that may span across multiple alignment chunks. @@ -1228,7 +1223,7 @@ class ElevenLabsHttpTTSService(TTSService): # Use the with-timestamps endpoint url = f"{self._base_url}/v1/text-to-speech/{self._settings.voice}/stream/with-timestamps" - payload: Dict[str, Union[str, Dict[str, Union[float, bool]]]] = { + payload: dict[str, str | dict[str, float | bool]] = { "text": text, "model_id": self._settings.model, } diff --git a/src/pipecat/services/fal/image.py b/src/pipecat/services/fal/image.py index 31af55440..8a608de77 100644 --- a/src/pipecat/services/fal/image.py +++ b/src/pipecat/services/fal/image.py @@ -13,8 +13,9 @@ for creating images from text prompts using various AI models. 
import asyncio import io import os +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Dict, Optional, Union +from typing import Any import aiohttp from loguru import logger @@ -44,14 +45,14 @@ class FalImageGenSettings(ImageGenSettings): seed: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) num_inference_steps: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) num_images: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - image_size: str | Dict[str, int] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + image_size: str | dict[str, int] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) expand_prompt: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) enable_safety_checker: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - def to_api_arguments(self) -> Dict[str, Any]: + def to_api_arguments(self) -> dict[str, Any]: """Build the Fal API arguments dict from settings, excluding None values.""" - args: Dict[str, Any] = {} + args: dict[str, Any] = {} if self.seed is not None: args["seed"] = self.seed args["num_inference_steps"] = self.num_inference_steps @@ -89,10 +90,10 @@ class FalImageGenService(ImageGenService): format: Output image format. Defaults to "png". 
""" - seed: Optional[int] = None + seed: int | None = None num_inference_steps: int = 8 num_images: int = 1 - image_size: Union[str, Dict[str, int]] = "square_hd" + image_size: str | dict[str, int] = "square_hd" expand_prompt: bool = False enable_safety_checker: bool = True format: str = "png" @@ -102,11 +103,11 @@ class FalImageGenService(ImageGenService): def __init__( self, *, - params: Optional[InputParams] = None, + params: InputParams | None = None, aiohttp_session: aiohttp.ClientSession, - model: Optional[str] = None, - key: Optional[str] = None, - settings: Optional[Settings] = None, + model: str | None = None, + key: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the FalImageGenService. diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py index 65df7e3ab..1e18c7f84 100644 --- a/src/pipecat/services/fal/stt.py +++ b/src/pipecat/services/fal/stt.py @@ -12,8 +12,8 @@ transcription using segmented audio processing. import base64 import os +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import AsyncGenerator, Optional import aiohttp from loguru import logger @@ -28,7 +28,7 @@ from pipecat.utils.time import time_now_iso8601 from pipecat.utils.tracing.service_decorators import traced_stt -def language_to_fal_language(language: Language) -> Optional[str]: +def language_to_fal_language(language: Language) -> str | None: """Convert a Language enum to Fal's Wizper language code. Args: @@ -171,7 +171,7 @@ class FalSTTService(SegmentedSTTService): version: Version of Wizper model to use. Defaults to '3'. 
""" - language: Optional[Language] = Language.EN + language: Language | None = Language.EN task: str = "transcribe" chunk_level: str = "segment" version: str = "3" @@ -179,15 +179,15 @@ class FalSTTService(SegmentedSTTService): def __init__( self, *, - api_key: Optional[str] = None, - aiohttp_session: Optional[aiohttp.ClientSession] = None, + api_key: str | None = None, + aiohttp_session: aiohttp.ClientSession | None = None, task: str = "transcribe", chunk_level: str = "segment", version: str = "3", - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = FAL_TTFS_P99, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = FAL_TTFS_P99, **kwargs, ): """Initialize the FalSTTService with API key and parameters. @@ -266,7 +266,7 @@ class FalSTTService(SegmentedSTTService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Fal's service-specific language code. 
Args: @@ -279,7 +279,7 @@ class FalSTTService(SegmentedSTTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[str] = None + self, transcript: str, is_final: bool, language: str | None = None ): """Handle a transcription result with tracing.""" await self.stop_processing_metrics() diff --git a/src/pipecat/services/fireworks/llm.py b/src/pipecat/services/fireworks/llm.py index 7d2997987..c51daf0ea 100644 --- a/src/pipecat/services/fireworks/llm.py +++ b/src/pipecat/services/fireworks/llm.py @@ -7,7 +7,6 @@ """Fireworks AI service implementation using OpenAI-compatible interface.""" from dataclasses import dataclass -from typing import Optional from loguru import logger @@ -37,9 +36,9 @@ class FireworksLLMService(OpenAILLMService): self, *, api_key: str, - model: Optional[str] = None, + model: str | None = None, base_url: str = "https://api.fireworks.ai/inference/v1", - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Fireworks LLM service. diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index f6738fbb8..5f4252fb2 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -10,8 +10,9 @@ This module provides integration with Fish Audio's real-time TTS WebSocket API for streaming text-to-speech synthesis with customizable voice parameters. """ +from collections.abc import AsyncGenerator, Mapping from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Literal, Mapping, Optional, Self +from typing import Any, Literal, Self from loguru import logger from pydantic import BaseModel @@ -99,22 +100,22 @@ class FishAudioTTSService(InterruptibleTTSService): prosody_volume: Volume adjustment in dB. Defaults to 0. 
""" - language: Optional[Language] = Language.EN - latency: Optional[str] = "normal" # "normal" or "balanced" - normalize: Optional[bool] = True - prosody_speed: Optional[float] = 1.0 # Speech speed (0.5-2.0) - prosody_volume: Optional[int] = 0 # Volume adjustment in dB + language: Language | None = Language.EN + latency: str | None = "normal" # "normal" or "balanced" + normalize: bool | None = True + prosody_speed: float | None = 1.0 # Speech speed (0.5-2.0) + prosody_volume: int | None = 0 # Volume adjustment in dB def __init__( self, *, api_key: str, - reference_id: Optional[str] = None, # This is the voice ID - model_id: Optional[str] = None, + reference_id: str | None = None, # This is the voice ID + model_id: str | None = None, output_format: FishAudioOutputFormat = "pcm", - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Fish Audio TTS service. 
@@ -321,7 +322,7 @@ class FishAudioTTSService(InterruptibleTTSService): self._websocket = None await self._call_event_handler("on_disconnected") - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any buffered audio by sending a flush event to Fish Audio.""" logger.trace(f"{self}: Flushing audio buffers") if not self._websocket or self._websocket.state is State.CLOSED: diff --git a/src/pipecat/services/gladia/config.py b/src/pipecat/services/gladia/config.py index 917309594..dd46927dc 100644 --- a/src/pipecat/services/gladia/config.py +++ b/src/pipecat/services/gladia/config.py @@ -6,7 +6,7 @@ """Configuration for the Gladia STT service.""" -from typing import Any, Dict, List, Optional, Union +from typing import Any from pydantic import BaseModel @@ -19,8 +19,8 @@ class LanguageConfig(BaseModel): code_switching: Whether to auto-detect language changes during transcription """ - languages: Optional[List[str]] = None - code_switching: Optional[bool] = None + languages: list[str] | None = None + code_switching: bool | None = None class PreProcessingConfig(BaseModel): @@ -31,8 +31,8 @@ class PreProcessingConfig(BaseModel): speech_threshold: Sensitivity for speech detection (0-1) """ - audio_enhancer: Optional[bool] = None - speech_threshold: Optional[float] = None + audio_enhancer: bool | None = None + speech_threshold: float | None = None class CustomVocabularyItem(BaseModel): @@ -47,8 +47,8 @@ class CustomVocabularyItem(BaseModel): value: str intensity: float - pronunciations: Optional[List[str]] = None - language: Optional[str] = None + pronunciations: list[str] | None = None + language: str | None = None class CustomVocabularyConfig(BaseModel): @@ -59,8 +59,8 @@ class CustomVocabularyConfig(BaseModel): default_intensity: Default intensity for simple string vocabulary items """ - vocabulary: Optional[List[Union[str, CustomVocabularyItem]]] = None - default_intensity: Optional[float] 
= None + vocabulary: list[str | CustomVocabularyItem] | None = None + default_intensity: float | None = None class CustomSpellingConfig(BaseModel): @@ -70,7 +70,7 @@ class CustomSpellingConfig(BaseModel): spelling_dictionary: Mapping of correct spellings to phonetic variations """ - spelling_dictionary: Optional[Dict[str, List[str]]] = None + spelling_dictionary: dict[str, list[str]] | None = None class TranslationConfig(BaseModel): @@ -86,13 +86,13 @@ class TranslationConfig(BaseModel): informal: Force informal language forms when available """ - target_languages: Optional[List[str]] = None - model: Optional[str] = None - match_original_utterances: Optional[bool] = None - lipsync: Optional[bool] = None - context_adaptation: Optional[bool] = None - context: Optional[str] = None - informal: Optional[bool] = None + target_languages: list[str] | None = None + model: str | None = None + match_original_utterances: bool | None = None + lipsync: bool | None = None + context_adaptation: bool | None = None + context: str | None = None + informal: bool | None = None class RealtimeProcessingConfig(BaseModel): @@ -110,15 +110,15 @@ class RealtimeProcessingConfig(BaseModel): sentiment_analysis: Whether to enable sentiment analysis """ - words_accurate_timestamps: Optional[bool] = None - custom_vocabulary: Optional[bool] = None - custom_vocabulary_config: Optional[CustomVocabularyConfig] = None - custom_spelling: Optional[bool] = None - custom_spelling_config: Optional[CustomSpellingConfig] = None - translation: Optional[bool] = None - translation_config: Optional[TranslationConfig] = None - named_entity_recognition: Optional[bool] = None - sentiment_analysis: Optional[bool] = None + words_accurate_timestamps: bool | None = None + custom_vocabulary: bool | None = None + custom_vocabulary_config: CustomVocabularyConfig | None = None + custom_spelling: bool | None = None + custom_spelling_config: CustomSpellingConfig | None = None + translation: bool | None = None + 
translation_config: TranslationConfig | None = None + named_entity_recognition: bool | None = None + sentiment_analysis: bool | None = None class MessagesConfig(BaseModel): @@ -136,15 +136,15 @@ class MessagesConfig(BaseModel): receive_lifecycle_events: Whether to receive lifecycle events """ - receive_partial_transcripts: Optional[bool] = None - receive_final_transcripts: Optional[bool] = None - receive_speech_events: Optional[bool] = None - receive_pre_processing_events: Optional[bool] = None - receive_realtime_processing_events: Optional[bool] = None - receive_post_processing_events: Optional[bool] = None - receive_acknowledgments: Optional[bool] = None - receive_errors: Optional[bool] = None - receive_lifecycle_events: Optional[bool] = None + receive_partial_transcripts: bool | None = None + receive_final_transcripts: bool | None = None + receive_speech_events: bool | None = None + receive_pre_processing_events: bool | None = None + receive_realtime_processing_events: bool | None = None + receive_post_processing_events: bool | None = None + receive_acknowledgments: bool | None = None + receive_errors: bool | None = None + receive_lifecycle_events: bool | None = None class GladiaInputParams(BaseModel): @@ -170,14 +170,14 @@ class GladiaInputParams(BaseModel): and stopped frames. Defaults to False. 
""" - encoding: Optional[str] = "wav/pcm" - bit_depth: Optional[int] = 16 - channels: Optional[int] = 1 - custom_metadata: Optional[Dict[str, Any]] = None - endpointing: Optional[float] = None - maximum_duration_without_endpointing: Optional[int] = 5 - language_config: Optional[LanguageConfig] = None - pre_processing: Optional[PreProcessingConfig] = None - realtime_processing: Optional[RealtimeProcessingConfig] = None - messages_config: Optional[MessagesConfig] = None + encoding: str | None = "wav/pcm" + bit_depth: int | None = 16 + channels: int | None = 1 + custom_metadata: dict[str, Any] | None = None + endpointing: float | None = None + maximum_duration_without_endpointing: int | None = 5 + language_config: LanguageConfig | None = None + pre_processing: PreProcessingConfig | None = None + realtime_processing: RealtimeProcessingConfig | None = None + messages_config: MessagesConfig | None = None enable_vad: bool = False diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index f939ff4ba..8c57e47e2 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -13,8 +13,9 @@ supporting multiple languages, custom vocabulary, and various audio processing o import asyncio import base64 import json +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Literal, Optional +from typing import Any, Literal import aiohttp from loguru import logger @@ -55,7 +56,7 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -def language_to_gladia_language(language: Language) -> Optional[str]: +def language_to_gladia_language(language: Language) -> str | None: """Convert a Language enum to Gladia's language code format. 
Args: @@ -223,13 +224,13 @@ class GladiaSTTService(WebsocketSTTService): encoding: str = "wav/pcm", bit_depth: int = 16, channels: int = 1, - sample_rate: Optional[int] = None, - model: Optional[str] = None, - params: Optional[GladiaInputParams] = None, + sample_rate: int | None = None, + model: str | None = None, + params: GladiaInputParams | None = None, max_buffer_size: int = 1024 * 1024 * 20, # 20MB default buffer should_interrupt: bool = True, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = GLADIA_TTFS_P99, + settings: Settings | None = None, + ttfs_p99_latency: float | None = GLADIA_TTFS_P99, **kwargs, ): """Initialize the Gladia STT service. @@ -353,7 +354,7 @@ class GladiaSTTService(WebsocketSTTService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert pipecat Language enum to Gladia's language code. Args: @@ -587,7 +588,7 @@ class GladiaSTTService(WebsocketSTTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[str] = None + self, transcript: str, is_final: bool, language: str | None = None ): await self.stop_processing_metrics() diff --git a/src/pipecat/services/google/frames.py b/src/pipecat/services/google/frames.py index 47fcb3365..fbb32bf01 100644 --- a/src/pipecat/services/google/frames.py +++ b/src/pipecat/services/google/frames.py @@ -12,7 +12,6 @@ models that support web search and fact grounding capabilities. """ from dataclasses import dataclass, field -from typing import List, Optional from pipecat.frames.frames import DataFrame @@ -27,7 +26,7 @@ class LLMSearchResult: """ text: str - confidence: List[float] = field(default_factory=list) + confidence: list[float] = field(default_factory=list) @dataclass @@ -40,9 +39,9 @@ class LLMSearchOrigin: results: List of search results from this origin. 
""" - site_uri: Optional[str] = None - site_title: Optional[str] = None - results: List[LLMSearchResult] = field(default_factory=list) + site_uri: str | None = None + site_title: str | None = None + results: list[LLMSearchResult] = field(default_factory=list) @dataclass @@ -60,9 +59,9 @@ class LLMSearchResponseFrame(DataFrame): origins: List of search result origins with detailed information. """ - search_result: Optional[str] = None - rendered_content: Optional[str] = None - origins: List[LLMSearchOrigin] = field(default_factory=list) + search_result: str | None = None + rendered_content: str | None = None + origins: list[LLMSearchOrigin] = field(default_factory=list) def __str__(self): """Return string representation of the search response frame. diff --git a/src/pipecat/services/google/gemini_live/file_api.py b/src/pipecat/services/google/gemini_live/file_api.py index 0c9fa49d3..6dcec5b19 100644 --- a/src/pipecat/services/google/gemini_live/file_api.py +++ b/src/pipecat/services/google/gemini_live/file_api.py @@ -12,7 +12,7 @@ this API can be referenced in Gemini generative model calls. """ import mimetypes -from typing import Any, Dict, Optional +from typing import Any import aiohttp from loguru import logger @@ -43,9 +43,7 @@ class GeminiFileAPI: # Upload URL uses the /upload/ path self.upload_base_url = "https://generativelanguage.googleapis.com/upload/v1beta/files" - async def upload_file( - self, file_path: str, display_name: Optional[str] = None - ) -> Dict[str, Any]: + async def upload_file(self, file_path: str, display_name: str | None = None) -> dict[str, Any]: """Upload a file to the Gemini File API using the correct resumable upload protocol. Args: @@ -116,7 +114,7 @@ class GeminiFileAPI: logger.info(f"File uploaded successfully: {file_info.get('file', {}).get('name')}") return file_info - async def get_file(self, name: str) -> Dict[str, Any]: + async def get_file(self, name: str) -> dict[str, Any]: """Get metadata for a file. 
Args: @@ -140,8 +138,8 @@ class GeminiFileAPI: return file_info async def list_files( - self, page_size: int = 10, page_token: Optional[str] = None - ) -> Dict[str, Any]: + self, page_size: int = 10, page_token: str | None = None + ) -> dict[str, Any]: """List uploaded files. Args: diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index 3e3cbd092..b1674d91e 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -17,8 +17,8 @@ import io import time import uuid from dataclasses import dataclass, field -from enum import Enum -from typing import Any, Dict, List, Optional, Union +from enum import StrEnum +from typing import Any from loguru import logger from PIL import Image @@ -109,7 +109,7 @@ MAX_CONSECUTIVE_FAILURES = 3 CONNECTION_ESTABLISHED_THRESHOLD = 10.0 # seconds -def language_to_gemini_language(language: Language) -> Optional[str]: +def language_to_gemini_language(language: Language) -> str | None: """Maps a Language enum value to a Gemini Live supported language code. Source: @@ -206,7 +206,7 @@ def language_to_gemini_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) -class GeminiModalities(Enum): +class GeminiModalities(StrEnum): """Supported modalities for Gemini Live. Parameters: @@ -218,7 +218,7 @@ class GeminiModalities(Enum): AUDIO = "AUDIO" -class GeminiMediaResolution(str, Enum): +class GeminiMediaResolution(StrEnum): """Media resolution options for Gemini Live. Parameters: @@ -245,11 +245,11 @@ class GeminiVADParams(BaseModel): silence_duration_ms: Silence duration threshold in milliseconds. Defaults to None. 
""" - disabled: Optional[bool] = Field(default=None) - start_sensitivity: Optional[StartSensitivity] = Field(default=None) - end_sensitivity: Optional[EndSensitivity] = Field(default=None) - prefix_padding_ms: Optional[int] = Field(default=None) - silence_duration_ms: Optional[int] = Field(default=None) + disabled: bool | None = Field(default=None) + start_sensitivity: StartSensitivity | None = Field(default=None) + end_sensitivity: EndSensitivity | None = Field(default=None) + prefix_padding_ms: int | None = Field(default=None) + silence_duration_ms: int | None = Field(default=None) class ContextWindowCompressionParams(BaseModel): @@ -261,9 +261,7 @@ class ContextWindowCompressionParams(BaseModel): """ enabled: bool = Field(default=False) - trigger_tokens: Optional[int] = Field( - default=None - ) # None = use default (80% of context window) + trigger_tokens: int | None = Field(default=None) # None = use default (80% of context window) class InputParams(BaseModel): @@ -303,23 +301,23 @@ class InputParams(BaseModel): extra: Additional parameters. Defaults to empty dict. 
""" - frequency_penalty: Optional[float] = Field(default=None, ge=0.0, le=2.0) - max_tokens: Optional[int] = Field(default=4096, ge=1) - presence_penalty: Optional[float] = Field(default=None, ge=0.0, le=2.0) - temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) - top_k: Optional[int] = Field(default=None, ge=0) - top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - modalities: Optional[GeminiModalities] = Field(default=GeminiModalities.AUDIO) - language: Optional[Language] = Field(default=Language.EN_US) - media_resolution: Optional[GeminiMediaResolution] = Field( + frequency_penalty: float | None = Field(default=None, ge=0.0, le=2.0) + max_tokens: int | None = Field(default=4096, ge=1) + presence_penalty: float | None = Field(default=None, ge=0.0, le=2.0) + temperature: float | None = Field(default=None, ge=0.0, le=2.0) + top_k: int | None = Field(default=None, ge=0) + top_p: float | None = Field(default=None, ge=0.0, le=1.0) + modalities: GeminiModalities | None = Field(default=GeminiModalities.AUDIO) + language: Language | None = Field(default=Language.EN_US) + media_resolution: GeminiMediaResolution | None = Field( default=GeminiMediaResolution.UNSPECIFIED ) - vad: Optional[GeminiVADParams] = Field(default=None) - context_window_compression: Optional[ContextWindowCompressionParams] = Field(default=None) - thinking: Optional[ThinkingConfig] = Field(default=None) - enable_affective_dialog: Optional[bool] = Field(default=None) - proactivity: Optional[ProactivityConfig] = Field(default=None) - extra: Optional[Dict[str, Any]] = Field(default_factory=dict) + vad: GeminiVADParams | None = Field(default=None) + context_window_compression: ContextWindowCompressionParams | None = Field(default=None) + thinking: ThinkingConfig | None = Field(default=None) + enable_affective_dialog: bool | None = Field(default=None) + proactivity: ProactivityConfig | None = Field(default=None) + extra: dict[str, Any] | None = Field(default_factory=dict) @dataclass 
@@ -374,17 +372,17 @@ class GeminiLiveLLMService(LLMService): self, *, api_key: str, - model: Optional[str] = None, + model: str | None = None, voice_id: str = "Charon", start_audio_paused: bool = False, start_video_paused: bool = False, - system_instruction: Optional[str] = None, - tools: Optional[Union[List[dict], ToolsSchema]] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + system_instruction: str | None = None, + tools: list[dict] | ToolsSchema | None = None, + params: InputParams | None = None, + settings: Settings | None = None, inference_on_context_initialization: bool = True, file_api_base_url: str = "https://generativelanguage.googleapis.com/v1beta/files", - http_options: Optional[HttpOptions] = None, + http_options: HttpOptions | None = None, **kwargs, ): """Initialize the Gemini Live LLM service. @@ -537,18 +535,18 @@ class GeminiLiveLLMService(LLMService): self._connection_start_time = None self._file_api_base_url = file_api_base_url - self._file_api: Optional[GeminiFileAPI] = None + self._file_api: GeminiFileAPI | None = None # Grounding metadata tracking self._search_result_buffer = "" self._accumulated_grounding_metadata = None # Session resumption - self._session_resumption_handle: Optional[str] = None + self._session_resumption_handle: str | None = None # Bookkeeping for ending gracefully (i.e. after the bot is finished) - self._end_frame_pending_bot_turn_finished: Optional[EndFrame] = None - self._end_frame_deferral_timeout_task: Optional[asyncio.Task] = None + self._end_frame_pending_bot_turn_finished: EndFrame | None = None + self._end_frame_deferral_timeout_task: asyncio.Task | None = None # Initialize the API client. Subclasses can override this if needed. 
self.create_client() @@ -908,7 +906,7 @@ class GeminiLiveLLMService(LLMService): self._end_frame_deferral_timeout_task.cancel() self._end_frame_deferral_timeout_task = None - def _get_history_config(self) -> Optional[HistoryConfig]: + def _get_history_config(self) -> HistoryConfig | None: """Return the history config for the Live API connection. Subclasses can override this to disable history config (e.g. Vertex AI @@ -916,7 +914,7 @@ class GeminiLiveLLMService(LLMService): """ return HistoryConfig(initial_history_in_client_content=True) - async def _connect(self, session_resumption_handle: Optional[str] = None): + async def _connect(self, session_resumption_handle: str | None = None): """Establish client connection to Gemini Live API.""" if self._session: # Here we assume that if we have a client, we are connected. We @@ -1336,7 +1334,7 @@ class GeminiLiveLLMService(LLMService): @traced_gemini_live(operation="llm_tool_result") async def _tool_result( - self, tool_call_id: str, tool_name: str, tool_result_message: Dict[str, Any] + self, tool_call_id: str, tool_name: str, tool_result_message: dict[str, Any] ): """Send tool result back to the API.""" if self._disconnecting or not self._session: @@ -1513,12 +1511,12 @@ class GeminiLiveLLMService(LLMService): @traced_stt async def _handle_user_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing.""" pass - async def _push_user_transcription(self, text: str, result: Optional[LiveServerMessage] = None): + async def _push_user_transcription(self, text: str, result: LiveServerMessage | None = None): """Push a user transcription frame upstream. 
Helper method to ensure consistent handling of user transcriptions @@ -1697,7 +1695,7 @@ class GeminiLiveLLMService(LLMService): if grounding_metadata.grounding_chunks and grounding_metadata.grounding_supports: # Create a mapping of chunk indices to origins - chunk_to_origin: Dict[int, LLMSearchOrigin] = {} + chunk_to_origin: dict[int, LLMSearchOrigin] = {} for index, chunk in enumerate(grounding_metadata.grounding_chunks): if chunk.web: diff --git a/src/pipecat/services/google/gemini_live/vertex/llm.py b/src/pipecat/services/google/gemini_live/vertex/llm.py index 8466c7f21..44ded852f 100644 --- a/src/pipecat/services/google/gemini_live/vertex/llm.py +++ b/src/pipecat/services/google/gemini_live/vertex/llm.py @@ -13,7 +13,6 @@ streaming responses, and tool usage. import json from dataclasses import dataclass -from typing import List, Optional, Union from loguru import logger @@ -61,21 +60,21 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService): def __init__( self, *, - credentials: Optional[str] = None, - credentials_path: Optional[str] = None, + credentials: str | None = None, + credentials_path: str | None = None, location: str, project_id: str, - model: Optional[str] = None, + model: str | None = None, voice_id: str = "Charon", start_audio_paused: bool = False, start_video_paused: bool = False, - system_instruction: Optional[str] = None, - tools: Optional[Union[List[dict], ToolsSchema]] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + system_instruction: str | None = None, + tools: list[dict] | ToolsSchema | None = None, + params: InputParams | None = None, + settings: Settings | None = None, inference_on_context_initialization: bool = True, file_api_base_url: str = "https://generativelanguage.googleapis.com/v1beta/files", - http_options: Optional[HttpOptions] = None, + http_options: HttpOptions | None = None, **kwargs, ): """Initialize the service for accessing Gemini Live via Google Vertex AI. 
@@ -234,7 +233,7 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService): ) @staticmethod - def _get_credentials(credentials: Optional[str], credentials_path: Optional[str]) -> str: + def _get_credentials(credentials: str | None, credentials_path: str | None) -> str: """Retrieve Credentials using Google service account credentials JSON. Supports multiple authentication methods: @@ -252,7 +251,7 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService): Raises: ValueError: If no valid credentials are provided or found. """ - creds: Optional[service_account.Credentials] = None + creds: service_account.Credentials | None = None if credentials: # Parse and load credentials from JSON string diff --git a/src/pipecat/services/google/image.py b/src/pipecat/services/google/image.py index 9e4ec1b59..c8c33a68d 100644 --- a/src/pipecat/services/google/image.py +++ b/src/pipecat/services/google/image.py @@ -16,8 +16,9 @@ import os # Suppress gRPC fork warnings os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Optional +from typing import Any from loguru import logger from PIL import Image @@ -76,15 +77,15 @@ class GoogleImageGenService(ImageGenService): number_of_images: int = Field(default=1, ge=1, le=8) model: str = Field(default="imagen-4.0-generate-001") - negative_prompt: Optional[str] = Field(default=None) + negative_prompt: str | None = Field(default=None) def __init__( self, *, api_key: str, - params: Optional[InputParams] = None, - http_options: Optional[Any] = None, - settings: Optional[Settings] = None, + params: InputParams | None = None, + http_options: Any | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the GoogleImageGenService with API key and parameters. 
diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index 5e95b99da..6268009ed 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -13,8 +13,9 @@ including LLM services, context management, and message aggregation. import io import os import uuid +from collections.abc import AsyncIterator from dataclasses import dataclass, field -from typing import Any, AsyncIterator, Dict, List, Literal, Optional, Union +from typing import Any, Literal, Optional, Union from loguru import logger from PIL import Image @@ -88,15 +89,13 @@ class GoogleThinkingConfig(BaseModel): Today's models default to not including thoughts (False). """ - thinking_budget: Optional[int] = Field(default=None) + thinking_budget: int | None = Field(default=None) # Why `| str` here? To not break compatibility in case Google adds more # levels in the future. - thinking_level: Optional[Literal["low", "high", "medium", "minimal"] | str] = Field( - default=None - ) + thinking_level: Literal["low", "high", "medium", "minimal"] | str | None = Field(default=None) - include_thoughts: Optional[bool] = Field(default=None) + include_thoughts: bool | None = Field(default=None) @dataclass @@ -160,24 +159,24 @@ class GoogleLLMService(LLMService): extra: Additional parameters as a dictionary. 
""" - max_tokens: Optional[int] = Field(default=4096, ge=1) - temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) - top_k: Optional[int] = Field(default=None, ge=0) - top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) + max_tokens: int | None = Field(default=4096, ge=1) + temperature: float | None = Field(default=None, ge=0.0, le=2.0) + top_k: int | None = Field(default=None, ge=0) + top_p: float | None = Field(default=None, ge=0.0, le=1.0) thinking: Optional["GoogleLLMService.ThinkingConfig"] = Field(default=None) - extra: Optional[Dict[str, Any]] = Field(default_factory=dict) + extra: dict[str, Any] | None = Field(default_factory=dict) def __init__( self, *, api_key: str, - model: Optional[str] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - system_instruction: Optional[str] = None, - tools: Optional[List[Dict[str, Any]]] = None, - tool_config: Optional[Dict[str, Any]] = None, - http_options: Optional[HttpOptions] = None, + model: str | None = None, + params: InputParams | None = None, + settings: Settings | None = None, + system_instruction: str | None = None, + tools: list[dict[str, Any]] | None = None, + tool_config: dict[str, Any] | None = None, + http_options: HttpOptions | None = None, **kwargs, ): """Initialize the Google LLM service. @@ -272,9 +271,9 @@ class GoogleLLMService(LLMService): async def run_inference( self, context: LLMContext, - max_tokens: Optional[int] = None, - system_instruction: Optional[str] = None, - ) -> Optional[str]: + max_tokens: int | None = None, + system_instruction: str | None = None, + ) -> str | None: """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context. 
Args: @@ -327,10 +326,10 @@ class GoogleLLMService(LLMService): def _build_generation_params( self, - system_instruction: Optional[str] = None, - tools: Optional[List] = None, - tool_config: Optional[Dict[str, Any]] = None, - ) -> Dict[str, Any]: + system_instruction: str | None = None, + tools: list | None = None, + tool_config: dict[str, Any] | None = None, + ) -> dict[str, Any]: """Build generation parameters for Google AI API. Args: @@ -367,7 +366,7 @@ class GoogleLLMService(LLMService): return generation_params - def _maybe_unset_thinking_budget(self, generation_params: Dict[str, Any]): + def _maybe_unset_thinking_budget(self, generation_params: dict[str, Any]): try: # If we have an image model, we don't apply a thinking default. if "image" in self._settings.model: diff --git a/src/pipecat/services/google/rtvi.py b/src/pipecat/services/google/rtvi.py index 738b0ab9d..2a829ba7d 100644 --- a/src/pipecat/services/google/rtvi.py +++ b/src/pipecat/services/google/rtvi.py @@ -11,7 +11,7 @@ including models for search responses and an observer for handling Google-specif frame types. """ -from typing import List, Literal, Optional +from typing import Literal from pydantic import BaseModel @@ -29,9 +29,9 @@ class RTVISearchResponseMessageData(BaseModel): origins: List of search result origins with metadata. """ - search_result: Optional[str] - rendered_content: Optional[str] - origins: List[LLMSearchOrigin] + search_result: str | None + rendered_content: str | None + origins: list[LLMSearchOrigin] class RTVIBotLLMSearchResponseMessage(BaseModel): @@ -95,7 +95,7 @@ class GoogleRTVIProcessor(RTVIProcessor): Creates a specific Google RTVI Observer. """ - def create_rtvi_observer(self, *, params: Optional[RTVIObserverParams] = None, **kwargs): + def create_rtvi_observer(self, *, params: RTVIObserverParams | None = None, **kwargs): """Creates a new RTVI Observer. 
Args: diff --git a/src/pipecat/services/google/stt.py b/src/pipecat/services/google/stt.py index 282389a3a..4665d6309 100644 --- a/src/pipecat/services/google/stt.py +++ b/src/pipecat/services/google/stt.py @@ -23,7 +23,8 @@ from pipecat.utils.tracing.service_decorators import traced_stt # Suppress gRPC fork warnings os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" -from typing import Any, AsyncGenerator, List, Optional, Union +from collections.abc import AsyncGenerator +from typing import Any from loguru import logger from pydantic import BaseModel, Field, field_validator @@ -59,7 +60,7 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -def language_to_google_stt_language(language: Language) -> Optional[str]: +def language_to_google_stt_language(language: Language) -> str | None: """Maps Language enum to Google Speech-to-Text V2 language codes. Args: @@ -383,8 +384,8 @@ class GoogleSTTSettings(STTSettings): enable_voice_activity_events: Detect voice activity in audio. """ - languages: List[Language] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - language_codes: List[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + languages: list[Language] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language_codes: list[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) use_separate_recognition_per_channel: bool | _NotGiven = field( default_factory=lambda: NOT_GIVEN ) @@ -443,21 +444,21 @@ class GoogleSTTService(STTService): enable_voice_activity_events: Detect voice activity in audio. 
""" - languages: Union[Language, List[Language]] = Field(default_factory=lambda: [Language.EN_US]) - model: Optional[str] = "latest_long" - use_separate_recognition_per_channel: Optional[bool] = False - enable_automatic_punctuation: Optional[bool] = True - enable_spoken_punctuation: Optional[bool] = False - enable_spoken_emojis: Optional[bool] = False - profanity_filter: Optional[bool] = False - enable_word_time_offsets: Optional[bool] = False - enable_word_confidence: Optional[bool] = False - enable_interim_results: Optional[bool] = True - enable_voice_activity_events: Optional[bool] = False + languages: Language | list[Language] = Field(default_factory=lambda: [Language.EN_US]) + model: str | None = "latest_long" + use_separate_recognition_per_channel: bool | None = False + enable_automatic_punctuation: bool | None = True + enable_spoken_punctuation: bool | None = False + enable_spoken_emojis: bool | None = False + profanity_filter: bool | None = False + enable_word_time_offsets: bool | None = False + enable_word_confidence: bool | None = False + enable_interim_results: bool | None = True + enable_voice_activity_events: bool | None = False @field_validator("languages", mode="before") @classmethod - def validate_languages(cls, v) -> List[Language]: + def validate_languages(cls, v) -> list[Language]: """Ensure languages is always a list. Args: @@ -471,7 +472,7 @@ class GoogleSTTService(STTService): return v @property - def language_list(self) -> List[Language]: + def language_list(self) -> list[Language]: """Get languages as a guaranteed list. 
Returns: @@ -483,13 +484,13 @@ class GoogleSTTService(STTService): def __init__( self, *, - credentials: Optional[str] = None, - credentials_path: Optional[str] = None, + credentials: str | None = None, + credentials_path: str | None = None, location: str = "global", - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = GOOGLE_TTFS_P99, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = GOOGLE_TTFS_P99, **kwargs, ): """Initialize the Google STT service. @@ -581,7 +582,7 @@ class GoogleSTTService(STTService): client_options = ClientOptions(api_endpoint=f"{self._location}-speech.googleapis.com") # Extract project ID and create client - creds: Optional[service_account.Credentials] = None + creds: service_account.Credentials | None = None if credentials: json_account_info = json.loads(credentials) self._project_id = json_account_info.get("project_id") @@ -616,7 +617,7 @@ class GoogleSTTService(STTService): """ return True - def language_to_service_language(self, language: Language | List[Language]) -> str | List[str]: + def language_to_service_language(self, language: Language | list[Language]) -> str | list[str]: """Convert Language enum(s) to Google STT language code(s). Args: @@ -629,7 +630,7 @@ class GoogleSTTService(STTService): return [language_to_google_stt_language(lang) or "en-US" for lang in language] return language_to_google_stt_language(language) or "en-US" - def _get_language_codes(self) -> List[str]: + def _get_language_codes(self) -> list[str]: """Resolve the current language settings to Google STT language code strings. 
Prefers ``languages`` (``Language`` enums) over the deprecated @@ -651,7 +652,7 @@ class GoogleSTTService(STTService): await self._disconnect() await self._connect() - async def set_languages(self, languages: List[Language]): + async def set_languages(self, languages: list[Language]): """Update the service's recognition languages. .. deprecated:: 0.0.104 @@ -741,17 +742,17 @@ class GoogleSTTService(STTService): async def update_options( self, *, - languages: Optional[List[Language]] = None, - model: Optional[str] = None, - enable_automatic_punctuation: Optional[bool] = None, - enable_spoken_punctuation: Optional[bool] = None, - enable_spoken_emojis: Optional[bool] = None, - profanity_filter: Optional[bool] = None, - enable_word_time_offsets: Optional[bool] = None, - enable_word_confidence: Optional[bool] = None, - enable_interim_results: Optional[bool] = None, - enable_voice_activity_events: Optional[bool] = None, - location: Optional[str] = None, + languages: list[Language] | None = None, + model: str | None = None, + enable_automatic_punctuation: bool | None = None, + enable_spoken_punctuation: bool | None = None, + enable_spoken_emojis: bool | None = None, + profanity_filter: bool | None = None, + enable_word_time_offsets: bool | None = None, + enable_word_confidence: bool | None = None, + enable_interim_results: bool | None = None, + enable_voice_activity_events: bool | None = None, + location: str | None = None, ) -> None: """Update service options dynamically. 
@@ -947,7 +948,7 @@ class GoogleSTTService(STTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[str] = None + self, transcript: str, is_final: bool, language: str | None = None ): pass diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 81ede9ef8..f9fe1a87d 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -22,8 +22,9 @@ from pipecat.utils.tracing.service_decorators import traced_tts # Suppress gRPC fork warnings os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, List, Literal, Optional +from typing import Any, Literal from loguru import logger from pydantic import BaseModel @@ -58,7 +59,7 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -def language_to_google_tts_language(language: Language) -> Optional[str]: +def language_to_google_tts_language(language: Language) -> str | None: """Convert a Language enum to Google TTS language code. Source: @@ -217,7 +218,7 @@ def language_to_google_tts_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) -def language_to_gemini_tts_language(language: Language) -> Optional[str]: +def language_to_gemini_tts_language(language: Language) -> str | None: """Convert a Language enum to Gemini TTS language code. Source: @@ -575,25 +576,25 @@ class GoogleHttpTTSService(TTSService): google_style: Google-specific voice style. 
""" - pitch: Optional[str] = None - rate: Optional[str] = None - speaking_rate: Optional[float] = None - volume: Optional[str] = None - emphasis: Optional[Literal["strong", "moderate", "reduced", "none"]] = None - language: Optional[Language] = Language.EN - gender: Optional[Literal["male", "female", "neutral"]] = None - google_style: Optional[Literal["apologetic", "calm", "empathetic", "firm", "lively"]] = None + pitch: str | None = None + rate: str | None = None + speaking_rate: float | None = None + volume: str | None = None + emphasis: Literal["strong", "moderate", "reduced", "none"] | None = None + language: Language | None = Language.EN + gender: Literal["male", "female", "neutral"] | None = None + google_style: Literal["apologetic", "calm", "empathetic", "firm", "lively"] | None = None def __init__( self, *, - credentials: Optional[str] = None, - credentials_path: Optional[str] = None, - location: Optional[str] = None, - voice_id: Optional[str] = None, - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + credentials: str | None = None, + credentials_path: str | None = None, + location: str | None = None, + voice_id: str | None = None, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initializes the Google HTTP TTS service. @@ -675,7 +676,7 @@ class GoogleHttpTTSService(TTSService): ) def _create_client( - self, credentials: Optional[str], credentials_path: Optional[str] + self, credentials: str | None, credentials_path: str | None ) -> texttospeech_v1.TextToSpeechAsyncClient: """Create authenticated Google Text-to-Speech client. @@ -689,7 +690,7 @@ class GoogleHttpTTSService(TTSService): Raises: ValueError: If no valid credentials are provided. 
""" - creds: Optional[service_account.Credentials] = None + creds: service_account.Credentials | None = None if credentials: # Use provided credentials JSON string @@ -727,7 +728,7 @@ class GoogleHttpTTSService(TTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Google TTS language format. Args: @@ -874,7 +875,7 @@ class GoogleBaseTTSService(TTSService): """ def _create_client( - self, credentials: Optional[str], credentials_path: Optional[str] + self, credentials: str | None, credentials_path: str | None ) -> texttospeech_v1.TextToSpeechAsyncClient: """Create authenticated Google Text-to-Speech client. @@ -888,7 +889,7 @@ class GoogleBaseTTSService(TTSService): Raises: ValueError: If no valid credentials are provided. """ - creds: Optional[service_account.Credentials] = None + creds: service_account.Credentials | None = None if credentials: # Use provided credentials JSON string @@ -926,7 +927,7 @@ class GoogleBaseTTSService(TTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Google TTS language format. Args: @@ -942,7 +943,7 @@ class GoogleBaseTTSService(TTSService): streaming_config: texttospeech_v1.StreamingSynthesizeConfig, text: str, context_id: str, - prompt: Optional[str] = None, + prompt: str | None = None, ) -> AsyncGenerator[Frame, None]: """Shared streaming synthesis logic. @@ -1032,20 +1033,20 @@ class GoogleTTSService(GoogleBaseTTSService): speaking_rate: The speaking rate, in the range [0.25, 2.0]. 
""" - language: Optional[Language] = Language.EN - speaking_rate: Optional[float] = None + language: Language | None = Language.EN + speaking_rate: float | None = None def __init__( self, *, - credentials: Optional[str] = None, - credentials_path: Optional[str] = None, - location: Optional[str] = None, - voice_id: Optional[str] = None, - voice_cloning_key: Optional[str] = None, - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + credentials: str | None = None, + credentials_path: str | None = None, + location: str | None = None, + voice_id: str | None = None, + voice_cloning_key: str | None = None, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initializes the Google streaming TTS service. @@ -1249,22 +1250,22 @@ class GeminiTTSService(GoogleBaseTTSService): speaker_configs: List of speaker configurations for multi-speaker mode. """ - language: Optional[Language] = Language.EN - prompt: Optional[str] = None + language: Language | None = Language.EN + prompt: str | None = None multi_speaker: bool = False - speaker_configs: Optional[List[dict]] = None + speaker_configs: list[dict] | None = None def __init__( self, *, - model: Optional[str] = None, - credentials: Optional[str] = None, - credentials_path: Optional[str] = None, - location: Optional[str] = None, - voice_id: Optional[str] = None, - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + model: str | None = None, + credentials: str | None = None, + credentials_path: str | None = None, + location: str | None = None, + voice_id: str | None = None, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initializes the Gemini TTS service. 
@@ -1353,7 +1354,7 @@ class GeminiTTSService(GoogleBaseTTSService): credentials, credentials_path ) - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Gemini TTS language format. Args: diff --git a/src/pipecat/services/google/utils.py b/src/pipecat/services/google/utils.py index 9f712a5ad..3707fbc28 100644 --- a/src/pipecat/services/google/utils.py +++ b/src/pipecat/services/google/utils.py @@ -6,12 +6,12 @@ """Utility functions for Google services.""" -from typing import Any, Dict, Optional, Union +from typing import Any from pipecat import version as pipecat_version -def update_google_client_http_options(http_options: Optional[Union[Dict[str, Any], Any]]) -> Any: +def update_google_client_http_options(http_options: dict[str, Any] | Any | None) -> Any: """Updates http_options with the x-goog-api-client header. Args: diff --git a/src/pipecat/services/google/vertex/llm.py b/src/pipecat/services/google/vertex/llm.py index b8b83cb24..c5954d6d8 100644 --- a/src/pipecat/services/google/vertex/llm.py +++ b/src/pipecat/services/google/vertex/llm.py @@ -17,7 +17,6 @@ from dataclasses import dataclass # Suppress gRPC fork warnings os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" -from typing import Optional from loguru import logger @@ -64,17 +63,17 @@ class GoogleVertexLLMService(GoogleLLMService): def __init__( self, *, - credentials: Optional[str] = None, - credentials_path: Optional[str] = None, - model: Optional[str] = None, + credentials: str | None = None, + credentials_path: str | None = None, + model: str | None = None, location: str = "us-east4", project_id: str, - params: Optional[GoogleLLMService.InputParams] = None, - settings: Optional[Settings] = None, - system_instruction: Optional[str] = None, - tools: Optional[list] = None, - tool_config: Optional[dict] = None, - http_options: Optional[HttpOptions] = None, + params: 
GoogleLLMService.InputParams | None = None, + settings: Settings | None = None, + system_instruction: str | None = None, + tools: list | None = None, + tool_config: dict | None = None, + http_options: HttpOptions | None = None, **kwargs, ): """Initializes the VertexLLMService. @@ -186,7 +185,7 @@ class GoogleVertexLLMService(GoogleLLMService): ) @staticmethod - def _get_credentials(credentials: Optional[str], credentials_path: Optional[str]): + def _get_credentials(credentials: str | None, credentials_path: str | None): """Retrieve Credentials using Google service account credentials. Supports multiple authentication methods: @@ -204,7 +203,7 @@ class GoogleVertexLLMService(GoogleLLMService): Raises: ValueError: If no valid credentials are provided or found. """ - creds: Optional[service_account.Credentials] = None + creds: service_account.Credentials | None = None if credentials: # Parse and load credentials from JSON string diff --git a/src/pipecat/services/gradium/stt.py b/src/pipecat/services/gradium/stt.py index 5dea2c824..223941bd0 100644 --- a/src/pipecat/services/gradium/stt.py +++ b/src/pipecat/services/gradium/stt.py @@ -13,8 +13,9 @@ WebSocket API for streaming audio transcription. import asyncio import base64 import json +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Optional +from typing import Any from loguru import logger from pydantic import BaseModel @@ -78,7 +79,7 @@ def _input_format_from_encoding(encoding: str, sample_rate: int) -> str: return encoding -def language_to_gradium_language(language: Language) -> Optional[str]: +def language_to_gradium_language(language: Language) -> str | None: """Convert a Language enum to Gradium's language code format. Args: @@ -109,7 +110,7 @@ class GradiumSTTSettings(STTSettings): Default is 10 (800ms). Lower values like 7-8 give faster response. 
""" - delay_in_frames: Optional[int] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + delay_in_frames: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) class GradiumSTTService(WebsocketSTTService): @@ -139,8 +140,8 @@ class GradiumSTTService(WebsocketSTTService): Default is 10 (800ms). Lower values like 7-8 give faster response. """ - language: Optional[Language] = None - delay_in_frames: Optional[int] = None + language: Language | None = None + delay_in_frames: int | None = None def __init__( self, @@ -148,11 +149,11 @@ class GradiumSTTService(WebsocketSTTService): api_key: str, api_endpoint_base_url: str = "wss://eu.api.gradium.ai/api/speech/asr", encoding: str = "pcm", - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - json_config: Optional[str] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = GRADIUM_TTFS_P99, + sample_rate: int | None = None, + params: InputParams | None = None, + json_config: str | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = GRADIUM_TTFS_P99, **kwargs, ): """Initialize the Gradium STT service. @@ -239,7 +240,7 @@ class GradiumSTTService(WebsocketSTTService): # and pushed as a TranscriptionFrame. self._accumulated_text: list[str] = [] self._flush_counter = 0 - self._transcript_aggregation_task: Optional[asyncio.Task] = None + self._transcript_aggregation_task: asyncio.Task | None = None def can_generate_metrics(self) -> bool: """Check if the service can generate metrics. 
diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index e8d69a7da..12cc3ddc1 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -6,8 +6,9 @@ import base64 import json +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import Any, AsyncGenerator, Optional +from typing import Any from loguru import logger from pydantic import BaseModel @@ -60,18 +61,18 @@ class GradiumTTSService(WebsocketTTSService): temp: Temperature to be used for generation, defaults to 0.6. """ - temp: Optional[float] = 0.6 + temp: float | None = 0.6 def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, + voice_id: str | None = None, url: str = "wss://eu.api.gradium.ai/api/speech/tts", - model: Optional[str] = None, - json_config: Optional[str] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + model: str | None = None, + json_config: str | None = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Gradium TTS service. 
@@ -280,7 +281,7 @@ class GradiumTTSService(WebsocketTTSService): return self._websocket raise Exception("Websocket not connected") - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio synthesis.""" flush_id = context_id or self.get_active_audio_context_id() if not flush_id or not self._websocket: diff --git a/src/pipecat/services/groq/llm.py b/src/pipecat/services/groq/llm.py index d36b52ab8..af6dc193a 100644 --- a/src/pipecat/services/groq/llm.py +++ b/src/pipecat/services/groq/llm.py @@ -7,7 +7,6 @@ """Groq LLM Service implementation using OpenAI-compatible interface.""" from dataclasses import dataclass -from typing import Optional from loguru import logger @@ -37,8 +36,8 @@ class GroqLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://api.groq.com/openai/v1", - model: Optional[str] = None, - settings: Optional[Settings] = None, + model: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize Groq LLM service. 
diff --git a/src/pipecat/services/groq/stt.py b/src/pipecat/services/groq/stt.py index 3f6c23774..444fb5094 100644 --- a/src/pipecat/services/groq/stt.py +++ b/src/pipecat/services/groq/stt.py @@ -7,7 +7,6 @@ """Groq speech-to-text service implementation using Whisper models.""" from dataclasses import dataclass -from typing import Optional from pipecat.services.stt_latency import GROQ_TTFS_P99 from pipecat.services.whisper.base_stt import ( @@ -41,14 +40,14 @@ class GroqSTTService(BaseWhisperSTTService): def __init__( self, *, - model: Optional[str] = None, - api_key: Optional[str] = None, + model: str | None = None, + api_key: str | None = None, base_url: str = "https://api.groq.com/openai/v1", - language: Optional[Language] = None, - prompt: Optional[str] = None, - temperature: Optional[float] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = GROQ_TTFS_P99, + language: Language | None = None, + prompt: str | None = None, + temperature: float | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = GROQ_TTFS_P99, **kwargs, ): """Initialize Groq STT service. diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index 00ff3ef84..49a4d7930 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -8,8 +8,8 @@ import io import wave +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional from loguru import logger from pydantic import BaseModel @@ -65,8 +65,8 @@ class GroqTTSService(TTSService): speed: Speech speed multiplier. Defaults to 1.0. 
""" - language: Optional[Language] = Language.EN - speed: Optional[float] = 1.0 + language: Language | None = Language.EN + speed: float | None = 1.0 GROQ_SAMPLE_RATE = 48000 # Groq TTS only supports 48kHz sample rate @@ -75,11 +75,11 @@ class GroqTTSService(TTSService): *, api_key: str, output_format: str = "wav", - params: Optional[InputParams] = None, - model_name: Optional[str] = None, - voice_id: Optional[str] = None, - sample_rate: Optional[int] = GROQ_SAMPLE_RATE, - settings: Optional[Settings] = None, + params: InputParams | None = None, + model_name: str | None = None, + voice_id: str | None = None, + sample_rate: int | None = GROQ_SAMPLE_RATE, + settings: Settings | None = None, **kwargs, ): """Initialize Groq TTS service. diff --git a/src/pipecat/services/heygen/api_interactive_avatar.py b/src/pipecat/services/heygen/api_interactive_avatar.py index 26d2553ce..c0cbfb1cf 100644 --- a/src/pipecat/services/heygen/api_interactive_avatar.py +++ b/src/pipecat/services/heygen/api_interactive_avatar.py @@ -9,8 +9,8 @@ API to communicate with HeyGen Streaming API. """ -from enum import Enum -from typing import Any, Dict, Literal, Optional +from enum import StrEnum +from typing import Any, Literal import aiohttp from loguru import logger @@ -19,7 +19,7 @@ from pydantic import BaseModel, Field from pipecat.services.heygen.base_api import BaseAvatarApi, StandardSessionResponse -class AvatarQuality(str, Enum): +class AvatarQuality(StrEnum): """Enum representing different avatar quality levels.""" low = "low" @@ -27,14 +27,14 @@ class AvatarQuality(str, Enum): high = "high" -class VideoEncoding(str, Enum): +class VideoEncoding(StrEnum): """Enum representing the video encoding.""" H264 = "H264" VP8 = "VP8" -class VoiceEmotion(str, Enum): +class VoiceEmotion(StrEnum): """Enum representing different voice emotion types.""" EXCITED = "excited" @@ -55,11 +55,11 @@ class ElevenLabsSettings(BaseModel): use_speaker_boost (Optional[bool]): Flag to enable speaker boost. 
""" - stability: Optional[float] = None - similarity_boost: Optional[float] = None - model_id: Optional[str] = None - style: Optional[int] = None - use_speaker_boost: Optional[bool] = None + stability: float | None = None + similarity_boost: float | None = None + model_id: str | None = None + style: int | None = None + use_speaker_boost: bool | None = None class VoiceSettings(BaseModel): @@ -72,10 +72,10 @@ class VoiceSettings(BaseModel): elevenlabs_settings (Optional[ElevenLabsSettings]): Details for ElevenLabs configuration. """ - voice_id: Optional[str] = Field(None, alias="voiceId") - rate: Optional[float] = None - emotion: Optional[VoiceEmotion] = None - elevenlabs_settings: Optional[ElevenLabsSettings] = Field(None, alias="elevenlabsSettings") + voice_id: str | None = Field(None, alias="voiceId") + rate: float | None = None + emotion: VoiceEmotion | None = None + elevenlabs_settings: ElevenLabsSettings | None = Field(None, alias="elevenlabsSettings") class NewSessionRequest(BaseModel): @@ -93,15 +93,15 @@ class NewSessionRequest(BaseModel): activity_idle_timeout (Optional[int]): Timeout in seconds for activity-based idle detection. 
""" - quality: Optional[AvatarQuality] = None - avatar_id: Optional[str] = None - voice: Optional[VoiceSettings] = None - video_encoding: Optional[VideoEncoding] = None - knowledge_id: Optional[str] = None - knowledge_base: Optional[str] = None + quality: AvatarQuality | None = None + avatar_id: str | None = None + voice: VoiceSettings | None = None + video_encoding: VideoEncoding | None = None + knowledge_id: str | None = None + knowledge_base: str | None = None version: Literal["v2"] = "v2" - disable_idle_timeout: Optional[bool] = None - activity_idle_timeout: Optional[int] = None + disable_idle_timeout: bool | None = None + activity_idle_timeout: int | None = None class HeyGenSession(BaseModel): @@ -153,7 +153,7 @@ class HeyGenApi(BaseAvatarApi): self.api_key = api_key self.session = session - async def _request(self, path: str, params: Dict[str, Any], expect_data: bool = True) -> Any: + async def _request(self, path: str, params: dict[str, Any], expect_data: bool = True) -> Any: """Make a POST request to the HeyGen API. Args: diff --git a/src/pipecat/services/heygen/api_liveavatar.py b/src/pipecat/services/heygen/api_liveavatar.py index 7b9119542..14e941852 100644 --- a/src/pipecat/services/heygen/api_liveavatar.py +++ b/src/pipecat/services/heygen/api_liveavatar.py @@ -9,8 +9,8 @@ API to communicate with LiveAvatar Streaming API. """ -from enum import Enum -from typing import Any, Dict, Optional +from enum import StrEnum +from typing import Any import aiohttp from loguru import logger @@ -28,8 +28,8 @@ class AvatarPersona(BaseModel): language (str): Language code for the avatar (default: "en"). 
""" - voice_id: Optional[str] = None - context_id: Optional[str] = None + voice_id: str | None = None + context_id: str | None = None language: str = "en" @@ -47,14 +47,14 @@ class CustomSDKLiveKitConfig(BaseModel): livekit_client_token: str -class VideoEncoding(str, Enum): +class VideoEncoding(StrEnum): """Enum representing the video encoding.""" H264 = "H264" VP8 = "VP8" -class VideoQuality(str, Enum): +class VideoQuality(StrEnum): """Enum representing different avatar quality levels.""" low = "low" @@ -84,10 +84,10 @@ class LiveAvatarNewSessionRequest(BaseModel): mode: str = "LITE" avatar_id: str - video_settings: Optional[VideoSettings] = VideoSettings(encoding=VideoEncoding.VP8) - is_sandbox: Optional[bool] = False - avatar_persona: Optional[AvatarPersona] = None - livekit_config: Optional[CustomSDKLiveKitConfig] = None + video_settings: VideoSettings | None = VideoSettings(encoding=VideoEncoding.VP8) + is_sandbox: bool | None = False + avatar_persona: AvatarPersona | None = None + livekit_config: CustomSDKLiveKitConfig | None = None class SessionTokenData(BaseModel): @@ -186,8 +186,8 @@ class LiveAvatarApi(BaseAvatarApi): self, method: str, path: str, - params: Optional[Dict[str, Any]] = None, - bearer_token: Optional[str] = None, + params: dict[str, Any] | None = None, + bearer_token: str | None = None, ) -> Any: """Make a request to the LiveAvatar API. 
diff --git a/src/pipecat/services/heygen/client.py b/src/pipecat/services/heygen/client.py index 7f99502b6..cc6451a25 100644 --- a/src/pipecat/services/heygen/client.py +++ b/src/pipecat/services/heygen/client.py @@ -16,8 +16,8 @@ import base64 import json import time import uuid +from collections.abc import Awaitable, Callable from enum import Enum -from typing import Awaitable, Callable, Optional, Union import aiohttp from loguru import logger @@ -93,8 +93,8 @@ class HeyGenClient: api_key: str, session: aiohttp.ClientSession, params: TransportParams, - session_request: Optional[Union[LiveAvatarNewSessionRequest, NewSessionRequest]] = None, - service_type: Optional[ServiceType] = None, + session_request: LiveAvatarNewSessionRequest | NewSessionRequest | None = None, + service_type: ServiceType | None = None, callbacks: HeyGenCallbacks, connect_as_user: bool = False, ) -> None: @@ -149,16 +149,16 @@ class HeyGenClient: else: self._api = LiveAvatarApi(api_key, session=session) - self._heyGen_session: Optional[StandardSessionResponse] = None + self._heyGen_session: StandardSessionResponse | None = None self._websocket = None - self._task_manager: Optional[BaseTaskManager] = None + self._task_manager: BaseTaskManager | None = None self._params = params self._in_sample_rate = 0 self._out_sample_rate = 0 self._connected = False self._session_request = session_request self._callbacks = callbacks - self._event_queue: Optional[asyncio.Queue] = None + self._event_queue: asyncio.Queue | None = None self._event_task = None # Currently supporting to capture the audio and video from a single participant self._video_task = None diff --git a/src/pipecat/services/heygen/video.py b/src/pipecat/services/heygen/video.py index 9a20f35ef..11bc06ca2 100644 --- a/src/pipecat/services/heygen/video.py +++ b/src/pipecat/services/heygen/video.py @@ -13,7 +13,6 @@ audio/video streaming capabilities through the HeyGen API. 
import asyncio from dataclasses import dataclass -from typing import Optional, Union import aiohttp from loguru import logger @@ -90,9 +89,9 @@ class HeyGenVideoService(AIService): *, api_key: str, session: aiohttp.ClientSession, - session_request: Optional[Union[LiveAvatarNewSessionRequest, NewSessionRequest]] = None, - service_type: Optional[ServiceType] = None, - settings: Optional[Settings] = None, + session_request: LiveAvatarNewSessionRequest | NewSessionRequest | None = None, + service_type: ServiceType | None = None, + settings: Settings | None = None, **kwargs, ) -> None: """Initialize the HeyGen video service. @@ -113,8 +112,8 @@ class HeyGenVideoService(AIService): super().__init__(settings=default_settings, **kwargs) self._api_key = api_key self._session = session - self._client: Optional[HeyGenClient] = None - self._send_task: Optional[asyncio.Task] = None + self._client: HeyGenClient | None = None + self._send_task: asyncio.Task | None = None self._resampler = create_stream_resampler() self._is_interrupting = False self._session_request = session_request @@ -367,7 +366,7 @@ class HeyGenVideoService(AIService): await self._client.agent_speak(bytes(chunk), self._event_id) self._queue.task_done() - except asyncio.TimeoutError: + except TimeoutError: # Bot has stopped speaking if self._event_id is not None: await self._client.agent_speak_end(self._event_id) diff --git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py index ea43a530d..ff82a9fc2 100644 --- a/src/pipecat/services/hume/tts.py +++ b/src/pipecat/services/hume/tts.py @@ -7,8 +7,9 @@ import base64 import os import warnings +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Optional +from typing import Any import httpx from loguru import logger @@ -93,18 +94,18 @@ class HumeTTSService(TTSService): trailing_silence: Seconds of silence to append at the end (0-5). 
""" - description: Optional[str] = None - speed: Optional[float] = None - trailing_silence: Optional[float] = None + description: str | None = None + speed: float | None = None + trailing_silence: float | None = None def __init__( self, *, - api_key: Optional[str] = None, - voice_id: Optional[str] = None, - params: Optional[InputParams] = None, - sample_rate: Optional[int] = HUME_SAMPLE_RATE, - settings: Optional[Settings] = None, + api_key: str | None = None, + voice_id: str | None = None, + params: InputParams | None = None, + sample_rate: int | None = HUME_SAMPLE_RATE, + settings: Settings | None = None, **kwargs, ) -> None: """Initialize the HumeTTSService. diff --git a/src/pipecat/services/image_service.py b/src/pipecat/services/image_service.py index f99909444..df8ef66fe 100644 --- a/src/pipecat/services/image_service.py +++ b/src/pipecat/services/image_service.py @@ -11,7 +11,7 @@ text prompts into images. """ from abc import abstractmethod -from typing import AsyncGenerator, Optional +from collections.abc import AsyncGenerator from pipecat.frames.frames import Frame, TextFrame from pipecat.processors.frame_processor import FrameDirection @@ -27,7 +27,7 @@ class ImageGenService(AIService): generation functionality using their specific AI service. """ - def __init__(self, *, settings: Optional[ImageGenSettings] = None, **kwargs): + def __init__(self, *, settings: ImageGenSettings | None = None, **kwargs): """Initialize the image generation service. 
Args: diff --git a/src/pipecat/services/inworld/realtime/events.py b/src/pipecat/services/inworld/realtime/events.py index 55f7f28bd..6ee3f05b3 100644 --- a/src/pipecat/services/inworld/realtime/events.py +++ b/src/pipecat/services/inworld/realtime/events.py @@ -12,7 +12,7 @@ https://docs.inworld.ai/api-reference/realtimeAPI/realtime/realtime-websocket import json import uuid -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Literal from pydantic import BaseModel, ConfigDict, Field @@ -84,10 +84,10 @@ class TurnDetection(BaseModel): interrupt_response: Whether user speech interrupts the current response. """ - type: Optional[Literal["server_vad", "semantic_vad"]] = "semantic_vad" - eagerness: Optional[str] = None - create_response: Optional[bool] = None - interrupt_response: Optional[bool] = None + type: Literal["server_vad", "semantic_vad"] | None = "semantic_vad" + eagerness: str | None = None + create_response: bool | None = None + interrupt_response: bool | None = None class InputTranscription(BaseModel): @@ -97,7 +97,7 @@ class InputTranscription(BaseModel): model: The STT model to use for transcription. """ - model: Optional[str] = None + model: str | None = None # @@ -114,9 +114,9 @@ class AudioInput(BaseModel): turn_detection: Configuration for turn detection. """ - format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None - transcription: Optional[InputTranscription] = None - turn_detection: Optional[TurnDetection] = None + format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None + transcription: InputTranscription | None = None + turn_detection: TurnDetection | None = None class AudioOutput(BaseModel): @@ -128,9 +128,9 @@ class AudioOutput(BaseModel): voice: The voice ID to use (e.g. "Sarah", "Clive"). 
""" - format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None - model: Optional[str] = None - voice: Optional[str] = None + format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None + model: str | None = None + voice: str | None = None class AudioConfiguration(BaseModel): @@ -141,8 +141,8 @@ class AudioConfiguration(BaseModel): output: Configuration for output audio. """ - input: Optional[AudioInput] = None - output: Optional[AudioOutput] = None + input: AudioInput | None = None + output: AudioOutput | None = None # @@ -163,11 +163,11 @@ class FunctionTool(BaseModel): type: Literal["function"] = "function" name: str description: str - parameters: Dict[str, Any] + parameters: dict[str, Any] # Union type for Inworld tools -InworldTool = Union[FunctionTool, Dict[str, Any]] +InworldTool = FunctionTool | dict[str, Any] # @@ -191,15 +191,15 @@ class SessionProperties(BaseModel): # Needed to support ToolSchema in tools field. model_config = ConfigDict(arbitrary_types_allowed=True) - type: Optional[str] = "realtime" - model: Optional[str] = None - instructions: Optional[str] = None - temperature: Optional[float] = None - output_modalities: Optional[List[str]] = None - audio: Optional[AudioConfiguration] = None + type: str | None = "realtime" + model: str | None = None + instructions: str | None = None + temperature: float | None = None + output_modalities: list[str] | None = None + audio: AudioConfiguration | None = None # Tools can be ToolsSchema when provided by user, or list of dicts for API - tools: Optional[ToolsSchema | List[InworldTool]] = None - provider_data: Optional[Dict[str, Any]] = None + tools: ToolsSchema | list[InworldTool] | None = None + provider_data: dict[str, Any] | None = None # @@ -218,9 +218,9 @@ class ItemContent(BaseModel): """ type: Literal["text", "audio", "input_text", "input_audio", "output_text", "output_audio"] - text: Optional[str] = None - audio: Optional[str] = None # base64-encoded audio - 
transcript: Optional[str] = None + text: str | None = None + audio: str | None = None # base64-encoded audio + transcript: str | None = None class ConversationItem(BaseModel): @@ -240,15 +240,15 @@ class ConversationItem(BaseModel): """ id: str = Field(default_factory=lambda: str(uuid.uuid4().hex)) - object: Optional[Literal["realtime.item"]] = None + object: Literal["realtime.item"] | None = None type: Literal["message", "function_call", "function_call_output"] - status: Optional[Literal["completed", "in_progress", "incomplete"]] = None - role: Optional[Literal["user", "assistant", "system", "tool"]] = None - content: Optional[List[ItemContent]] = None - call_id: Optional[str] = None - name: Optional[str] = None - arguments: Optional[str] = None - output: Optional[str] = None + status: Literal["completed", "in_progress", "incomplete"] | None = None + role: Literal["user", "assistant", "system", "tool"] | None = None + content: list[ItemContent] | None = None + call_id: str | None = None + name: str | None = None + arguments: str | None = None + output: str | None = None class RealtimeConversation(BaseModel): @@ -270,7 +270,7 @@ class ResponseProperties(BaseModel): modalities: Output modalities for the response (text, audio, or both). """ - modalities: Optional[List[Literal["text", "audio"]]] = ["text", "audio"] + modalities: list[Literal["text", "audio"]] | None = ["text", "audio"] # @@ -289,11 +289,11 @@ class RealtimeError(BaseModel): event_id: Event ID associated with the error, if applicable. 
""" - type: Optional[str] = None - code: Optional[str] = "" + type: str | None = None + code: str | None = "" message: str - param: Optional[str] = None - event_id: Optional[str] = None + param: str | None = None + event_id: str | None = None # @@ -367,7 +367,7 @@ class ConversationItemCreateEvent(ClientEvent): """ type: Literal["conversation.item.create"] = "conversation.item.create" - previous_item_id: Optional[str] = None + previous_item_id: str | None = None item: ConversationItem @@ -380,7 +380,7 @@ class ResponseCreateEvent(ClientEvent): """ type: Literal["response.create"] = "response.create" - response: Optional[ResponseProperties] = None + response: ResponseProperties | None = None class ResponseCancelEvent(ClientEvent): @@ -423,7 +423,7 @@ class SessionCreatedEvent(ServerEvent): """ type: Literal["session.created"] - session: Optional[SessionProperties] = None + session: SessionProperties | None = None class SessionUpdatedEvent(ServerEvent): @@ -462,7 +462,7 @@ class ConversationItemAdded(ServerEvent): """ type: Literal["conversation.item.added"] - previous_item_id: Optional[str] = None + previous_item_id: str | None = None item: ConversationItem @@ -492,7 +492,7 @@ class ConversationItemInputAudioTranscriptionDelta(ServerEvent): type: Literal["conversation.item.input_audio_transcription.delta"] item_id: str - content_index: Optional[int] = None + content_index: int | None = None delta: str @@ -534,7 +534,7 @@ class InputAudioBufferCommitted(ServerEvent): """ type: Literal["input_audio_buffer.committed"] - previous_item_id: Optional[str] = None + previous_item_id: str | None = None item_id: str @@ -653,11 +653,11 @@ class ResponseFunctionCallArgumentsDelta(ServerEvent): """ type: Literal["response.function_call_arguments.delta"] - response_id: Optional[str] = None - item_id: Optional[str] = None + response_id: str | None = None + item_id: str | None = None call_id: str delta: str - previous_item_id: Optional[str] = None + previous_item_id: str | None = 
None class ResponseFunctionCallArgumentsDone(ServerEvent): @@ -673,7 +673,7 @@ class ResponseFunctionCallArgumentsDone(ServerEvent): type: Literal["response.function_call_arguments.done"] call_id: str - name: Optional[str] = None + name: str | None = None arguments: str @@ -686,9 +686,9 @@ class Usage(BaseModel): output_tokens: Number of output tokens used. """ - total_tokens: Optional[int] = None - input_tokens: Optional[int] = None - output_tokens: Optional[int] = None + total_tokens: int | None = None + input_tokens: int | None = None + output_tokens: int | None = None class Response(BaseModel): @@ -705,9 +705,9 @@ class Response(BaseModel): id: str object: Literal["realtime.response"] status: Literal["completed", "in_progress", "incomplete", "cancelled", "failed"] - status_details: Optional[Any] = None - output: List[ConversationItem] - usage: Optional[Usage] = None + status_details: Any | None = None + output: list[ConversationItem] + usage: Usage | None = None class ResponseDone(ServerEvent): @@ -721,7 +721,7 @@ class ResponseDone(ServerEvent): type: Literal["response.done"] response: Response - usage: Optional[Usage] = None + usage: Usage | None = None class ResponseOutputItemDone(ServerEvent): @@ -749,7 +749,7 @@ class ContentPart(BaseModel): """ type: str - transcript: Optional[str] = None + transcript: str | None = None class ResponseContentPartAdded(ServerEvent): diff --git a/src/pipecat/services/inworld/realtime/llm.py b/src/pipecat/services/inworld/realtime/llm.py index 79ee186ca..0c9ea623d 100644 --- a/src/pipecat/services/inworld/realtime/llm.py +++ b/src/pipecat/services/inworld/realtime/llm.py @@ -14,9 +14,10 @@ import base64 import json import time import urllib.parse +from collections.abc import Mapping from dataclasses import dataclass, field from dataclasses import fields as dataclass_fields -from typing import Any, Dict, Literal, Mapping, Optional, Type +from typing import Any, Literal from loguru import logger @@ -117,7 +118,7 @@ class 
InworldRealtimeLLMSettings(LLMSettings): # -- apply_update override ----------------------------------------------- - def apply_update(self, delta: "InworldRealtimeLLMService.Settings") -> Dict[str, Any]: + def apply_update(self, delta: "InworldRealtimeLLMService.Settings") -> dict[str, Any]: """Merge a delta, keeping ``model``/``system_instruction`` in sync with SP. When the delta contains ``session_properties``, it **replaces** the @@ -147,7 +148,7 @@ class InworldRealtimeLLMSettings(LLMSettings): @classmethod def from_mapping( - cls: Type["InworldRealtimeLLMService.Settings"], settings: Mapping[str, Any] + cls: type["InworldRealtimeLLMService.Settings"], settings: Mapping[str, Any] ) -> "InworldRealtimeLLMService.Settings": """Build a delta from a plain dict, routing SP keys into ``session_properties``. @@ -157,9 +158,9 @@ class InworldRealtimeLLMSettings(LLMSettings): """ own_field_names = {f.name for f in dataclass_fields(cls)} - {"extra"} - top: Dict[str, Any] = {} - sp_dict: Dict[str, Any] = {} - extra: Dict[str, Any] = {} + top: dict[str, Any] = {} + sp_dict: dict[str, Any] = {} + extra: dict[str, Any] = {} sp_keys = set(events.SessionProperties.model_fields.keys()) - {"model"} @@ -249,13 +250,13 @@ class InworldRealtimeLLMService(LLMService): self, *, api_key: str, - llm_model: Optional[str] = None, - voice: Optional[str] = None, - tts_model: Optional[str] = None, - stt_model: Optional[str] = None, + llm_model: str | None = None, + voice: str | None = None, + tts_model: str | None = None, + stt_model: str | None = None, base_url: str = "wss://api.inworld.ai/api/v1/realtime/session", auth_type: Literal["basic", "bearer"] = "basic", - settings: Optional[Settings] = None, + settings: Settings | None = None, start_audio_paused: bool = False, **kwargs, ): @@ -375,7 +376,7 @@ class InworldRealtimeLLMService(LLMService): """ self._audio_input_paused = paused - def _get_configured_sample_rate(self, direction: str) -> Optional[int]: + def 
_get_configured_sample_rate(self, direction: str) -> int | None: """Get manually configured sample rate for input or output. Args: diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index 215c0f747..65922e3a1 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -17,19 +17,13 @@ import asyncio import base64 import json import uuid +from collections.abc import AsyncGenerator, Mapping from dataclasses import dataclass, field from typing import ( Any, - AsyncGenerator, ClassVar, - Dict, - List, Literal, - Mapping, - Optional, Self, - Set, - Tuple, ) import aiohttp @@ -81,7 +75,7 @@ class InworldTTSSettings(TTSSettings): speaking_rate: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) temperature: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - _aliases: ClassVar[Dict[str, str]] = { + _aliases: ClassVar[dict[str, str]] = { "voiceId": "voice", "modelId": "model", } @@ -118,23 +112,23 @@ class InworldHttpTTSService(TTSService): timestamp_transport_strategy: The strategy to use for timestamp transport. 
""" - temperature: Optional[float] = None - speaking_rate: Optional[float] = None - timestamp_transport_strategy: Optional[Literal["ASYNC", "SYNC"]] = "ASYNC" + temperature: float | None = None + speaking_rate: float | None = None + timestamp_transport_strategy: Literal["ASYNC", "SYNC"] | None = "ASYNC" def __init__( self, *, api_key: str, aiohttp_session: aiohttp.ClientSession, - voice_id: Optional[str] = None, - model: Optional[str] = None, + voice_id: str | None = None, + model: str | None = None, streaming: bool = True, - sample_rate: Optional[int] = None, + sample_rate: int | None = None, encoding: str = "LINEAR16", - timestamp_transport_strategy: Optional[Literal["ASYNC", "SYNC"]] = "ASYNC", - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + timestamp_transport_strategy: Literal["ASYNC", "SYNC"] | None = "ASYNC", + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Inworld TTS service. @@ -255,8 +249,8 @@ class InworldHttpTTSService(TTSService): def _calculate_word_times( self, - timestamp_info: Dict[str, Any], - ) -> Tuple[List[Tuple[str, float]], float]: + timestamp_info: dict[str, Any], + ) -> tuple[list[tuple[str, float]], float]: """Calculate word timestamps from Inworld HTTP API word-level response. Note: Inworld HTTP provides timestamps that reset for each request. @@ -269,7 +263,7 @@ class InworldHttpTTSService(TTSService): Tuple of (word_times, chunk_end_time) where chunk_end_time is the end time of the last word in this chunk (not cumulative). """ - word_times: List[Tuple[str, float]] = [] + word_times: list[tuple[str, float]] = [] chunk_end_time = 0.0 alignment = timestamp_info.get("wordAlignment", {}) @@ -534,30 +528,30 @@ class InworldTTSService(WebsocketTTSService): timestamp_transport_strategy: The strategy to use for timestamp transport. 
""" - temperature: Optional[float] = None - speaking_rate: Optional[float] = None - apply_text_normalization: Optional[str] = None - max_buffer_delay_ms: Optional[int] = None - buffer_char_threshold: Optional[int] = None - auto_mode: Optional[bool] = True - timestamp_transport_strategy: Optional[Literal["ASYNC", "SYNC"]] = "ASYNC" + temperature: float | None = None + speaking_rate: float | None = None + apply_text_normalization: str | None = None + max_buffer_delay_ms: int | None = None + buffer_char_threshold: int | None = None + auto_mode: bool | None = True + timestamp_transport_strategy: Literal["ASYNC", "SYNC"] | None = "ASYNC" def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, - model: Optional[str] = None, + voice_id: str | None = None, + model: str | None = None, url: str = "wss://api.inworld.ai/tts/v1/voice:streamBidirectional", - sample_rate: Optional[int] = None, + sample_rate: int | None = None, encoding: str = "LINEAR16", - auto_mode: Optional[bool] = None, - apply_text_normalization: Optional[str] = None, - timestamp_transport_strategy: Optional[Literal["ASYNC", "SYNC"]] = "ASYNC", - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - aggregate_sentences: Optional[bool] = None, - text_aggregation_mode: Optional[TextAggregationMode] = None, + auto_mode: bool | None = None, + apply_text_normalization: str | None = None, + timestamp_transport_strategy: Literal["ASYNC", "SYNC"] | None = "ASYNC", + params: InputParams | None = None, + settings: Settings | None = None, + aggregate_sentences: bool | None = None, + text_aggregation_mode: TextAggregationMode | None = None, append_trailing_space: bool = True, **kwargs: Any, ): @@ -684,8 +678,8 @@ class InworldTTSService(WebsocketTTSService): # Fallback tracking for when timestamps are not received. Without # timestamps, interruptions commit the full text rather than only the # portion that was spoken. 
- self._context_texts: Dict[str, str] = {} - self._contexts_with_timestamps: Set[str] = set() + self._context_texts: dict[str, str] = {} + self._contexts_with_timestamps: set[str] = set() # Init-only config (not runtime-updatable). self._audio_encoding = encoding @@ -730,7 +724,7 @@ class InworldTTSService(WebsocketTTSService): await super().cancel(frame) await self._disconnect() - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio without closing the context. This triggers synthesis of all accumulated text in the buffer while @@ -758,7 +752,7 @@ class InworldTTSService(WebsocketTTSService): except Exception as e: logger.warning(f"{self}: Failed to pre-open context: {e}") - def _calculate_word_times(self, timestamp_info: Dict[str, Any]) -> List[Tuple[str, float]]: + def _calculate_word_times(self, timestamp_info: dict[str, Any]) -> list[tuple[str, float]]: """Calculate word timestamps from Inworld WebSocket API response. Adds cumulative time offset to maintain monotonically increasing timestamps @@ -771,7 +765,7 @@ class InworldTTSService(WebsocketTTSService): Returns: List of (word, timestamp) tuples with cumulative offset applied. 
""" - word_times: List[Tuple[str, float]] = [] + word_times: list[tuple[str, float]] = [] alignment = timestamp_info.get("wordAlignment", {}) words = alignment.get("words", []) @@ -1079,7 +1073,7 @@ class InworldTTSService(WebsocketTTSService): if self._settings.speaking_rate is not None: audio_config["speakingRate"] = self._settings.speaking_rate - create_config: Dict[str, Any] = { + create_config: dict[str, Any] = { "voiceId": self._settings.voice, "modelId": self._settings.model, "audioConfig": audio_config, diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py index 4756d4e74..9e4dc1869 100644 --- a/src/pipecat/services/kokoro/tts.py +++ b/src/pipecat/services/kokoro/tts.py @@ -7,9 +7,9 @@ """Kokoro TTS service implementation using kokoro-onnx.""" import os +from collections.abc import AsyncGenerator from dataclasses import dataclass from pathlib import Path -from typing import AsyncGenerator, Optional import numpy as np from loguru import logger @@ -119,11 +119,11 @@ class KokoroTTSService(TTSService): def __init__( self, *, - voice_id: Optional[str] = None, - model_path: Optional[str] = None, - voices_path: Optional[str] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + voice_id: str | None = None, + model_path: str | None = None, + voices_path: str | None = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Kokoro TTS service. 
diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index 036818370..979d5c430 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -6,22 +6,17 @@ """Base classes for Large Language Model services with function calling support.""" +from __future__ import annotations + import asyncio import json import uuid import warnings +from collections.abc import Awaitable, Callable, Mapping, Sequence from dataclasses import dataclass from typing import ( Any, - Awaitable, - Callable, - Dict, - List, - Mapping, - Optional, Protocol, - Sequence, - Type, ) from loguru import logger @@ -86,7 +81,7 @@ class FunctionCallResultCallback(Protocol): """ async def __call__( - self, result: Any, *, properties: Optional[FunctionCallResultProperties] = None + self, result: Any, *, properties: FunctionCallResultProperties | None = None ) -> None: """Call the result callback. @@ -117,7 +112,7 @@ class FunctionCallParams: function_name: str tool_call_id: str arguments: Mapping[str, Any] - llm: "LLMService" + llm: LLMService context: LLMContext result_callback: FunctionCallResultCallback @@ -139,10 +134,10 @@ class FunctionCallRegistryItem: ``function_call_timeout_secs`` for this specific function. 
""" - function_name: Optional[str] - handler: FunctionCallHandler | "DirectFunctionWrapper" + function_name: str | None + handler: FunctionCallHandler | DirectFunctionWrapper cancel_on_interruption: bool - timeout_secs: Optional[float] = None + timeout_secs: float | None = None @dataclass @@ -168,8 +163,8 @@ class FunctionCallRunnerItem: tool_call_id: str arguments: Mapping[str, Any] context: LLMContext - run_llm: Optional[bool] = None - group_id: Optional[str] = None + run_llm: bool | None = None + group_id: str | None = None class LLMService(UserTurnCompletionLLMServiceMixin, AIService): @@ -207,15 +202,15 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): # OpenAILLMAdapter is used as the default adapter since it aligns with most LLM implementations. # However, subclasses should override this with a more specific adapter when necessary. - adapter_class: Type[BaseLLMAdapter] = OpenAILLMAdapter + adapter_class: type[BaseLLMAdapter] = OpenAILLMAdapter def __init__( self, run_in_parallel: bool = True, group_parallel_tools: bool = True, - function_call_timeout_secs: Optional[float] = None, + function_call_timeout_secs: float | None = None, enable_async_tool_cancellation: bool = False, - settings: Optional[LLMSettings] = None, + settings: LLMSettings | None = None, **kwargs, ): """Initialize the LLM service. 
@@ -250,13 +245,13 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): self._enable_async_tool_cancellation: bool = enable_async_tool_cancellation self._filter_incomplete_user_turns: bool = False self._async_tool_cancellation_enabled: bool = False - self._base_system_instruction: Optional[str] = None + self._base_system_instruction: str | None = None self._adapter = self.adapter_class() - self._functions: Dict[Optional[str], FunctionCallRegistryItem] = {} - self._function_call_tasks: Dict[Optional[asyncio.Task], FunctionCallRunnerItem] = {} - self._sequential_runner_task: Optional[asyncio.Task] = None - self._skip_tts: Optional[bool] = None - self._summary_task: Optional[asyncio.Task] = None + self._functions: dict[str | None, FunctionCallRegistryItem] = {} + self._function_call_tasks: dict[asyncio.Task | None, FunctionCallRunnerItem] = {} + self._sequential_runner_task: asyncio.Task | None = None + self._skip_tts: bool | None = None + self._summary_task: asyncio.Task | None = None self._register_event_handler("on_function_calls_started") self._register_event_handler("on_function_calls_cancelled") @@ -284,9 +279,9 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): async def run_inference( self, context: LLMContext, - max_tokens: Optional[int] = None, - system_instruction: Optional[str] = None, - ) -> Optional[str]: + max_tokens: int | None = None, + system_instruction: str | None = None, + ) -> str | None: """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context. Must be implemented by subclasses. 
@@ -495,7 +490,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): self._generate_summary(frame), timeout=timeout, ) - except asyncio.TimeoutError: + except TimeoutError: await self.push_error(error_msg=f"Context summarization timed out after {timeout}s") except Exception as e: error = f"Error generating context summary: {e}" @@ -580,11 +575,11 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): def register_function( self, - function_name: Optional[str], + function_name: str | None, handler: Any, *, cancel_on_interruption: bool = True, - timeout_secs: Optional[float] = None, + timeout_secs: float | None = None, ): """Register a function handler for LLM function calls. @@ -621,7 +616,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): handler: DirectFunction, *, cancel_on_interruption: bool = True, - timeout_secs: Optional[float] = None, + timeout_secs: float | None = None, ): """Register a direct function handler for LLM function calls. @@ -653,7 +648,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): timeout_secs=timeout_secs, ) - def unregister_function(self, function_name: Optional[str]): + def unregister_function(self, function_name: str | None): """Remove a registered function handler. Args: @@ -806,12 +801,12 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): group_id=runner_item.group_id, ) - timeout_task: Optional[asyncio.Task] = None + timeout_task: asyncio.Task | None = None # Single callback for both intermediate updates and final results. # Pass properties=FunctionCallResultProperties(is_final=False) for updates. 
async def function_call_result_callback( - result: Any, *, properties: Optional[FunctionCallResultProperties] = None + result: Any, *, properties: FunctionCallResultProperties | None = None ): is_final = properties.is_final if properties else True if not is_final and item.cancel_on_interruption: @@ -940,7 +935,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): self._functions.pop(CANCEL_ASYNC_TOOL_NAME, None) self._compose_system_instruction() - async def _cancel_async_tool_call_handler(self, params: "FunctionCallParams"): + async def _cancel_async_tool_call_handler(self, params: FunctionCallParams): """Handle a ``cancel_async_tool_call`` invocation from the LLM. Args: @@ -948,7 +943,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): """ logger.debug(f"{self}: cancel_async_tool_call invoked") - tool_call_id: Optional[str] = params.arguments.get("tool_call_id") + tool_call_id: str | None = params.arguments.get("tool_call_id") if not tool_call_id: logger.warning(f"{self} cancel_async_tool_call called with no tool_call_id") await params.result_callback({"cancelled": None}) @@ -1003,7 +998,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): if cancelled_items: await self._call_event_handler("on_function_calls_cancelled", cancelled_items) - async def _cancel_function_call(self, function_name: Optional[str]): + async def _cancel_function_call(self, function_name: str | None): cancelled_tasks = set() cancelled_items = [] for task, runner_item in self._function_call_tasks.items(): diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index 0df70fd19..8daa1902a 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -7,8 +7,9 @@ """LMNT text-to-speech service implementation.""" import json +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import Any, AsyncGenerator, Optional +from typing import Any from loguru import logger 
@@ -36,7 +37,7 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -def language_to_lmnt_language(language: Language) -> Optional[str]: +def language_to_lmnt_language(language: Language) -> str | None: """Convert a Language enum to LMNT language code. Args: @@ -94,12 +95,12 @@ class LmntTTSService(InterruptibleTTSService): self, *, api_key: str, - voice_id: Optional[str] = None, - sample_rate: Optional[int] = None, + voice_id: str | None = None, + sample_rate: int | None = None, language: Language = Language.EN, output_format: str = "pcm_s16le", - model: Optional[str] = None, - settings: Optional[Settings] = None, + model: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the LMNT TTS service. @@ -173,7 +174,7 @@ class LmntTTSService(InterruptibleTTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to LMNT service language format. 
Args: @@ -300,7 +301,7 @@ class LmntTTSService(InterruptibleTTSService): return self._websocket raise Exception("Websocket not connected") - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio synthesis.""" if not self._websocket or self._websocket.state is State.CLOSED: return diff --git a/src/pipecat/services/mcp_service.py b/src/pipecat/services/mcp_service.py index 275569714..ee962cb86 100644 --- a/src/pipecat/services/mcp_service.py +++ b/src/pipecat/services/mcp_service.py @@ -7,8 +7,9 @@ """MCP (Model Context Protocol) client for integrating external tools with LLMs.""" import json +from collections.abc import Callable from contextlib import AsyncExitStack -from typing import Any, Callable, Dict, List, Optional, TypeAlias +from typing import Any, TypeAlias from loguru import logger @@ -53,8 +54,8 @@ class MCPClient(BaseObject): def __init__( self, server_params: ServerParameters, - tools_filter: Optional[List[str]] = None, - tools_output_filters: Optional[Dict[str, Callable[[Any], Any]]] = None, + tools_filter: list[str] | None = None, + tools_output_filters: dict[str, Callable[[Any], Any]] | None = None, **kwargs, ): """Initialize the MCP client with server parameters. 
@@ -70,8 +71,8 @@ class MCPClient(BaseObject): self._server_params = server_params self._tools_filter = tools_filter self._tools_output_filters = tools_output_filters or {} - self._exit_stack: Optional[AsyncExitStack] = None - self._active_session: Optional[ClientSession] = None + self._exit_stack: AsyncExitStack | None = None + self._active_session: ClientSession | None = None if not isinstance( server_params, @@ -195,7 +196,7 @@ class MCPClient(BaseObject): llm.register_function(function_schema.name, self._tool_wrapper) def _convert_mcp_schema_to_pipecat( - self, tool_name: str, tool_schema: Dict[str, Any] + self, tool_name: str, tool_schema: dict[str, Any] ) -> FunctionSchema: """Convert an mcp tool schema to Pipecat's FunctionSchema format. @@ -276,7 +277,7 @@ class MCPClient(BaseObject): async def _list_tools_helper(self, session): available_tools = await session.list_tools() - tool_schemas: List[FunctionSchema] = [] + tool_schemas: list[FunctionSchema] = [] logger.debug(f"Found {len(available_tools.tools)} available tools") diff --git a/src/pipecat/services/mem0/memory.py b/src/pipecat/services/mem0/memory.py index 91396cab4..5359284f5 100644 --- a/src/pipecat/services/mem0/memory.py +++ b/src/pipecat/services/mem0/memory.py @@ -12,7 +12,7 @@ historical information. 
""" import asyncio -from typing import Any, Dict, List, Optional +from typing import Any from loguru import logger from pydantic import BaseModel, Field @@ -61,13 +61,13 @@ class Mem0MemoryService(FrameProcessor): def __init__( self, *, - api_key: Optional[str] = None, - local_config: Optional[Dict[str, Any]] = None, - user_id: Optional[str] = None, - agent_id: Optional[str] = None, - run_id: Optional[str] = None, - params: Optional[InputParams] = None, - host: Optional[str] = None, + api_key: str | None = None, + local_config: dict[str, Any] | None = None, + user_id: str | None = None, + agent_id: str | None = None, + run_id: str | None = None, + params: InputParams | None = None, + host: str | None = None, ): """Initialize the Mem0 memory service. @@ -109,7 +109,7 @@ class Mem0MemoryService(FrameProcessor): self.last_query = None logger.info(f"Initialized Mem0MemoryService with {user_id=}, {agent_id=}, {run_id=}") - async def get_memories(self) -> List[Dict[str, Any]]: + async def get_memories(self) -> list[dict[str, Any]]: """Retrieve all stored memories for the configured user/agent/run IDs. This is a convenience method for accessing memories outside the pipeline, @@ -148,7 +148,7 @@ class Mem0MemoryService(FrameProcessor): logger.error(f"Error retrieving memories from Mem0: {e}") return [] - async def _store_messages(self, messages: List[Dict[str, Any]]): + async def _store_messages(self, messages: list[dict[str, Any]]): """Store messages in Mem0. Runs the blocking Mem0 API call in a background thread to avoid @@ -174,7 +174,7 @@ class Mem0MemoryService(FrameProcessor): except Exception as e: logger.error(f"Error storing messages in Mem0: {e}") - async def _retrieve_memories(self, query: str) -> List[Dict[str, Any]]: + async def _retrieve_memories(self, query: str) -> list[dict[str, Any]]: """Retrieve relevant memories from Mem0. 
Runs the blocking Mem0 API call in a background thread to avoid diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index a197bdcbe..25b7feade 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -11,8 +11,9 @@ for streaming text-to-speech synthesis. """ import json +from collections.abc import AsyncGenerator, Mapping from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Mapping, Optional, Self +from typing import Any, Self import aiohttp from loguru import logger @@ -30,7 +31,7 @@ from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts -def language_to_minimax_language(language: Language) -> Optional[str]: +def language_to_minimax_language(language: Language) -> str | None: """Convert a Language enum to MiniMax language format. Args: @@ -162,14 +163,14 @@ class MiniMaxHttpTTSService(TTSService): exclude_aggregated_audio: Whether to exclude aggregated audio in final chunk. 
""" - language: Optional[Language] = Language.EN - speed: Optional[float] = 1.0 - volume: Optional[float] = 1.0 - pitch: Optional[int] = 0 - emotion: Optional[str] = None - text_normalization: Optional[bool] = None - latex_read: Optional[bool] = None - exclude_aggregated_audio: Optional[bool] = None + language: Language | None = Language.EN + speed: float | None = 1.0 + volume: float | None = 1.0 + pitch: int | None = 0 + emotion: str | None = None + text_normalization: bool | None = None + latex_read: bool | None = None + exclude_aggregated_audio: bool | None = None def __init__( self, @@ -177,13 +178,13 @@ class MiniMaxHttpTTSService(TTSService): api_key: str, base_url: str = "https://api.minimax.io/v1/t2a_v2", group_id: str, - model: Optional[str] = None, - voice_id: Optional[str] = None, + model: str | None = None, + voice_id: str | None = None, aiohttp_session: aiohttp.ClientSession, - sample_rate: Optional[int] = None, + sample_rate: int | None = None, stream: bool = True, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the MiniMax TTS service. @@ -312,7 +313,7 @@ class MiniMaxHttpTTSService(TTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to MiniMax service language format. 
Args: diff --git a/src/pipecat/services/mistral/llm.py b/src/pipecat/services/mistral/llm.py index d280aaada..c1ac8b652 100644 --- a/src/pipecat/services/mistral/llm.py +++ b/src/pipecat/services/mistral/llm.py @@ -6,8 +6,8 @@ """Mistral LLM service implementation using OpenAI-compatible interface.""" +from collections.abc import Sequence from dataclasses import dataclass -from typing import List, Optional, Sequence from loguru import logger from openai.types.chat import ChatCompletionMessageParam @@ -44,8 +44,8 @@ class MistralLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://api.mistral.ai/v1", - model: Optional[str] = None, - settings: Optional[Settings] = None, + model: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Mistral LLM service. @@ -93,8 +93,8 @@ class MistralLLMService(OpenAILLMService): return super().create_client(api_key, base_url, **kwargs) def _apply_mistral_fixups( - self, messages: List[ChatCompletionMessageParam] - ) -> List[ChatCompletionMessageParam]: + self, messages: list[ChatCompletionMessageParam] + ) -> list[ChatCompletionMessageParam]: """Apply fixups to messages to meet Mistral-specific requirements. 1. A "tool"-role message must be followed by an assistant message. diff --git a/src/pipecat/services/mistral/stt.py b/src/pipecat/services/mistral/stt.py index c41768d15..3200bc76e 100644 --- a/src/pipecat/services/mistral/stt.py +++ b/src/pipecat/services/mistral/stt.py @@ -10,8 +10,9 @@ This module provides a real-time STT service that integrates with Mistral's Voxtral Realtime transcription API using the Mistral SDK's RealtimeConnection. 
""" +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import Any, AsyncGenerator, Optional +from typing import Any from loguru import logger @@ -88,12 +89,12 @@ class MistralSTTService(STTService): def __init__( self, *, - api_key: Optional[str] = None, - base_url: Optional[str] = None, - sample_rate: Optional[int] = None, - target_streaming_delay_ms: Optional[int] = None, - ttfs_p99_latency: Optional[float] = MISTRAL_TTFS_P99, - settings: Optional[Settings] = None, + api_key: str | None = None, + base_url: str | None = None, + sample_rate: int | None = None, + target_streaming_delay_ms: int | None = None, + ttfs_p99_latency: float | None = MISTRAL_TTFS_P99, + settings: Settings | None = None, **kwargs, ): """Initialize Mistral STT service. @@ -128,10 +129,10 @@ class MistralSTTService(STTService): self._client = Mistral(api_key=api_key, server_url=base_url) self._target_streaming_delay_ms = target_streaming_delay_ms - self._connection: Optional[RealtimeConnection] = None + self._connection: RealtimeConnection | None = None self._receive_task = None self._accumulated_text = "" - self._detected_language: Optional[str] = None + self._detected_language: str | None = None def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. @@ -292,7 +293,7 @@ class MistralSTTService(STTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[str] = None + self, transcript: str, is_final: bool, language: str | None = None ): """Handle a transcription result with tracing.""" pass diff --git a/src/pipecat/services/mistral/tts.py b/src/pipecat/services/mistral/tts.py index d00b98a95..6e660e6e2 100644 --- a/src/pipecat/services/mistral/tts.py +++ b/src/pipecat/services/mistral/tts.py @@ -12,8 +12,8 @@ generating speech from text input using HTTP streaming with Server-Sent Events. 
import base64 import struct +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import AsyncGenerator, Optional from loguru import logger @@ -63,9 +63,9 @@ class MistralTTSService(TTSService): def __init__( self, *, - api_key: Optional[str] = None, - sample_rate: Optional[int] = None, - settings: Optional[Settings] = None, + api_key: str | None = None, + sample_rate: int | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize Mistral TTS service. diff --git a/src/pipecat/services/moondream/vision.py b/src/pipecat/services/moondream/vision.py index 6eeff19cd..70cbe89f5 100644 --- a/src/pipecat/services/moondream/vision.py +++ b/src/pipecat/services/moondream/vision.py @@ -11,8 +11,8 @@ for image analysis and description generation. """ import asyncio +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import AsyncGenerator, Optional from loguru import logger from PIL import Image @@ -85,10 +85,10 @@ class MoondreamService(VisionService): def __init__( self, *, - model: Optional[str] = None, + model: str | None = None, revision="2025-01-09", use_cpu=False, - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Moondream service. 
@@ -149,7 +149,7 @@ class MoondreamService(VisionService): logger.debug(f"Analyzing image (bytes length: {len(frame.image)})") - def get_image_description(image_bytes: bytes, text: Optional[str]) -> str: + def get_image_description(image_bytes: bytes, text: str | None) -> str: image = Image.frombytes(frame.format, frame.size, image_bytes) image_embeds = self._model.encode_image(image) description = self._model.query(image_embeds, text)["answer"] diff --git a/src/pipecat/services/nebius/llm.py b/src/pipecat/services/nebius/llm.py index aa26e776f..f2473b719 100644 --- a/src/pipecat/services/nebius/llm.py +++ b/src/pipecat/services/nebius/llm.py @@ -7,7 +7,6 @@ """Nebius LLM service implementation using OpenAI-compatible interface.""" from dataclasses import dataclass -from typing import Optional from loguru import logger @@ -41,7 +40,7 @@ class NebiusLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://api.tokenfactory.nebius.com/v1/", - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Nebius LLM service. diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index 927a8b7b2..5ee19fb3b 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -13,8 +13,9 @@ text-to-speech API for real-time audio synthesis. import asyncio import base64 import json +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Optional +from typing import Any import aiohttp from loguru import logger @@ -43,7 +44,7 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -def language_to_neuphonic_lang_code(language: Language) -> Optional[str]: +def language_to_neuphonic_lang_code(language: Language) -> str | None: """Convert a Language enum to Neuphonic language code. 
Args: @@ -101,21 +102,21 @@ class NeuphonicTTSService(InterruptibleTTSService): speed: Speech speed multiplier. Defaults to 1.0. """ - language: Optional[Language] = Language.EN - speed: Optional[float] = 1.0 + language: Language | None = Language.EN + speed: float | None = 1.0 def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, + voice_id: str | None = None, url: str = "wss://api.neuphonic.com", - sample_rate: Optional[int] = 22050, + sample_rate: int | None = 22050, encoding: str = "pcm_linear", - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - aggregate_sentences: Optional[bool] = None, - text_aggregation_mode: Optional[TextAggregationMode] = None, + params: InputParams | None = None, + settings: Settings | None = None, + aggregate_sentences: bool | None = None, + text_aggregation_mode: TextAggregationMode | None = None, **kwargs, ): """Initialize the Neuphonic TTS service. @@ -197,7 +198,7 @@ class NeuphonicTTSService(InterruptibleTTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Neuphonic service language format. Args: @@ -244,7 +245,7 @@ class NeuphonicTTSService(InterruptibleTTSService): await super().cancel(frame) await self._disconnect() - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio synthesis by sending stop command.""" if self._websocket: msg = {"text": ""} @@ -417,20 +418,20 @@ class NeuphonicHttpTTSService(TTSService): speed: Speech speed multiplier. Defaults to 1.0. 
""" - language: Optional[Language] = Language.EN - speed: Optional[float] = 1.0 + language: Language | None = Language.EN + speed: float | None = 1.0 def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, + voice_id: str | None = None, aiohttp_session: aiohttp.ClientSession, url: str = "https://api.neuphonic.com", - sample_rate: Optional[int] = 22050, - encoding: Optional[str] = "pcm_linear", - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + sample_rate: int | None = 22050, + encoding: str | None = "pcm_linear", + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Neuphonic HTTP TTS service. @@ -502,7 +503,7 @@ class NeuphonicHttpTTSService(TTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Neuphonic service language format. Args: @@ -521,7 +522,7 @@ class NeuphonicHttpTTSService(TTSService): """ await super().start(frame) - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio synthesis. Note: diff --git a/src/pipecat/services/novita/llm.py b/src/pipecat/services/novita/llm.py index dbe6ff23e..e28b563da 100644 --- a/src/pipecat/services/novita/llm.py +++ b/src/pipecat/services/novita/llm.py @@ -7,7 +7,6 @@ """Novita AI LLM service implementation using OpenAI-compatible interface.""" from dataclasses import dataclass -from typing import Optional from loguru import logger @@ -37,7 +36,7 @@ class NovitaLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://api.novita.ai/openai", - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize Novita AI LLM service. 
diff --git a/src/pipecat/services/nvidia/llm.py b/src/pipecat/services/nvidia/llm.py index a06dfd4da..28b635a62 100644 --- a/src/pipecat/services/nvidia/llm.py +++ b/src/pipecat/services/nvidia/llm.py @@ -11,7 +11,6 @@ Microservice) API while maintaining compatibility with the OpenAI-style interfac """ from dataclasses import dataclass -from typing import Optional from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext @@ -42,8 +41,8 @@ class NvidiaLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://integrate.api.nvidia.com/v1", - model: Optional[str] = None, - settings: Optional[Settings] = None, + model: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the NvidiaLLMService. diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index e8ef3dc08..3181b53ba 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -7,9 +7,10 @@ """NVIDIA Riva Speech-to-Text service implementations for real-time and batch transcription.""" import asyncio +from collections.abc import AsyncGenerator, Mapping from concurrent.futures import CancelledError as FuturesCancelledError from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, List, Mapping, Optional +from typing import Any from loguru import logger from pydantic import BaseModel @@ -39,7 +40,7 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -def language_to_nvidia_riva_language(language: Language) -> Optional[str]: +def language_to_nvidia_riva_language(language: Language) -> str | None: """Maps Language enum to NVIDIA Riva ASR language codes. 
Source: @@ -113,7 +114,7 @@ class NvidiaSegmentedSTTSettings(STTSettings): profanity_filter: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) automatic_punctuation: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) verbatim_transcripts: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - boosted_lm_words: List[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + boosted_lm_words: list[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) boosted_lm_score: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @@ -138,7 +139,7 @@ class NvidiaSTTService(STTService): language: Target language for transcription. Defaults to EN_US. """ - language: Optional[Language] = Language.EN_US + language: Language | None = Language.EN_US def __init__( self, @@ -149,11 +150,11 @@ class NvidiaSTTService(STTService): "function_id": "1598d209-5e27-4d3c-8079-4751568b1081", "model_name": "parakeet-ctc-1.1b-asr", }, - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, + sample_rate: int | None = None, + params: InputParams | None = None, use_ssl: bool = True, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = NVIDIA_TTFS_P99, + settings: Settings | None = None, + ttfs_p99_latency: float | None = NVIDIA_TTFS_P99, **kwargs, ): """Initialize the NVIDIA Riva STT service. @@ -355,7 +356,7 @@ class NvidiaSTTService(STTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing.""" pass @@ -460,11 +461,11 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): boosted_lm_score: Score boost for specified words. 
""" - language: Optional[Language] = Language.EN_US + language: Language | None = Language.EN_US profanity_filter: bool = False automatic_punctuation: bool = True verbatim_transcripts: bool = False - boosted_lm_words: Optional[List[str]] = None + boosted_lm_words: list[str] | None = None boosted_lm_score: float = 4.0 def __init__( @@ -476,11 +477,11 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): "function_id": "ee8dc628-76de-4acc-8595-1836e7e857bd", "model_name": "canary-1b-asr", }, - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, + sample_rate: int | None = None, + params: InputParams | None = None, use_ssl: bool = True, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = NVIDIA_TTFS_P99, + settings: Settings | None = None, + ttfs_p99_latency: float | None = NVIDIA_TTFS_P99, **kwargs, ): """Initialize the NVIDIA Riva segmented STT service. @@ -555,7 +556,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): self._config = None self._asr_service = None - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert pipecat Language enum to NVIDIA Riva's language code. 
Args: @@ -655,7 +656,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing.""" pass diff --git a/src/pipecat/services/nvidia/tts.py b/src/pipecat/services/nvidia/tts.py index daa4e0bd7..0edc0c5f0 100644 --- a/src/pipecat/services/nvidia/tts.py +++ b/src/pipecat/services/nvidia/tts.py @@ -20,8 +20,9 @@ import os import queue import textwrap import threading +from collections.abc import AsyncGenerator, Mapping from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Mapping, Optional +from typing import Any from pipecat.utils.tracing.service_decorators import traced_tts @@ -77,8 +78,8 @@ class _SynthesisStreamState: response_queue: asyncio.Queue stop_event: threading.Event rpc_call: Any = None - synth_task: Optional[asyncio.Task] = None - response_task: Optional[asyncio.Task] = None + synth_task: asyncio.Task | None = None + response_task: asyncio.Task | None = None class NvidiaTTSService(TTSService): @@ -104,25 +105,25 @@ class NvidiaTTSService(TTSService): quality: Audio quality setting (0-100). Defaults to 20. 
""" - language: Optional[Language] = Language.EN_US - quality: Optional[int] = 20 + language: Language | None = Language.EN_US + quality: int | None = 20 def __init__( self, *, - api_key: Optional[str] = None, + api_key: str | None = None, server: str = "grpc.nvcf.nvidia.com:443", - voice_id: Optional[str] = None, - sample_rate: Optional[int] = None, + voice_id: str | None = None, + sample_rate: int | None = None, model_function_map: Mapping[str, str] = { "function_id": "877104f7-e885-42b9-8de8-f6e4c6303969", "model_name": "magpie-tts-multilingual", }, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + params: InputParams | None = None, + settings: Settings | None = None, use_ssl: bool = True, - custom_dictionary: Optional[dict] = None, - encoding: Optional[AudioEncoding] = AudioEncoding.LINEAR_PCM, + custom_dictionary: dict | None = None, + encoding: AudioEncoding | None = AudioEncoding.LINEAR_PCM, **kwargs, ): """Initialize the NVIDIA Nemotron Speech TTS service. @@ -195,7 +196,7 @@ class NvidiaTTSService(TTSService): self._function_id = model_function_map.get("function_id") self._use_ssl = use_ssl - self._custom_dictionary: Optional[str] = None + self._custom_dictionary: str | None = None if custom_dictionary: entries = [f"{k} {v}" for k, v in custom_dictionary.items()] self._custom_dictionary = ",".join(entries) @@ -205,7 +206,7 @@ class NvidiaTTSService(TTSService): self._config = None # Runtime state for the active streaming turn. - self._stream_state: Optional[_SynthesisStreamState] = None + self._stream_state: _SynthesisStreamState | None = None def can_generate_metrics(self) -> bool: """Check if this service can generate metrics. @@ -478,7 +479,7 @@ class NvidiaTTSService(TTSService): if self._stream_state is state: self._stream_state = None - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio and finalize the current context. 
Args: diff --git a/src/pipecat/services/ollama/llm.py b/src/pipecat/services/ollama/llm.py index 89488787e..12967272b 100644 --- a/src/pipecat/services/ollama/llm.py +++ b/src/pipecat/services/ollama/llm.py @@ -7,7 +7,6 @@ """OLLama LLM service implementation for Pipecat AI framework.""" from dataclasses import dataclass -from typing import Optional from loguru import logger @@ -40,9 +39,9 @@ class OLLamaLLMService(OpenAILLMService): def __init__( self, *, - model: Optional[str] = None, + model: str | None = None, base_url: str = "http://localhost:11434/v1", - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize OLLama LLM service. diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py index f8b131511..2933c528d 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -8,9 +8,10 @@ import asyncio import json +from collections.abc import Mapping from contextlib import asynccontextmanager from dataclasses import dataclass, field -from typing import Any, Dict, Mapping, Optional +from typing import Any import httpx from loguru import logger @@ -93,37 +94,33 @@ class BaseOpenAILLMService(LLMService): extra: Additional model-specific parameters. 
""" - frequency_penalty: Optional[float] = Field( - default_factory=lambda: NOT_GIVEN, ge=-2.0, le=2.0 - ) - presence_penalty: Optional[float] = Field( - default_factory=lambda: NOT_GIVEN, ge=-2.0, le=2.0 - ) - seed: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=0) - temperature: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=2.0) + frequency_penalty: float | None = Field(default_factory=lambda: NOT_GIVEN, ge=-2.0, le=2.0) + presence_penalty: float | None = Field(default_factory=lambda: NOT_GIVEN, ge=-2.0, le=2.0) + seed: int | None = Field(default_factory=lambda: NOT_GIVEN, ge=0) + temperature: float | None = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=2.0) # Note: top_k is currently not supported by the OpenAI client library, # so top_k is ignored right now. - top_k: Optional[int] = Field(default=None, ge=0) - top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0) - max_tokens: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=1) - max_completion_tokens: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=1) - service_tier: Optional[str] = Field(default_factory=lambda: NOT_GIVEN) - extra: Optional[Dict[str, Any]] = Field(default_factory=dict) + top_k: int | None = Field(default=None, ge=0) + top_p: float | None = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0) + max_tokens: int | None = Field(default_factory=lambda: NOT_GIVEN, ge=1) + max_completion_tokens: int | None = Field(default_factory=lambda: NOT_GIVEN, ge=1) + service_tier: str | None = Field(default_factory=lambda: NOT_GIVEN) + extra: dict[str, Any] | None = Field(default_factory=dict) def __init__( self, *, - model: Optional[str] = None, + model: str | None = None, api_key=None, base_url=None, organization=None, project=None, - default_headers: Optional[Mapping[str, str]] = None, - service_tier: Optional[str] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - 
retry_timeout_secs: Optional[float] = 5.0, - retry_on_timeout: Optional[bool] = False, + default_headers: Mapping[str, str] | None = None, + service_tier: str | None = None, + params: InputParams | None = None, + settings: Settings | None = None, + retry_timeout_secs: float | None = 5.0, + retry_on_timeout: bool | None = False, **kwargs, ): """Initialize the BaseOpenAILLMService. @@ -296,7 +293,7 @@ class BaseOpenAILLMService(LLMService): self._client.chat.completions.create(**params), timeout=self._retry_timeout_secs ) return chunks - except (APITimeoutError, asyncio.TimeoutError): + except (TimeoutError, APITimeoutError): # Retry, this time without a timeout so we get a response logger.debug(f"{self}: Retrying chat completion due to timeout") chunks = await self._client.chat.completions.create(**params) @@ -342,9 +339,9 @@ class BaseOpenAILLMService(LLMService): async def run_inference( self, context: LLMContext, - max_tokens: Optional[int] = None, - system_instruction: Optional[str] = None, - ) -> Optional[str]: + max_tokens: int | None = None, + system_instruction: str | None = None, + ) -> str | None: """Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context. Args: diff --git a/src/pipecat/services/openai/image.py b/src/pipecat/services/openai/image.py index de010247b..eb3cd429d 100644 --- a/src/pipecat/services/openai/image.py +++ b/src/pipecat/services/openai/image.py @@ -11,8 +11,9 @@ for creating images from text prompts. 
""" import io +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import AsyncGenerator, Literal, Optional +from typing import Literal import aiohttp from loguru import logger @@ -55,13 +56,12 @@ class OpenAIImageGenService(ImageGenService): self, *, api_key: str, - base_url: Optional[str] = None, + base_url: str | None = None, aiohttp_session: aiohttp.ClientSession, - image_size: Optional[ - Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"] - ] = None, - model: Optional[str] = None, - settings: Optional[Settings] = None, + image_size: Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"] + | None = None, + model: str | None = None, + settings: Settings | None = None, ): """Initialize the OpenAI image generation service. diff --git a/src/pipecat/services/openai/llm.py b/src/pipecat/services/openai/llm.py index 7f5bb7ca3..21894b93c 100644 --- a/src/pipecat/services/openai/llm.py +++ b/src/pipecat/services/openai/llm.py @@ -6,8 +6,6 @@ """OpenAI LLM service implementation with context aggregators.""" -from typing import Optional - from openai import NOT_GIVEN from pipecat.services.openai.base_llm import BaseOpenAILLMService @@ -26,10 +24,10 @@ class OpenAILLMService(BaseOpenAILLMService): def __init__( self, *, - model: Optional[str] = None, - service_tier: Optional[str] = None, - params: Optional[BaseOpenAILLMService.InputParams] = None, - settings: Optional[Settings] = None, + model: str | None = None, + service_tier: str | None = None, + params: BaseOpenAILLMService.InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize OpenAI LLM service. 
diff --git a/src/pipecat/services/openai/realtime/events.py b/src/pipecat/services/openai/realtime/events.py index 0aa1355e6..57b53c565 100644 --- a/src/pipecat/services/openai/realtime/events.py +++ b/src/pipecat/services/openai/realtime/events.py @@ -8,7 +8,7 @@ import json import uuid -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Literal from pydantic import BaseModel, ConfigDict, Field @@ -61,14 +61,14 @@ class InputAudioTranscription(BaseModel): """Configuration for audio transcription settings.""" model: str = "gpt-4o-transcribe" - language: Optional[str] - prompt: Optional[str] + language: str | None + prompt: str | None def __init__( self, - model: Optional[str] = "gpt-4o-transcribe", - language: Optional[str] = None, - prompt: Optional[str] = None, + model: str | None = "gpt-4o-transcribe", + language: str | None = None, + prompt: str | None = None, ): """Initialize InputAudioTranscription. @@ -90,10 +90,10 @@ class TurnDetection(BaseModel): silence_duration_ms: Silence duration to detect speech end in milliseconds. Defaults to 500. """ - type: Optional[Literal["server_vad"]] = "server_vad" - threshold: Optional[float] = 0.5 - prefix_padding_ms: Optional[int] = 300 - silence_duration_ms: Optional[int] = 500 + type: Literal["server_vad"] | None = "server_vad" + threshold: float | None = 0.5 + prefix_padding_ms: int | None = 300 + silence_duration_ms: int | None = 500 class SemanticTurnDetection(BaseModel): @@ -106,10 +106,10 @@ class SemanticTurnDetection(BaseModel): interrupt_response: Whether to interrupt ongoing responses on turn detection. 
""" - type: Optional[Literal["semantic_vad"]] = "semantic_vad" - eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None - create_response: Optional[bool] = None - interrupt_response: Optional[bool] = None + type: Literal["semantic_vad"] | None = "semantic_vad" + eagerness: Literal["low", "medium", "high", "auto"] | None = None + create_response: bool | None = None + interrupt_response: bool | None = None class InputAudioNoiseReduction(BaseModel): @@ -119,7 +119,7 @@ class InputAudioNoiseReduction(BaseModel): type: Noise reduction type for different microphone scenarios. """ - type: Optional[Literal["near_field", "far_field"]] + type: Literal["near_field", "far_field"] | None class AudioInput(BaseModel): @@ -132,10 +132,10 @@ class AudioInput(BaseModel): turn_detection: Configuration for turn detection, or False to disable. """ - format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None - transcription: Optional[InputAudioTranscription] = None - noise_reduction: Optional[InputAudioNoiseReduction] = None - turn_detection: Optional[Union[TurnDetection, SemanticTurnDetection, bool]] = None + format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None + transcription: InputAudioTranscription | None = None + noise_reduction: InputAudioNoiseReduction | None = None + turn_detection: TurnDetection | SemanticTurnDetection | bool | None = None class AudioOutput(BaseModel): @@ -147,9 +147,9 @@ class AudioOutput(BaseModel): speed: The speed of the model's spoken response. """ - format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None - voice: Optional[str] = None - speed: Optional[float] = None + format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None + voice: str | None = None + speed: float | None = None class AudioConfiguration(BaseModel): @@ -160,8 +160,8 @@ class AudioConfiguration(BaseModel): output: Configuration for output audio. 
""" - input: Optional[AudioInput] = None - output: Optional[AudioOutput] = None + input: AudioInput | None = None + output: AudioOutput | None = None class SessionProperties(BaseModel): @@ -189,23 +189,23 @@ class SessionProperties(BaseModel): # Needed to support ToolSchema in tools field. model_config = ConfigDict(arbitrary_types_allowed=True) - type: Optional[Literal["realtime"]] = "realtime" - object: Optional[Literal["realtime.session"]] = None - id: Optional[str] = None - model: Optional[str] = None - output_modalities: Optional[List[Literal["text", "audio"]]] = None - instructions: Optional[str] = None - audio: Optional[AudioConfiguration] = None + type: Literal["realtime"] | None = "realtime" + object: Literal["realtime.session"] | None = None + id: str | None = None + model: str | None = None + output_modalities: list[Literal["text", "audio"]] | None = None + instructions: str | None = None + audio: AudioConfiguration | None = None # Tools can only be ToolsSchema when provided by the user, in either the # OpenAIRealtimeLLMService constructor or through LLMUpdateSettingsFrame. # We'll never serialize/deserialize ToolsSchema when talking to the server. 
- tools: Optional[ToolsSchema | List[Dict]] = None - tool_choice: Optional[Literal["auto", "none", "required"]] = None - max_output_tokens: Optional[Union[int, Literal["inf"]]] = None - tracing: Optional[Union[Literal["auto"], Dict]] = None - prompt: Optional[Dict] = None - expires_at: Optional[int] = None - include: Optional[List[str]] = None + tools: ToolsSchema | list[dict] | None = None + tool_choice: Literal["auto", "none", "required"] | None = None + max_output_tokens: int | Literal["inf"] | None = None + tracing: Literal["auto"] | dict | None = None + prompt: dict | None = None + expires_at: int | None = None + include: list[str] | None = None # @@ -228,11 +228,11 @@ class ItemContent(BaseModel): type: Literal[ "text", "audio", "input_text", "input_audio", "input_image", "output_text", "output_audio" ] - text: Optional[str] = None - audio: Optional[str] = None # base64-encoded audio - transcript: Optional[str] = None - image_url: Optional[str] = None # base64-encoded image as data URI - detail: Optional[Literal["auto", "low", "high"]] = None + text: str | None = None + audio: str | None = None # base64-encoded audio + transcript: str | None = None + image_url: str | None = None # base64-encoded image as data URI + detail: Literal["auto", "low", "high"] | None = None class ConversationItem(BaseModel): @@ -252,17 +252,17 @@ class ConversationItem(BaseModel): """ id: str = Field(default_factory=lambda: str(uuid.uuid4().hex)) - object: Optional[Literal["realtime.item"]] = None + object: Literal["realtime.item"] | None = None type: Literal["message", "function_call", "function_call_output"] - status: Optional[Literal["completed", "in_progress", "incomplete"]] = None + status: Literal["completed", "in_progress", "incomplete"] | None = None # role and content are present for message items - role: Optional[Literal["user", "assistant", "system"]] = None - content: Optional[List[ItemContent]] = None + role: Literal["user", "assistant", "system"] | None = None + 
content: list[ItemContent] | None = None # these four fields are present for function_call items - call_id: Optional[str] = None - name: Optional[str] = None - arguments: Optional[str] = None - output: Optional[str] = None + call_id: str | None = None + name: str | None = None + arguments: str | None = None + output: str | None = None class RealtimeConversation(BaseModel): @@ -290,13 +290,13 @@ class ResponseProperties(BaseModel): max_output_tokens: Maximum tokens for this response. """ - output_modalities: Optional[List[Literal["text", "audio"]]] = ["audio"] - instructions: Optional[str] = None - audio: Optional[AudioConfiguration] = None - tools: Optional[List[Dict]] = None - tool_choice: Optional[Literal["auto", "none", "required"]] = None - temperature: Optional[float] = None - max_output_tokens: Optional[Union[int, Literal["inf"]]] = None + output_modalities: list[Literal["text", "audio"]] | None = ["audio"] + instructions: str | None = None + audio: AudioConfiguration | None = None + tools: list[dict] | None = None + tool_choice: Literal["auto", "none", "required"] | None = None + temperature: float | None = None + max_output_tokens: int | Literal["inf"] | None = None # @@ -314,10 +314,10 @@ class RealtimeError(BaseModel): """ type: str - code: Optional[str] = "" + code: str | None = "" message: str - param: Optional[str] = None - event_id: Optional[str] = None + param: str | None = None + event_id: str | None = None # @@ -346,7 +346,7 @@ class SessionUpdateEvent(ClientEvent): type: Literal["session.update"] = "session.update" session: SessionProperties - def model_dump(self, *args, **kwargs) -> Dict[str, Any]: + def model_dump(self, *args, **kwargs) -> dict[str, Any]: """Serialize the event to a dictionary. Handles special serialization for turn_detection where False becomes null. 
@@ -412,7 +412,7 @@ class ConversationItemCreateEvent(ClientEvent): """ type: Literal["conversation.item.create"] = "conversation.item.create" - previous_item_id: Optional[str] = None + previous_item_id: str | None = None item: ConversationItem @@ -465,7 +465,7 @@ class ResponseCreateEvent(ClientEvent): """ type: Literal["response.create"] = "response.create" - response: Optional[ResponseProperties] = None + response: ResponseProperties | None = None class ResponseCancelEvent(ClientEvent): @@ -543,7 +543,7 @@ class ConversationItemAdded(ServerEvent): """ type: Literal["conversation.item.added"] - previous_item_id: Optional[str] = None + previous_item_id: str | None = None item: ConversationItem @@ -557,7 +557,7 @@ class ConversationItemDone(ServerEvent): """ type: Literal["conversation.item.done"] - previous_item_id: Optional[str] = None + previous_item_id: str | None = None item: ConversationItem @@ -941,7 +941,7 @@ class InputAudioBufferCommitted(ServerEvent): """ type: Literal["input_audio_buffer.committed"] - previous_item_id: Optional[str] = None + previous_item_id: str | None = None item_id: str @@ -976,7 +976,7 @@ class RateLimitsUpdated(ServerEvent): """ type: Literal["rate_limits.updated"] - rate_limits: List[Dict[str, Any]] + rate_limits: list[dict[str, Any]] class CachedTokensDetails(BaseModel): @@ -987,8 +987,8 @@ class CachedTokensDetails(BaseModel): audio_tokens: Number of cached audio tokens. 
""" - text_tokens: Optional[int] = 0 - audio_tokens: Optional[int] = 0 + text_tokens: int | None = 0 + audio_tokens: int | None = 0 class TokenDetails(BaseModel): @@ -1004,11 +1004,11 @@ class TokenDetails(BaseModel): model_config = ConfigDict(extra="allow") - cached_tokens: Optional[int] = 0 - text_tokens: Optional[int] = 0 - audio_tokens: Optional[int] = 0 - cached_tokens_details: Optional[CachedTokensDetails] = None - image_tokens: Optional[int] = 0 + cached_tokens: int | None = 0 + text_tokens: int | None = 0 + audio_tokens: int | None = 0 + cached_tokens_details: CachedTokensDetails | None = None + image_tokens: int | None = 0 class Usage(BaseModel): @@ -1052,14 +1052,14 @@ class Response(BaseModel): object: Literal["realtime.response"] status: Literal["completed", "in_progress", "incomplete", "cancelled", "failed"] status_details: Any - output: List[ConversationItem] - output_modalities: Optional[List[Literal["text", "audio"]]] = None - max_output_tokens: Optional[Union[int, Literal["inf"]]] = None - audio: Optional[AudioConfiguration] = None - usage: Optional[Usage] = None - voice: Optional[str] = None - temperature: Optional[float] = None - output_audio_format: Optional[str] = None + output: list[ConversationItem] + output_modalities: list[Literal["text", "audio"]] | None = None + max_output_tokens: int | Literal["inf"] | None = None + audio: AudioConfiguration | None = None + usage: Usage | None = None + voice: str | None = None + temperature: float | None = None + output_audio_format: str | None = None _server_event_types = { diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index 524a0fe73..c34345ae3 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -10,9 +10,10 @@ import base64 import io import json import time +from collections.abc import Mapping from dataclasses import dataclass, field from dataclasses import fields as dataclass_fields 
-from typing import Any, Dict, Mapping, Optional, Type +from typing import Any from loguru import logger from PIL import Image @@ -117,7 +118,7 @@ class OpenAIRealtimeLLMSettings(LLMSettings): # -- apply_update override ----------------------------------------------- - def apply_update(self, delta: "OpenAIRealtimeLLMService.Settings") -> Dict[str, Any]: + def apply_update(self, delta: "OpenAIRealtimeLLMService.Settings") -> dict[str, Any]: """Merge a delta, keeping ``model``/``system_instruction`` in sync with SP. When the delta contains ``session_properties``, it **replaces** the @@ -155,7 +156,7 @@ class OpenAIRealtimeLLMSettings(LLMSettings): @classmethod def from_mapping( - cls: Type["OpenAIRealtimeLLMService.Settings"], settings: Mapping[str, Any] + cls: type["OpenAIRealtimeLLMService.Settings"], settings: Mapping[str, Any] ) -> "OpenAIRealtimeLLMService.Settings": """Build a delta from a plain dict, routing SP keys into ``session_properties``. @@ -166,9 +167,9 @@ class OpenAIRealtimeLLMSettings(LLMSettings): # Determine which keys belong to our own dataclass fields. own_field_names = {f.name for f in dataclass_fields(cls)} - {"extra"} - top: Dict[str, Any] = {} - sp_dict: Dict[str, Any] = {} - extra: Dict[str, Any] = {} + top: dict[str, Any] = {} + sp_dict: dict[str, Any] = {} + extra: dict[str, Any] = {} # Build the SP field set without instantiating (avoid __post_init__ # cost for every from_mapping call). 
@@ -210,10 +211,10 @@ class OpenAIRealtimeLLMService(LLMService): self, *, api_key: str, - model: Optional[str] = None, + model: str | None = None, base_url: str = "wss://api.openai.com/v1/realtime", - session_properties: Optional[events.SessionProperties] = None, - settings: Optional[Settings] = None, + session_properties: events.SessionProperties | None = None, + settings: Settings | None = None, start_audio_paused: bool = False, start_video_paused: bool = False, video_frame_detail: str = "auto", @@ -807,7 +808,7 @@ class OpenAIRealtimeLLMService(LLMService): @traced_stt async def _handle_user_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing.""" pass diff --git a/src/pipecat/services/openai/responses/llm.py b/src/pipecat/services/openai/responses/llm.py index e1b4ace78..c7959f05a 100644 --- a/src/pipecat/services/openai/responses/llm.py +++ b/src/pipecat/services/openai/responses/llm.py @@ -10,9 +10,10 @@ import asyncio import hashlib import json import os +from collections.abc import Mapping from contextlib import asynccontextmanager from dataclasses import dataclass, field -from typing import Any, Dict, List, Mapping, Optional +from typing import Any import httpx from loguru import logger @@ -124,9 +125,9 @@ class _BaseOpenAIResponsesLLMService(LLMService): base_url=None, organization=None, project=None, - default_headers: Optional[Mapping[str, str]] = None, - service_tier: Optional[str] = None, - settings: Optional[Settings] = None, + default_headers: Mapping[str, str] | None = None, + service_tier: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the OpenAI Responses API LLM service. @@ -227,7 +228,7 @@ class _BaseOpenAIResponsesLLMService(LLMService): Returns: Dictionary of parameters for the Responses API call. 
""" - params: Dict[str, Any] = { + params: dict[str, Any] = { "model": self._settings.model, "stream": True, # store=False avoids OpenAI-side 30-day conversation storage. @@ -268,9 +269,9 @@ class _BaseOpenAIResponsesLLMService(LLMService): async def run_inference( self, context: LLMContext, - max_tokens: Optional[int] = None, - system_instruction: Optional[str] = None, - ) -> Optional[str]: + max_tokens: int | None = None, + system_instruction: str | None = None, + ) -> str | None: """Run a one-shot, out-of-band inference with the given LLM context. Always uses the HTTP client regardless of transport variant. @@ -304,8 +305,8 @@ class _BaseOpenAIResponsesLLMService(LLMService): def _process_function_calls( self, context: LLMContext, - function_calls: Dict[str, Dict[str, str]], - ) -> List[FunctionCallFromLLM]: + function_calls: dict[str, dict[str, str]], + ) -> list[FunctionCallFromLLM]: """Convert accumulated function call data into FunctionCallFromLLM list. Args: @@ -315,7 +316,7 @@ class _BaseOpenAIResponsesLLMService(LLMService): Returns: List of parsed function call objects. 
""" - fc_list: List[FunctionCallFromLLM] = [] + fc_list: list[FunctionCallFromLLM] = [] for item_id, fc in function_calls.items(): try: arguments = json.loads(fc["arguments"]) if fc["arguments"] else {} @@ -388,13 +389,13 @@ class OpenAIResponsesLLMService(_BaseOpenAIResponsesLLMService, WebsocketLLMServ self._ws_url = ws_url # State for previous_response_id optimization - self._previous_response_id: Optional[str] = None - self._previous_input_hash: Optional[str] = None - self._previous_input_length: Optional[int] = None - self._previous_response_output: Optional[list] = None + self._previous_response_id: str | None = None + self._previous_input_hash: str | None = None + self._previous_input_length: int | None = None + self._previous_response_output: list | None = None # Response cancellation state - self._current_response_id: Optional[str] = None # ID of current non-cancelled response + self._current_response_id: str | None = None # ID of current non-cancelled response self._cancel_pending_response: bool = False self._needs_drain: bool = False @@ -659,7 +660,7 @@ class OpenAIResponsesLLMService(_BaseOpenAIResponsesLLMService, WebsocketLLMServ ) self._clear_cancellation_state() return - except (asyncio.TimeoutError, WebsocketReconnectedError, ConnectionClosed) as e: + except (TimeoutError, WebsocketReconnectedError, ConnectionClosed) as e: logger.warning(f"{self}: Error draining cancelled response: {e}") self._clear_cancellation_state() @@ -815,8 +816,8 @@ class OpenAIResponsesLLMService(_BaseOpenAIResponsesLLMService, WebsocketLLMServ WebsocketReconnectedError: Connection was lost and auto-recovered. ConnectionClosed: Connection was lost and could not be recovered. 
""" - function_calls: Dict[str, Dict[str, str]] = {} - current_arguments: Dict[str, str] = {} + function_calls: dict[str, dict[str, str]] = {} + current_arguments: dict[str, str] = {} while True: event = await self._ws_recv() @@ -991,8 +992,8 @@ class OpenAIResponsesHttpLLMService(_BaseOpenAIResponsesLLMService): stream: AsyncStream[ResponseStreamEvent] = await self._client.responses.create(**params) # Track function calls across stream events - function_calls: Dict[str, Dict[str, str]] = {} # item_id -> {name, call_id, arguments} - current_arguments: Dict[str, str] = {} # item_id -> accumulated arguments + function_calls: dict[str, dict[str, str]] = {} # item_id -> {name, call_id, arguments} + current_arguments: dict[str, str] = {} # item_id -> accumulated arguments # Ensure stream and its async iterator are closed on cancellation/exception # to prevent socket leaks and uvloop crashes. Closing the iterator first diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py index fd98dbf49..ca0537fef 100644 --- a/src/pipecat/services/openai/stt.py +++ b/src/pipecat/services/openai/stt.py @@ -16,8 +16,9 @@ Provides two STT services: import base64 import json +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Literal, Optional, Union +from typing import Any, Literal from loguru import logger @@ -74,14 +75,14 @@ class OpenAISTTService(BaseWhisperSTTService): def __init__( self, *, - model: Optional[str] = None, - api_key: Optional[str] = None, - base_url: Optional[str] = None, - language: Optional[Language] = Language.EN, - prompt: Optional[str] = None, - temperature: Optional[float] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = OPENAI_TTFS_P99, + model: str | None = None, + api_key: str | None = None, + base_url: str | None = None, + language: Language | None = Language.EN, + prompt: str | None = None, + temperature: float | None = 
None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = OPENAI_TTFS_P99, **kwargs, ): """Initialize OpenAI STT service. @@ -239,15 +240,15 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): self, *, api_key: str, - model: Optional[str] = None, + model: str | None = None, base_url: str = "wss://api.openai.com/v1/realtime", - language: Optional[Language] = Language.EN, - prompt: Optional[str] = None, - turn_detection: Optional[Union[dict, Literal[False]]] = False, - noise_reduction: Optional[Literal["near_field", "far_field"]] = None, + language: Language | None = Language.EN, + prompt: str | None = None, + turn_detection: dict | Literal[False] | None = False, + noise_reduction: Literal["near_field", "far_field"] | None = None, should_interrupt: bool = True, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = OPENAI_REALTIME_TTFS_P99, + settings: Settings | None = None, + ttfs_p99_latency: float | None = OPENAI_REALTIME_TTFS_P99, **kwargs, ): """Initialize the OpenAI Realtime STT service. @@ -712,7 +713,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): self, transcript: str, is_final: bool, - language: Optional[Language] = None, + language: Language | None = None, ): """Record transcription result for tracing. diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py index 074792b33..6825d6968 100644 --- a/src/pipecat/services/openai/tts.py +++ b/src/pipecat/services/openai/tts.py @@ -10,8 +10,9 @@ This module provides integration with OpenAI's text-to-speech API for generating high-quality synthetic speech from text input. 
""" +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import AsyncGenerator, Dict, Literal, Optional +from typing import Literal from loguru import logger from openai import AsyncOpenAI, BadRequestError @@ -43,7 +44,7 @@ ValidVoice = Literal[ "verse", ] -VALID_VOICES: Dict[str, ValidVoice] = { +VALID_VOICES: dict[str, ValidVoice] = { "alloy": "alloy", "ash": "ash", "ballad": "ballad", @@ -97,21 +98,21 @@ class OpenAITTSService(TTSService): speed: Voice speed control (0.25 to 4.0, default 1.0). """ - instructions: Optional[str] = None - speed: Optional[float] = None + instructions: str | None = None + speed: float | None = None def __init__( self, *, - api_key: Optional[str] = None, - base_url: Optional[str] = None, - voice: Optional[str] = None, - model: Optional[str] = None, - sample_rate: Optional[int] = None, - instructions: Optional[str] = None, - speed: Optional[float] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + api_key: str | None = None, + base_url: str | None = None, + voice: str | None = None, + model: str | None = None, + sample_rate: int | None = None, + instructions: str | None = None, + speed: float | None = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize OpenAI TTS service. diff --git a/src/pipecat/services/openrouter/llm.py b/src/pipecat/services/openrouter/llm.py index f92fb5e3b..d257e4cc9 100644 --- a/src/pipecat/services/openrouter/llm.py +++ b/src/pipecat/services/openrouter/llm.py @@ -11,7 +11,7 @@ extending the base OpenAI LLM service functionality. 
""" from dataclasses import dataclass -from typing import Any, Dict, Optional +from typing import Any from loguru import logger @@ -39,10 +39,10 @@ class OpenRouterLLMService(OpenAILLMService): def __init__( self, *, - api_key: Optional[str] = None, - model: Optional[str] = None, + api_key: str | None = None, + model: str | None = None, base_url: str = "https://openrouter.ai/api/v1", - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize the OpenRouter LLM service. @@ -95,7 +95,7 @@ class OpenRouterLLMService(OpenAILLMService): logger.debug(f"Creating OpenRouter client with api {base_url}") return super().create_client(api_key, base_url, **kwargs) - def build_chat_completion_params(self, params_from_context: Dict[str, Any]) -> Dict[str, Any]: + def build_chat_completion_params(self, params_from_context: dict[str, Any]) -> dict[str, Any]: """Builds chat parameters, handling model-specific constraints. Args: diff --git a/src/pipecat/services/perplexity/llm.py b/src/pipecat/services/perplexity/llm.py index 453db7f2d..9a2852fbf 100644 --- a/src/pipecat/services/perplexity/llm.py +++ b/src/pipecat/services/perplexity/llm.py @@ -12,7 +12,6 @@ reporting patterns while maintaining compatibility with the Pipecat framework. """ from dataclasses import dataclass -from typing import Optional from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams from pipecat.adapters.services.perplexity_adapter import PerplexityLLMAdapter @@ -50,8 +49,8 @@ class PerplexityLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://api.perplexity.ai", - model: Optional[str] = None, - settings: Optional[Settings] = None, + model: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Perplexity LLM service. 
diff --git a/src/pipecat/services/piper/tts.py b/src/pipecat/services/piper/tts.py index 1b0037abb..ab055c76d 100644 --- a/src/pipecat/services/piper/tts.py +++ b/src/pipecat/services/piper/tts.py @@ -7,9 +7,10 @@ """Piper TTS service implementation.""" import asyncio +from collections.abc import AsyncGenerator, AsyncIterator from dataclasses import dataclass from pathlib import Path -from typing import Any, AsyncGenerator, AsyncIterator, Optional +from typing import Any import aiohttp from loguru import logger @@ -53,11 +54,11 @@ class PiperTTSService(TTSService): def __init__( self, *, - voice_id: Optional[str] = None, - download_dir: Optional[Path] = None, + voice_id: str | None = None, + download_dir: Path | None = None, force_redownload: bool = False, use_cuda: bool = False, - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Piper TTS service. @@ -209,8 +210,8 @@ class PiperHttpTTSService(TTSService): *, base_url: str, aiohttp_session: aiohttp.ClientSession, - voice_id: Optional[str] = None, - settings: Optional[Settings] = None, + voice_id: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Piper TTS service. diff --git a/src/pipecat/services/qwen/llm.py b/src/pipecat/services/qwen/llm.py index df07467ba..5d6ecbb1b 100644 --- a/src/pipecat/services/qwen/llm.py +++ b/src/pipecat/services/qwen/llm.py @@ -7,7 +7,6 @@ """Qwen LLM service implementation using OpenAI-compatible interface.""" from dataclasses import dataclass -from typing import Optional from loguru import logger @@ -41,8 +40,8 @@ class QwenLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1", - model: Optional[str] = None, - settings: Optional[Settings] = None, + model: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Qwen LLM service. 
diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index d4bb77fb0..595b295e3 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -8,8 +8,8 @@ import base64 import json +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import AsyncGenerator, Optional from loguru import logger @@ -58,12 +58,12 @@ class ResembleAITTSService(WebsocketTTSService): self, *, api_key: str, - voice_id: Optional[str] = None, + voice_id: str | None = None, url: str = "wss://websocket.cluster.resemble.ai/stream", - precision: Optional[str] = "PCM_16", - output_format: Optional[str] = "wav", - sample_rate: Optional[int] = 22050, - settings: Optional[Settings] = None, + precision: str | None = "PCM_16", + output_format: str | None = "wav", + sample_rate: int | None = 22050, + settings: Settings | None = None, **kwargs, ): """Initialize the Resemble AI TTS service. @@ -269,7 +269,7 @@ class ResembleAITTSService(WebsocketTTSService): """ await super().on_audio_context_completed(context_id) - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio and finalize the current context.""" logger.trace(f"{self}: flushing audio") # For Resemble AI, we just wait for the audio_end message diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index 41045688c..2745f2cf4 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -12,8 +12,9 @@ using Rime's API for streaming and batch audio synthesis. 
import base64 import json +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, ClassVar, Dict, Optional +from typing import Any, ClassVar import aiohttp from loguru import logger @@ -98,7 +99,7 @@ class RimeTTSSettings(TTSSettings): temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) top_p: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - _aliases: ClassVar[Dict[str, str]] = {"speaker": "voice"} + _aliases: ClassVar[dict[str, str]] = {"speaker": "voice"} @dataclass @@ -117,7 +118,7 @@ class RimeNonJsonTTSSettings(TTSSettings): temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) top_p: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - _aliases: ClassVar[Dict[str, str]] = {"speaker": "voice"} + _aliases: ClassVar[dict[str, str]] = {"speaker": "voice"} class RimeTTSService(WebsocketTTSService): @@ -151,32 +152,32 @@ class RimeTTSService(WebsocketTTSService): save_oovs: Whether to save out-of-vocabulary words (mistv2 only). 
""" - language: Optional[Language] = Language.EN - segment: Optional[str] = None - speed_alpha: Optional[float] = None + language: Language | None = Language.EN + segment: str | None = None + speed_alpha: float | None = None # Arcana params - repetition_penalty: Optional[float] = None - temperature: Optional[float] = None - top_p: Optional[float] = None + repetition_penalty: float | None = None + temperature: float | None = None + top_p: float | None = None # Mistv2 params - reduce_latency: Optional[bool] = None - pause_between_brackets: Optional[bool] = None - phonemize_between_brackets: Optional[bool] = None - no_text_normalization: Optional[bool] = None - save_oovs: Optional[bool] = None + reduce_latency: bool | None = None + pause_between_brackets: bool | None = None + phonemize_between_brackets: bool | None = None + no_text_normalization: bool | None = None + save_oovs: bool | None = None def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, + voice_id: str | None = None, url: str = "wss://users-ws.rime.ai/ws3", - model: Optional[str] = None, - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - text_aggregation_mode: Optional[TextAggregationMode] = None, - aggregate_sentences: Optional[bool] = None, + model: str | None = None, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, + text_aggregation_mode: TextAggregationMode | None = None, + aggregate_sentences: bool | None = None, **kwargs, ): """Initialize Rime TTS service. 
@@ -545,7 +546,7 @@ class RimeTTSService(WebsocketTTSService): return word_pairs - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio synthesis.""" flush_id = context_id or self.get_active_audio_context_id() if not flush_id or not self._websocket: @@ -663,23 +664,23 @@ class RimeHttpTTSService(TTSService): reduce_latency: Whether to reduce latency at potential quality cost. """ - language: Optional[Language] = Language.EN - pause_between_brackets: Optional[bool] = False - phonemize_between_brackets: Optional[bool] = False - inline_speed_alpha: Optional[str] = None - speed_alpha: Optional[float] = 1.0 - reduce_latency: Optional[bool] = False + language: Language | None = Language.EN + pause_between_brackets: bool | None = False + phonemize_between_brackets: bool | None = False + inline_speed_alpha: str | None = None + speed_alpha: float | None = 1.0 + reduce_latency: bool | None = False def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, + voice_id: str | None = None, aiohttp_session: aiohttp.ClientSession, - model: Optional[str] = None, - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + model: str | None = None, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize Rime HTTP TTS service. @@ -886,26 +887,26 @@ class RimeNonJsonTTSService(InterruptibleTTSService): extra: Additional parameters to pass to the API (for future compatibility). 
""" - language: Optional[Language] = None - segment: Optional[str] = None - repetition_penalty: Optional[float] = None - temperature: Optional[float] = None - top_p: Optional[float] = None - extra: Optional[dict[str, Any]] = None + language: Language | None = None + segment: str | None = None + repetition_penalty: float | None = None + temperature: float | None = None + top_p: float | None = None + extra: dict[str, Any] | None = None def __init__( self, *, api_key: str, - voice_id: Optional[str] = None, + voice_id: str | None = None, url: str = "wss://users.rime.ai/ws", - model: Optional[str] = None, + model: str | None = None, audio_format: str = "pcm", - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - aggregate_sentences: Optional[bool] = None, - text_aggregation_mode: Optional[TextAggregationMode] = None, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, + aggregate_sentences: bool | None = None, + text_aggregation_mode: TextAggregationMode | None = None, **kwargs, ): """Initialize Rime Non-JSON WebSocket TTS service. 
@@ -1113,7 +1114,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): return self._websocket raise Exception("Websocket not connected") - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio synthesis.""" if not self._websocket: return diff --git a/src/pipecat/services/sambanova/llm.py b/src/pipecat/services/sambanova/llm.py index 66f3d52bf..4efc5e7ed 100644 --- a/src/pipecat/services/sambanova/llm.py +++ b/src/pipecat/services/sambanova/llm.py @@ -8,7 +8,7 @@ import json from dataclasses import dataclass -from typing import Any, Dict, Optional +from typing import Any from loguru import logger from openai import AsyncStream @@ -51,10 +51,10 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore self, *, api_key: str, - model: Optional[str] = None, + model: str | None = None, base_url: str = "https://api.sambanova.ai/v1", - settings: Optional[Settings] = None, - **kwargs: Dict[Any, Any], + settings: Settings | None = None, + **kwargs: dict[Any, Any], ) -> None: """Initialize SambaNova LLM service. @@ -88,9 +88,9 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore def create_client( self, - api_key: Optional[str] = None, - base_url: Optional[str] = None, - **kwargs: Dict[Any, Any], + api_key: str | None = None, + base_url: str | None = None, + **kwargs: dict[Any, Any], ) -> Any: """Create OpenAI-compatible client for SambaNova API endpoint. 
diff --git a/src/pipecat/services/sarvam/_sdk.py b/src/pipecat/services/sarvam/_sdk.py index 31085d289..45988c49d 100644 --- a/src/pipecat/services/sarvam/_sdk.py +++ b/src/pipecat/services/sarvam/_sdk.py @@ -5,12 +5,11 @@ # import platform -from typing import Dict from pipecat import version as pipecat_version -def sdk_headers() -> Dict[str, str]: +def sdk_headers() -> dict[str, str]: """SDK identification headers for upstream providers.""" return { "User-Agent": f"Pipecat/{pipecat_version()} Python/{platform.python_version()}", diff --git a/src/pipecat/services/sarvam/llm.py b/src/pipecat/services/sarvam/llm.py index ce353ff2b..d86ba1874 100644 --- a/src/pipecat/services/sarvam/llm.py +++ b/src/pipecat/services/sarvam/llm.py @@ -6,8 +6,9 @@ """Sarvam LLM service implementation using OpenAI-compatible interface.""" +from collections.abc import Mapping from dataclasses import dataclass, field -from typing import Literal, Mapping, Optional +from typing import Literal from loguru import logger from openai import NOT_GIVEN @@ -57,8 +58,8 @@ class SarvamLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://api.sarvam.ai/v1", - settings: Optional[Settings] = None, - default_headers: Optional[Mapping[str, str]] = None, + settings: Settings | None = None, + default_headers: Mapping[str, str] | None = None, **kwargs, ): """Initialize Sarvam LLM service. diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index 7271a3e5c..faadd914b 100644 --- a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -12,8 +12,9 @@ can handle multiple audio formats for Indian language speech recognition. 
""" import base64 +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, Dict, Literal, Optional +from typing import Any, Literal from loguru import logger from pydantic import BaseModel @@ -99,13 +100,13 @@ class ModelConfig: supports_prompt: bool supports_mode: bool supports_language: bool - default_language: Optional[str] - default_mode: Optional[str] + default_language: str | None + default_mode: str | None use_translate_endpoint: bool use_translate_method: bool -MODEL_CONFIGS: Dict[str, ModelConfig] = { +MODEL_CONFIGS: dict[str, ModelConfig] = { "saarika:v2.5": ModelConfig( supports_prompt=False, supports_mode=False, @@ -192,26 +193,24 @@ class SarvamSTTService(STTService): high_vad_sensitivity: Enable high VAD (Voice Activity Detection) sensitivity. Defaults to None. """ - language: Optional[Language] = None - prompt: Optional[str] = None - mode: Optional[Literal["transcribe", "translate", "verbatim", "translit", "codemix"]] = None - vad_signals: Optional[bool] = None - high_vad_sensitivity: Optional[bool] = None + language: Language | None = None + prompt: str | None = None + mode: Literal["transcribe", "translate", "verbatim", "translit", "codemix"] | None = None + vad_signals: bool | None = None + high_vad_sensitivity: bool | None = None def __init__( self, *, api_key: str, - model: Optional[str] = None, - mode: Optional[ - Literal["transcribe", "translate", "verbatim", "translit", "codemix"] - ] = None, - sample_rate: Optional[int] = None, + model: str | None = None, + mode: Literal["transcribe", "translate", "verbatim", "translit", "codemix"] | None = None, + sample_rate: int | None = None, input_audio_codec: str = "wav", - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = SARVAM_TTFS_P99, - keepalive_timeout: Optional[float] = None, + params: InputParams | None = None, + settings: Settings | None = None, + 
ttfs_p99_latency: float | None = SARVAM_TTFS_P99, + keepalive_timeout: float | None = None, keepalive_interval: float = 5.0, **kwargs, ): @@ -339,7 +338,7 @@ class SarvamSTTService(STTService): """ return language_to_sarvam_language(language) - def _get_language_string(self) -> Optional[str]: + def _get_language_string(self) -> str | None: """Resolve the current language setting to a Sarvam language code string.""" if self._settings.language: return language_to_sarvam_language(self._settings.language) @@ -408,7 +407,7 @@ class SarvamSTTService(STTService): return changed - async def set_prompt(self, prompt: Optional[str]): + async def set_prompt(self, prompt: str | None): """Set the transcription/translation prompt and reconnect. .. deprecated:: 0.0.104 @@ -731,7 +730,7 @@ class SarvamSTTService(STTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing. diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index ca9a38223..01b62b87d 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -40,9 +40,10 @@ See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream for full API import asyncio import base64 import json +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from enum import Enum -from typing import Any, AsyncGenerator, ClassVar, Dict, List, Optional, Tuple +from enum import StrEnum +from typing import Any, ClassVar import aiohttp from loguru import logger @@ -72,7 +73,7 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -class SarvamTTSModel(str, Enum): +class SarvamTTSModel(StrEnum): """Available Sarvam TTS models. 
Parameters: @@ -92,7 +93,7 @@ class SarvamTTSModel(str, Enum): BULBUL_V3 = "bulbul:v3" -class SarvamTTSSpeakerV2(str, Enum): +class SarvamTTSSpeakerV2(StrEnum): """Available speakers for bulbul:v2 model. Female voices: anushka, manisha, vidya, arya @@ -108,7 +109,7 @@ class SarvamTTSSpeakerV2(str, Enum): HITESH = "hitesh" -class SarvamTTSSpeakerV3(str, Enum): +class SarvamTTSSpeakerV3(StrEnum): """Available speakers for bulbul:v3-beta model. Includes a wider variety of voices with different characteristics. @@ -161,12 +162,12 @@ class TTSModelConfig: supports_temperature: bool default_sample_rate: int default_speaker: str - pace_range: Tuple[float, float] + pace_range: tuple[float, float] preprocessing_always_enabled: bool - speakers: Tuple[str, ...] + speakers: tuple[str, ...] -TTS_MODEL_CONFIGS: Dict[str, TTSModelConfig] = { +TTS_MODEL_CONFIGS: dict[str, TTSModelConfig] = { "bulbul:v2": TTSModelConfig( supports_pitch=True, supports_loudness=True, @@ -200,7 +201,7 @@ TTS_MODEL_CONFIGS: Dict[str, TTSModelConfig] = { } -def get_speakers_for_model(model: str) -> List[str]: +def get_speakers_for_model(model: str) -> list[str]: """Get the list of available speakers for a given model. Args: @@ -215,7 +216,7 @@ def get_speakers_for_model(model: str) -> List[str]: return list(TTS_MODEL_CONFIGS["bulbul:v2"].speakers) -def language_to_sarvam_language(language: Language) -> Optional[str]: +def language_to_sarvam_language(language: Language) -> str | None: """Convert Pipecat Language enum to Sarvam AI language codes. Args: @@ -291,7 +292,7 @@ class SarvamTTSSettings(SarvamHttpTTSSettings): Controls memory usage and processing efficiency. Defaults to 150. 
""" - _aliases: ClassVar[Dict[str, str]] = {"target_language_code": "language"} + _aliases: ClassVar[dict[str, str]] = {"target_language_code": "language"} min_buffer_size: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) max_chunk_length: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @@ -374,30 +375,30 @@ class SarvamHttpTTSService(TTSService): **Note:** Only supported for bulbul:v3-beta. Ignored for v2. """ - language: Optional[Language] = Language.EN - pitch: Optional[float] = Field( + language: Language | None = Language.EN + pitch: float | None = Field( default=0.0, ge=-0.75, le=0.75, description="Voice pitch adjustment. Only for bulbul:v2.", ) - pace: Optional[float] = Field( + pace: float | None = Field( default=1.0, ge=0.3, le=3.0, description="Speech pace. v2: 0.3-3.0, v3: 0.5-2.0.", ) - loudness: Optional[float] = Field( + loudness: float | None = Field( default=1.0, ge=0.3, le=3.0, description="Volume multiplier. Only for bulbul:v2.", ) - enable_preprocessing: Optional[bool] = Field( + enable_preprocessing: bool | None = Field( default=False, description="Enable text preprocessing. Always enabled for v3-beta model.", ) - temperature: Optional[float] = Field( + temperature: float | None = Field( default=0.6, ge=0.01, le=1.0, @@ -409,12 +410,12 @@ class SarvamHttpTTSService(TTSService): *, api_key: str, aiohttp_session: aiohttp.ClientSession, - voice_id: Optional[str] = None, - model: Optional[str] = None, + voice_id: str | None = None, + model: str | None = None, base_url: str = "https://api.sarvam.ai", - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Sarvam TTS service. 
@@ -548,7 +549,7 @@ class SarvamHttpTTSService(TTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Sarvam AI language format. Args: @@ -754,46 +755,46 @@ class SarvamTTSService(InterruptibleTTSService): roopa, kabir, aayan, shubh, ashutosh, advait, amelia, sophia """ - pitch: Optional[float] = Field( + pitch: float | None = Field( default=0.0, ge=-0.75, le=0.75, description="Voice pitch adjustment. Only for bulbul:v2.", ) - pace: Optional[float] = Field( + pace: float | None = Field( default=1.0, ge=0.3, le=3.0, description="Speech pace. v2: 0.3-3.0, v3: 0.5-2.0.", ) - loudness: Optional[float] = Field( + loudness: float | None = Field( default=1.0, ge=0.3, le=3.0, description="Volume multiplier. Only for bulbul:v2.", ) - enable_preprocessing: Optional[bool] = Field( + enable_preprocessing: bool | None = Field( default=False, description="Enable text preprocessing. 
Always enabled for v3 models.", ) - min_buffer_size: Optional[int] = Field( + min_buffer_size: int | None = Field( default=50, description="Minimum characters to buffer before TTS processing.", ) - max_chunk_length: Optional[int] = Field( + max_chunk_length: int | None = Field( default=150, description="Maximum length for sentence splitting.", ) - output_audio_codec: Optional[str] = Field( + output_audio_codec: str | None = Field( default="linear16", description="Audio codec: linear16, mulaw, alaw, opus, flac, aac, wav, mp3.", ) - output_audio_bitrate: Optional[str] = Field( + output_audio_bitrate: str | None = Field( default="128k", description="Audio bitrate: 32k, 64k, 96k, 128k, 192k.", ) - language: Optional[Language] = Language.EN - temperature: Optional[float] = Field( + language: Language | None = Language.EN + temperature: float | None = Field( default=0.6, ge=0.01, le=1.0, @@ -804,14 +805,14 @@ class SarvamTTSService(InterruptibleTTSService): self, *, api_key: str, - model: Optional[str] = None, - voice_id: Optional[str] = None, + model: str | None = None, + voice_id: str | None = None, url: str = "wss://api.sarvam.ai/text-to-speech/ws", - aggregate_sentences: Optional[bool] = None, - text_aggregation_mode: Optional[TextAggregationMode] = None, - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + aggregate_sentences: bool | None = None, + text_aggregation_mode: TextAggregationMode | None = None, + sample_rate: int | None = None, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Sarvam TTS service with voice and transport configuration. @@ -979,7 +980,7 @@ class SarvamTTSService(InterruptibleTTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Sarvam AI language format. 
Args: @@ -1020,7 +1021,7 @@ class SarvamTTSService(InterruptibleTTSService): await super().cancel(frame) await self._disconnect() - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio synthesis by sending flush command.""" try: if self._websocket: diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py index 54465b041..585053277 100644 --- a/src/pipecat/services/settings.py +++ b/src/pipecat/services/settings.py @@ -37,8 +37,9 @@ Key helpers: from __future__ import annotations import copy +from collections.abc import Mapping from dataclasses import dataclass, field, fields -from typing import TYPE_CHECKING, Any, ClassVar, Dict, Mapping, Optional, Type, TypeVar +from typing import TYPE_CHECKING, Any, ClassVar, TypeVar from loguru import logger @@ -65,7 +66,7 @@ class _NotGiven: ``validate_complete()``. """ - _instance: Optional[_NotGiven] = None + _instance: _NotGiven | None = None def __new__(cls) -> _NotGiven: if cls._instance is None: @@ -153,12 +154,12 @@ class ServiceSettings: model string or ``None`` if the service has no model concept. """ - extra: Dict[str, Any] = field(default_factory=dict) + extra: dict[str, Any] = field(default_factory=dict) """Catch-all for service-specific keys that have no declared field.""" # -- class-level configuration ------------------------------------------- - _aliases: ClassVar[Dict[str, str]] = {} + _aliases: ClassVar[dict[str, str]] = {} """Map of alternative key names to canonical field names. For example ``{"voice_id": "voice"}`` lets callers use either spelling. @@ -167,7 +168,7 @@ class ServiceSettings: # -- public API ---------------------------------------------------------- - def given_fields(self) -> Dict[str, Any]: + def given_fields(self) -> dict[str, Any]: """Return a dict of only the fields that are not ``NOT_GIVEN``. 
Primarily useful for delta-mode objects to inspect which fields were @@ -180,7 +181,7 @@ class ServiceSettings: Returns: Dictionary mapping field names to their provided values. """ - result: Dict[str, Any] = {} + result: dict[str, Any] = {} for f in fields(self): if f.name == "extra": continue @@ -190,7 +191,7 @@ class ServiceSettings: result.update(self.extra) return result - def apply_update(self: _S, delta: _S) -> Dict[str, Any]: + def apply_update(self: _S, delta: _S) -> dict[str, Any]: """Merge a delta-mode object into this store-mode object. Only fields in *delta* that are **given** (i.e. not ``NOT_GIVEN``) @@ -218,7 +219,7 @@ class ServiceSettings: # changed == {"voice": "alice"} # current.voice == "bob", current.language == "en" """ - changed: Dict[str, Any] = {} + changed: dict[str, Any] = {} for f in fields(self): if f.name == "extra": continue @@ -240,7 +241,7 @@ class ServiceSettings: return changed @classmethod - def from_mapping(cls: Type[_S], settings: Mapping[str, Any]) -> _S: + def from_mapping(cls: type[_S], settings: Mapping[str, Any]) -> _S: """Build a **delta-mode** settings object from a plain dictionary. 
This exists for backward compatibility with code that passes plain @@ -266,8 +267,8 @@ class ServiceSettings: # delta.extra == {"speed": 1.2} """ field_names = {f.name for f in fields(cls)} - {"extra"} - kwargs: Dict[str, Any] = {} - extra: Dict[str, Any] = {} + kwargs: dict[str, Any] = {} + extra: dict[str, Any] = {} for key, value in settings.items(): # Resolve aliases first @@ -410,7 +411,7 @@ class TTSSettings(ServiceSettings): voice: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) language: Language | str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) - _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} + _aliases: ClassVar[dict[str, str]] = {"voice_id": "voice"} @dataclass diff --git a/src/pipecat/services/simli/video.py b/src/pipecat/services/simli/video.py index ce681d6e8..d6ca15dc0 100644 --- a/src/pipecat/services/simli/video.py +++ b/src/pipecat/services/simli/video.py @@ -8,7 +8,6 @@ import asyncio from dataclasses import dataclass -from typing import Optional import numpy as np from loguru import logger @@ -71,9 +70,9 @@ class SimliVideoService(AIService): before the avatar disconnects. """ - enable_logging: Optional[bool] = None - max_session_length: Optional[int] = None - max_idle_time: Optional[int] = None + enable_logging: bool | None = None + max_session_length: int | None = None + max_idle_time: int | None = None def __init__( self, @@ -82,11 +81,11 @@ class SimliVideoService(AIService): face_id: str, simli_url: str = "https://api.simli.ai", is_trinity_avatar: bool = False, - params: Optional[InputParams] = None, - max_session_length: Optional[int] = None, - max_idle_time: Optional[int] = None, - enable_logging: Optional[bool] = None, - settings: Optional[Settings] = None, + params: InputParams | None = None, + max_session_length: int | None = None, + max_idle_time: int | None = None, + enable_logging: bool | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Simli video service. 
diff --git a/src/pipecat/services/smallest/stt.py b/src/pipecat/services/smallest/stt.py index 78fa0e44d..8673bd040 100644 --- a/src/pipecat/services/smallest/stt.py +++ b/src/pipecat/services/smallest/stt.py @@ -14,9 +14,10 @@ This module provides a STT service using Smallest AI's Waves API: import asyncio import json +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from enum import Enum -from typing import Any, AsyncGenerator, Optional +from enum import StrEnum +from typing import Any from urllib.parse import urlencode from loguru import logger @@ -97,7 +98,7 @@ def language_to_smallest_stt_language(language: Language) -> str: return resolve_language(language, LANGUAGE_MAP) -class SmallestSTTModel(str, Enum): +class SmallestSTTModel(StrEnum): """Available Smallest AI STT models.""" PULSE = "pulse" @@ -156,9 +157,9 @@ class SmallestSTTService(WebsocketSTTService): api_key: str, base_url: str = "wss://api.smallest.ai", encoding: str = "linear16", - sample_rate: Optional[int] = None, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = SMALLEST_TTFS_P99, + sample_rate: int | None = None, + settings: Settings | None = None, + ttfs_p99_latency: float | None = SMALLEST_TTFS_P99, **kwargs, ): """Initialize the Smallest AI STT service. @@ -207,7 +208,7 @@ class SmallestSTTService(WebsocketSTTService): """Check if this service can generate processing metrics.""" return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Smallest service language format. 
Args: @@ -406,7 +407,7 @@ class SmallestSTTService(WebsocketSTTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[str] = None + self, transcript: str, is_final: bool, language: str | None = None ): """Handle a transcription result with tracing.""" pass diff --git a/src/pipecat/services/smallest/tts.py b/src/pipecat/services/smallest/tts.py index 5ef4aaa49..33c492c01 100644 --- a/src/pipecat/services/smallest/tts.py +++ b/src/pipecat/services/smallest/tts.py @@ -13,9 +13,10 @@ Waves API for real-time text-to-speech synthesis. import asyncio import base64 import json +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from enum import Enum -from typing import Any, AsyncGenerator, Optional +from enum import StrEnum +from typing import Any from loguru import logger @@ -43,14 +44,14 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -class SmallestTTSModel(str, Enum): +class SmallestTTSModel(StrEnum): """Available Smallest AI TTS models.""" LIGHTNING_V2 = "lightning-v2" LIGHTNING_V3_1 = "lightning-v3.1" -def language_to_smallest_tts_language(language: Language) -> Optional[str]: +def language_to_smallest_tts_language(language: Language) -> str | None: """Convert a Language enum to a Smallest TTS language string. Args: @@ -125,8 +126,8 @@ class SmallestTTSService(InterruptibleTTSService): *, api_key: str, base_url: str = "wss://waves-api.smallest.ai", - sample_rate: Optional[int] = None, - settings: Optional[Settings] = None, + sample_rate: int | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Smallest AI WebSocket TTS service. 
@@ -173,7 +174,7 @@ class SmallestTTSService(InterruptibleTTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to Smallest service language format. Args: @@ -354,7 +355,7 @@ class SmallestTTSService(InterruptibleTTSService): msg = {"flush": True} await self._websocket.send(json.dumps(msg)) - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any pending audio synthesis.""" if not self._websocket or self._websocket.state is State.CLOSED: return diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index 5163ef113..66f27d40f 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -8,8 +8,9 @@ import json import time +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import Any, AsyncGenerator, List, Optional +from typing import Any from loguru import logger from pydantic import BaseModel @@ -70,10 +71,10 @@ class SonioxContextObject(BaseModel): https://soniox.com/docs/stt/concepts/context """ - general: Optional[List[SonioxContextGeneralItem]] = None - text: Optional[str] = None - terms: Optional[List[str]] = None - translation_terms: Optional[List[SonioxContextTranslationTerm]] = None + general: list[SonioxContextGeneralItem] | None = None + text: str | None = None + terms: list[str] | None = None + translation_terms: list[SonioxContextTranslationTerm] | None = None class SonioxInputParams(BaseModel): @@ -99,17 +100,17 @@ class SonioxInputParams(BaseModel): model: str = "stt-rt-v4" - audio_format: Optional[str] = "pcm_s16le" - num_channels: Optional[int] = 1 + audio_format: str | None = "pcm_s16le" + num_channels: int | None = 1 - language_hints: Optional[List[Language]] = None - language_hints_strict: Optional[bool] 
= None - context: Optional[SonioxContextObject | str] = None + language_hints: list[Language] | None = None + language_hints_strict: bool | None = None + context: SonioxContextObject | str | None = None - enable_speaker_diarization: Optional[bool] = False - enable_language_identification: Optional[bool] = False + enable_speaker_diarization: bool | None = False + enable_language_identification: bool | None = False - client_reference_id: Optional[str] = None + client_reference_id: str | None = None def is_end_token(token: dict) -> bool: @@ -190,8 +191,8 @@ def language_to_soniox_language(language: Language) -> str: def _prepare_language_hints( - language_hints: Optional[List[Language]], -) -> Optional[List[str]]: + language_hints: list[Language] | None, +) -> list[str] | None: if language_hints is None: return None @@ -215,7 +216,7 @@ class SonioxSTTSettings(STTSettings): client_reference_id: Client reference ID to use for transcription. """ - language_hints: List[Language] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language_hints: list[Language] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) language_hints_strict: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) context: SonioxContextObject | str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) enable_speaker_diarization: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) @@ -243,14 +244,14 @@ class SonioxSTTService(WebsocketSTTService): *, api_key: str, url: str = "wss://stt-rt.soniox.com/transcribe-websocket", - sample_rate: Optional[int] = None, - model: Optional[str] = None, + sample_rate: int | None = None, + model: str | None = None, audio_format: str = "pcm_s16le", num_channels: int = 1, - params: Optional[SonioxInputParams] = None, + params: SonioxInputParams | None = None, vad_force_turn_endpoint: bool = True, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = SONIOX_TTFS_P99, + settings: Settings | 
None = None, + ttfs_p99_latency: float | None = SONIOX_TTFS_P99, **kwargs, ): """Initialize the Soniox STT service. @@ -337,7 +338,7 @@ class SonioxSTTService(WebsocketSTTService): self._num_channels = num_channels self._final_transcription_buffer = [] - self._last_tokens_received: Optional[float] = None + self._last_tokens_received: float | None = None self._receive_task = None @@ -417,7 +418,7 @@ class SonioxSTTService(WebsocketSTTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing.""" pass diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py index ae8e35850..6f049b29f 100644 --- a/src/pipecat/services/speechmatics/stt.py +++ b/src/pipecat/services/speechmatics/stt.py @@ -9,9 +9,10 @@ import asyncio import os import warnings +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from enum import Enum -from typing import Any, AsyncGenerator, ClassVar +from enum import StrEnum +from typing import Any, ClassVar from dotenv import load_dotenv from loguru import logger @@ -66,7 +67,7 @@ except ModuleNotFoundError as e: load_dotenv() -class TurnDetectionMode(str, Enum): +class TurnDetectionMode(StrEnum): """Endpoint and turn detection handling mode. How the STT engine handles the endpointing of speech. 
If using Pipecat's built-in endpointing, @@ -680,7 +681,7 @@ class SpeechmaticsSTTService(STTService): try: if self._client: await self._client.disconnect() - except asyncio.TimeoutError: + except TimeoutError: logger.warning(f"{self} timeout while closing Speechmatics client connection") except Exception as e: await self.push_error(error_msg=f"Error closing Speechmatics client: {e}", exception=e) diff --git a/src/pipecat/services/speechmatics/tts.py b/src/pipecat/services/speechmatics/tts.py index 64f64378a..69db9e2c3 100644 --- a/src/pipecat/services/speechmatics/tts.py +++ b/src/pipecat/services/speechmatics/tts.py @@ -7,8 +7,8 @@ """Speechmatics TTS service integration.""" import asyncio +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional from urllib.parse import urlencode import aiohttp @@ -75,11 +75,11 @@ class SpeechmaticsTTSService(TTSService): *, api_key: str, base_url: str = "https://preview.tts.speechmatics.com", - voice_id: Optional[str] = None, + voice_id: str | None = None, aiohttp_session: aiohttp.ClientSession, - sample_rate: Optional[int] = SPEECHMATICS_SAMPLE_RATE, - params: Optional[InputParams] = None, - settings: Optional[Settings] = None, + sample_rate: int | None = SPEECHMATICS_SAMPLE_RATE, + params: InputParams | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Speechmatics TTS service. 
diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index 8fe83be33..74d2f90ea 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -12,7 +12,8 @@ import time import warnings import wave from abc import abstractmethod -from typing import Any, AsyncGenerator, Optional +from collections.abc import AsyncGenerator +from typing import Any from loguru import logger from websockets.protocol import State @@ -82,12 +83,12 @@ class STTService(AIService): self, *, audio_passthrough=True, - sample_rate: Optional[int] = None, + sample_rate: int | None = None, stt_ttfb_timeout: float = 2.0, - ttfs_p99_latency: Optional[float] = None, - keepalive_timeout: Optional[float] = None, + ttfs_p99_latency: float | None = None, + keepalive_timeout: float | None = None, keepalive_interval: float = 5.0, - settings: Optional[STTSettings] = None, + settings: STTSettings | None = None, **kwargs, ): """Initialize the STT service. @@ -152,7 +153,7 @@ class STTService(AIService): # STT TTFB tracking state self._stt_ttfb_timeout = stt_ttfb_timeout - self._ttfb_timeout_task: Optional[asyncio.Task] = None + self._ttfb_timeout_task: asyncio.Task | None = None self._user_speaking: bool = False self._finalize_pending: bool = False self._finalize_requested: bool = False @@ -161,7 +162,7 @@ class STTService(AIService): # Keepalive state self._keepalive_timeout = keepalive_timeout self._keepalive_interval = keepalive_interval - self._keepalive_task: Optional[asyncio.Task] = None + self._keepalive_task: asyncio.Task | None = None self._last_audio_time: float = 0 # VAD-aware reconnect state @@ -261,7 +262,7 @@ class STTService(AIService): settings_cls = type(self._settings) await self._update_settings(settings_cls(language=language)) - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a language to the service-specific 
language format. Args: @@ -690,7 +691,7 @@ class SegmentedSTTService(STTService): VAD detection. """ - def __init__(self, *, sample_rate: Optional[int] = None, **kwargs): + def __init__(self, *, sample_rate: int | None = None, **kwargs): """Initialize the segmented STT service. Args: diff --git a/src/pipecat/services/tavus/video.py b/src/pipecat/services/tavus/video.py index a41bee5b4..a12b91f3b 100644 --- a/src/pipecat/services/tavus/video.py +++ b/src/pipecat/services/tavus/video.py @@ -12,7 +12,6 @@ avatar functionality through Tavus's streaming API. import asyncio from dataclasses import dataclass -from typing import Optional import aiohttp from daily.daily import AudioData, VideoFrame @@ -69,7 +68,7 @@ class TavusVideoService(AIService): replica_id: str, persona_id: str = "pipecat-stream", session: aiohttp.ClientSession, - settings: Optional[Settings] = None, + settings: Settings | None = None, **kwargs, ) -> None: """Initialize the Tavus video service. @@ -94,15 +93,15 @@ class TavusVideoService(AIService): self._persona_id = persona_id self._other_participant_has_joined = False - self._client: Optional[TavusTransportClient] = None + self._client: TavusTransportClient | None = None self._conversation_id: str self._resampler = create_stream_resampler() self._audio_buffer = bytearray() - self._send_task: Optional[asyncio.Task] = None + self._send_task: asyncio.Task | None = None # This is the custom track destination expected by Tavus - self._transport_destination: Optional[str] = "stream" + self._transport_destination: str | None = "stream" self._transport_ready = False async def setup(self, setup: FrameProcessorSetup): diff --git a/src/pipecat/services/together/llm.py b/src/pipecat/services/together/llm.py index 3711daa72..9626d1a8b 100644 --- a/src/pipecat/services/together/llm.py +++ b/src/pipecat/services/together/llm.py @@ -7,7 +7,6 @@ """Together.ai LLM service implementation using OpenAI-compatible interface.""" from dataclasses import dataclass -from 
typing import Optional from loguru import logger @@ -42,8 +41,8 @@ class TogetherLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://api.together.xyz/v1", - model: Optional[str] = None, - settings: Optional[Settings] = None, + model: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize Together.ai LLM service. diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 86a3b91a6..fe4790cbb 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -10,19 +10,11 @@ import asyncio import uuid import warnings from abc import abstractmethod +from collections.abc import AsyncGenerator, AsyncIterator, Awaitable, Callable, Sequence from dataclasses import dataclass -from enum import Enum +from enum import StrEnum from typing import ( Any, - AsyncGenerator, - AsyncIterator, - Awaitable, - Callable, - Dict, - List, - Optional, - Sequence, - Tuple, ) from loguru import logger @@ -77,10 +69,10 @@ class TTSContext: """ append_to_context: bool = True - push_assistant_aggregation: Optional[bool] = False + push_assistant_aggregation: bool | None = False -class TextAggregationMode(str, Enum): +class TextAggregationMode(StrEnum): """Controls how incoming text is aggregated before TTS synthesis. 
Parameters: @@ -145,8 +137,8 @@ class TTSService(AIService): def __init__( self, *, - text_aggregation_mode: Optional[TextAggregationMode] = None, - aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: TextAggregationMode | None = None, + aggregate_sentences: bool | None = None, # if True, TTSService will push TextFrames and LLMFullResponseEndFrames, # otherwise subclass must do it push_text_frames: bool = True, @@ -166,22 +158,21 @@ class TTSService(AIService): # (helps prevent some TTS services from vocalizing trailing punctuation) append_trailing_space: bool = False, # TTS output sample rate - sample_rate: Optional[int] = None, + sample_rate: int | None = None, # Types of text aggregations that should not be spoken. - skip_aggregator_types: Optional[List[str]] = [], + skip_aggregator_types: list[str] | None = [], # A list of callables to transform text before just before sending it to TTS. # Each callable takes the aggregated text and its type, and returns the transformed text. # To register, provide a list of tuples of (aggregation_type | '*', transform_function). - text_transforms: Optional[ - List[ - Tuple[AggregationType | str, Callable[[str, str | AggregationType], Awaitable[str]]] - ] - ] = None, + text_transforms: list[ + tuple[AggregationType | str, Callable[[str, str | AggregationType], Awaitable[str]]] + ] + | None = None, # Text filter executed after text has been aggregated. - text_filters: Optional[Sequence[BaseTextFilter]] = None, + text_filters: Sequence[BaseTextFilter] | None = None, # Audio transport destination of the generated frames. 
- transport_destination: Optional[str] = None, - settings: Optional[TTSSettings] = None, + transport_destination: str | None = None, + settings: TTSSettings | None = None, # if True, the context ID is reused within an LLM turn reuse_context_id_within_turn: bool = True, **kwargs, @@ -288,24 +279,24 @@ class TTSService(AIService): self._sample_rate = 0 self._text_aggregator = SimpleTextAggregator(aggregation_type=self._text_aggregation_mode) - self._skip_aggregator_types: List[str] = skip_aggregator_types or [] - self._text_transforms: List[ - Tuple[AggregationType | str, Callable[[str, AggregationType | str], Awaitable[str]]] + self._skip_aggregator_types: list[str] = skip_aggregator_types or [] + self._text_transforms: list[ + tuple[AggregationType | str, Callable[[str, AggregationType | str], Awaitable[str]]] ] = text_transforms or [] # TODO: Deprecate _text_filters when added to LLMTextProcessor self._text_filters: Sequence[BaseTextFilter] = text_filters or [] - self._transport_destination: Optional[str] = transport_destination + self._transport_destination: str | None = transport_destination self._resampler = create_stream_resampler() self._processing_text: bool = False - self._tts_contexts: Dict[str, TTSContext] = {} + self._tts_contexts: dict[str, TTSContext] = {} self._streamed_text: str = "" self._text_aggregation_metrics_started: bool = False # Word timestamp state self._initial_word_timestamp: int = -1 - self._initial_word_times: List[Tuple[str, float, Optional[str]]] = [] + self._initial_word_times: list[tuple[str, float, str | None]] = [] # PTS of the last word frame pushed via _add_word_timestamps, used to assign # correct PTS to TTSStoppedFrame and LLMFullResponseEndFrame. self._word_last_pts: int = 0 @@ -327,10 +318,10 @@ class TTSService(AIService): # they clear at different times: _turn_context_id is cleared when the LLM turn # ends (synthesis done) while _playing_context_id remains set until the audio # finishes playing. 
Merging them would null out the playback cursor prematurely. - self._playing_context_id: Optional[str] = None - self._turn_context_id: Optional[str] = None - self._audio_contexts: Dict[str, asyncio.Queue] = {} - self._audio_context_task: Optional[asyncio.Task] = None + self._playing_context_id: str | None = None + self._turn_context_id: str | None = None + self._audio_contexts: dict[str, asyncio.Queue] = {} + self._audio_context_task: asyncio.Task | None = None self._register_event_handler("on_connected") self._register_event_handler("on_disconnected") @@ -467,7 +458,7 @@ class TTSService(AIService): """ pass - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a language to the service-specific language format. Args: @@ -491,7 +482,7 @@ class TTSService(AIService): return text + " " return text - async def flush_audio(self, context_id: Optional[str] = None): + async def flush_audio(self, context_id: str | None = None): """Flush any buffered audio data. Args: @@ -793,8 +784,8 @@ class TTSService(AIService): iterator: AsyncIterator[bytes], *, strip_wav_header: bool = False, - in_sample_rate: Optional[int] = None, - context_id: Optional[str] = None, + in_sample_rate: int | None = None, + context_id: str | None = None, ) -> AsyncGenerator[Frame, None]: """Stream audio frames from an async byte iterator with optional resampling. 
@@ -896,9 +887,9 @@ class TTSService(AIService): async def _push_tts_frames( self, src_frame: AggregatedTextFrame, - includes_inter_frame_spaces: Optional[bool] = False, - append_tts_text_to_context: Optional[bool] = True, - push_assistant_aggregation: Optional[bool] = False, + includes_inter_frame_spaces: bool | None = False, + append_tts_text_to_context: bool | None = True, + push_assistant_aggregation: bool | None = False, ): type = src_frame.aggregated_by text = src_frame.text @@ -1069,7 +1060,7 @@ class TTSService(AIService): self._initial_word_times = [] async def add_word_timestamps( - self, word_times: List[Tuple[str, float]], context_id: Optional[str] = None + self, word_times: list[tuple[str, float]], context_id: str | None = None ): """Add word timestamps for processing. @@ -1096,7 +1087,7 @@ class TTSService(AIService): await self._add_word_timestamps(word_times=word_times, context_id=context_id) async def _add_word_timestamps( - self, word_times: List[Tuple[str, float]], context_id: Optional[str] = None + self, word_times: list[tuple[str, float]], context_id: str | None = None ): """Process word timestamps directly, building and pushing TTSTextFrames inline. @@ -1194,11 +1185,11 @@ class TTSService(AIService): self._playing_context_id ) - def get_audio_contexts(self) -> List[str]: + def get_audio_contexts(self) -> list[str]: """Get a list of all available audio contexts.""" return list(self._audio_contexts.keys()) - def get_active_audio_context_id(self) -> Optional[str]: + def get_active_audio_context_id(self) -> str | None: """Get the active audio context ID. Returns: @@ -1242,7 +1233,7 @@ class TTSService(AIService): # must be emitted in-order relative to surrounding audio contexts. # None – shutdown sentinel (sent by stop()). 
self._serialization_queue: asyncio.Queue = asyncio.Queue() - self._audio_contexts: Dict[str, asyncio.Queue] = {} + self._audio_contexts: dict[str, asyncio.Queue] = {} self._audio_context_task = self.create_task(self._audio_context_task_handler()) async def _stop_audio_context_task(self): @@ -1342,7 +1333,7 @@ class TTSService(AIService): await self.push_error_frame(frame) else: await self.push_frame(frame) - except asyncio.TimeoutError: + except TimeoutError: # We didn't get audio, so let's consider this context finished. logger.trace(f"{self} time out on audio context {context_id}") if should_push_stop_frame and self._push_stop_frames: diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py index 264241ca7..935562dc1 100644 --- a/src/pipecat/services/ultravox/llm.py +++ b/src/pipecat/services/ultravox/llm.py @@ -16,7 +16,7 @@ import datetime import json import uuid from dataclasses import dataclass, field -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Literal import aiohttp from loguru import logger @@ -94,13 +94,13 @@ class AgentInputParams(BaseModel): api_key: str agent_id: uuid.UUID - template_context: Dict[str, Any] = Field(default_factory=dict) - metadata: Dict[str, str] = Field(default_factory=dict) - output_medium: Optional[Literal["text", "voice"]] = None - max_duration: Optional[datetime.timedelta] = Field( + template_context: dict[str, Any] = Field(default_factory=dict) + metadata: dict[str, str] = Field(default_factory=dict) + output_medium: Literal["text", "voice"] | None = None + max_duration: datetime.timedelta | None = Field( default=None, ge=datetime.timedelta(seconds=10), le=datetime.timedelta(hours=1) ) - extra: Dict[str, Any] = Field(default_factory=dict) + extra: dict[str, Any] = Field(default_factory=dict) class OneShotInputParams(BaseModel): @@ -122,18 +122,18 @@ class OneShotInputParams(BaseModel): """ api_key: str - system_prompt: Optional[str] = None + system_prompt: 
str | None = None temperature: float = Field(default=0.0, ge=0.0, le=1.0) - model: Optional[str] = None - voice: Optional[uuid.UUID] = None - metadata: Dict[str, str] = Field(default_factory=dict) - output_medium: Optional[Literal["text", "voice"]] = None + model: str | None = None + voice: uuid.UUID | None = None + metadata: dict[str, str] = Field(default_factory=dict) + output_medium: Literal["text", "voice"] | None = None max_duration: datetime.timedelta = Field( default=datetime.timedelta(hours=1), ge=datetime.timedelta(seconds=10), le=datetime.timedelta(hours=1), ) - extra: Dict[str, Any] = Field(default_factory=dict) + extra: dict[str, Any] = Field(default_factory=dict) class JoinUrlInputParams(BaseModel): @@ -163,9 +163,9 @@ class UltravoxRealtimeLLMService(LLMService): def __init__( self, *, - params: Union[AgentInputParams, OneShotInputParams, JoinUrlInputParams], - settings: Optional[Settings] = None, - one_shot_selected_tools: Optional[ToolsSchema] = None, + params: AgentInputParams | OneShotInputParams | JoinUrlInputParams, + settings: Settings | None = None, + one_shot_selected_tools: ToolsSchema | None = None, **kwargs, ): """Initialize the Ultravox Realtime LLM service. 
@@ -213,11 +213,11 @@ class UltravoxRealtimeLLMService(LLMService): else: self._selected_tools = one_shot_selected_tools - self._socket: Optional[websocket_client.ClientConnection] = None - self._receive_task: Optional[asyncio.Task] = None + self._socket: websocket_client.ClientConnection | None = None + self._receive_task: asyncio.Task | None = None self._disconnecting = False self._bot_responding: Literal[None, "text", "voice"] = None - self._last_user_id: Optional[str] = None + self._last_user_id: str | None = None self._sample_rate = 48000 self._resampler = create_stream_resampler() @@ -258,7 +258,7 @@ class UltravoxRealtimeLLMService(LLMService): await self.push_error("Failed to connect to Ultravox", e, fatal=True) @staticmethod - def _output_medium_to_api(medium: Optional[Literal["text", "voice"]]) -> Optional[str]: + def _output_medium_to_api(medium: Literal["text", "voice"] | None) -> str | None: if medium == "text": return "MESSAGE_MEDIUM_TEXT" elif medium == "voice": @@ -324,8 +324,8 @@ class UltravoxRealtimeLLMService(LLMService): raise Exception(f"Ultravox API error {response.status}: {error_text}") return (await response.json())["joinUrl"] - def _to_selected_tools(self, tool: ToolsSchema) -> List[Dict[str, Any]]: - result: List[Dict[str, Any]] = [] + def _to_selected_tools(self, tool: ToolsSchema) -> list[dict[str, Any]]: + result: list[dict[str, Any]] = [] for standard_tool in tool.standard_tools: result.append( { @@ -476,7 +476,7 @@ class UltravoxRealtimeLLMService(LLMService): return await self._send({"type": "set_output_medium", "medium": output_medium}) - async def _send(self, content: Union[bytes, Dict[str, Any]]): + async def _send(self, content: bytes | dict[str, Any]): """Send content via the WebSocket connection. 
Args: @@ -565,7 +565,7 @@ class UltravoxRealtimeLLMService(LLMService): self._bot_responding = None async def _handle_tool_invocation( - self, tool_name: str, invocation_id: str, parameters: Dict[str, Any] + self, tool_name: str, invocation_id: str, parameters: dict[str, Any] ): await self.run_function_calls( [ @@ -590,7 +590,7 @@ class UltravoxRealtimeLLMService(LLMService): ) async def _handle_agent_transcript( - self, medium: str, text: Optional[str], delta: Optional[str], final: bool + self, medium: str, text: str | None, delta: str | None, final: bool ): if medium == "voice": # In voice mode, audio is handled by _handle_audio(). Here we push diff --git a/src/pipecat/services/vision_service.py b/src/pipecat/services/vision_service.py index 572f3b423..74d70f1d4 100644 --- a/src/pipecat/services/vision_service.py +++ b/src/pipecat/services/vision_service.py @@ -12,7 +12,7 @@ visual content. """ from abc import abstractmethod -from typing import AsyncGenerator, Optional +from collections.abc import AsyncGenerator from pipecat.frames.frames import Frame, UserImageRawFrame from pipecat.processors.frame_processor import FrameDirection @@ -28,7 +28,7 @@ class VisionService(AIService): with the AI service infrastructure for metrics and lifecycle management. """ - def __init__(self, *, settings: Optional[VisionSettings] = None, **kwargs): + def __init__(self, *, settings: VisionSettings | None = None, **kwargs): """Initialize the vision service. 
Args: diff --git a/src/pipecat/services/websocket_service.py b/src/pipecat/services/websocket_service.py index 9258aa90c..83ddb3746 100644 --- a/src/pipecat/services/websocket_service.py +++ b/src/pipecat/services/websocket_service.py @@ -9,7 +9,7 @@ import asyncio import time from abc import ABC, abstractmethod -from typing import Awaitable, Callable, Optional +from collections.abc import Awaitable, Callable import websockets from loguru import logger @@ -42,7 +42,7 @@ class WebsocketService(ABC): reconnect_on_error: Whether to automatically reconnect on connection errors. **kwargs: Additional arguments (unused, for compatibility). """ - self._websocket: Optional[websockets.WebSocketClientProtocol] = None + self._websocket: websockets.WebSocketClientProtocol | None = None self._reconnect_on_error = reconnect_on_error self._reconnect_in_progress: bool = False self._disconnecting: bool = False @@ -81,7 +81,7 @@ class WebsocketService(ABC): async def _try_reconnect( self, max_retries: int = 3, - report_error: Optional[Callable[[ErrorFrame], Awaitable[None]]] = None, + report_error: Callable[[ErrorFrame], Awaitable[None]] | None = None, ) -> bool: # Prevent concurrent reconnection attempts if self._reconnect_in_progress: @@ -89,7 +89,7 @@ class WebsocketService(ABC): return False self._reconnect_in_progress = True - last_exception: Optional[Exception] = None + last_exception: Exception | None = None try: for attempt in range(1, max_retries + 1): try: @@ -136,7 +136,7 @@ class WebsocketService(ABC): self, error_message: str, report_error: Callable[[ErrorFrame], Awaitable[None]], - error: Optional[Exception] = None, + error: Exception | None = None, ) -> bool: """Check if reconnection should be attempted and try if appropriate. 
diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py index a891253ce..9ac84c41c 100644 --- a/src/pipecat/services/whisper/base_stt.py +++ b/src/pipecat/services/whisper/base_stt.py @@ -10,8 +10,8 @@ This module provides common functionality for services implementing the Whisper interface, including language mapping, metrics generation, and error handling. """ +from collections.abc import AsyncGenerator from dataclasses import dataclass, field -from typing import AsyncGenerator, Optional from loguru import logger from openai import AsyncOpenAI @@ -40,7 +40,7 @@ class BaseWhisperSTTSettings(STTSettings): temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) -def language_to_whisper_language(language: Language) -> Optional[str]: +def language_to_whisper_language(language: Language) -> str | None: """Maps pipecat Language enum to Whisper API language codes. Language support for Whisper API. @@ -128,16 +128,16 @@ class BaseWhisperSTTService(SegmentedSTTService): def __init__( self, *, - model: Optional[str] = None, - api_key: Optional[str] = None, - base_url: Optional[str] = None, - language: Optional[Language] = None, - prompt: Optional[str] = None, - temperature: Optional[float] = None, + model: str | None = None, + api_key: str | None = None, + base_url: str | None = None, + language: Language | None = None, + prompt: str | None = None, + temperature: float | None = None, include_prob_metrics: bool = False, push_empty_transcripts: bool = False, - settings: Optional[Settings] = None, - ttfs_p99_latency: Optional[float] = WHISPER_TTFS_P99, + settings: Settings | None = None, + ttfs_p99_latency: float | None = WHISPER_TTFS_P99, **kwargs, ): """Initialize the Whisper STT service. 
@@ -217,7 +217,7 @@ class BaseWhisperSTTService(SegmentedSTTService): self._include_prob_metrics = include_prob_metrics self._push_empty_transcripts = push_empty_transcripts - def _create_client(self, api_key: Optional[str], base_url: Optional[str]): + def _create_client(self, api_key: str | None, base_url: str | None): return AsyncOpenAI(api_key=api_key, base_url=base_url) def can_generate_metrics(self) -> bool: @@ -228,7 +228,7 @@ class BaseWhisperSTTService(SegmentedSTTService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert from pipecat Language to service language code. Args: @@ -241,7 +241,7 @@ class BaseWhisperSTTService(SegmentedSTTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing.""" pass diff --git a/src/pipecat/services/whisper/stt.py b/src/pipecat/services/whisper/stt.py index ac5d90c30..7d92009d1 100644 --- a/src/pipecat/services/whisper/stt.py +++ b/src/pipecat/services/whisper/stt.py @@ -11,13 +11,14 @@ supporting both Faster Whisper and MLX Whisper backends for efficient inference. 
""" import asyncio +from collections.abc import AsyncGenerator from dataclasses import dataclass, field from enum import Enum -from typing import AsyncGenerator, Optional +from typing import TYPE_CHECKING import numpy as np from loguru import logger -from typing_extensions import TYPE_CHECKING, override +from typing_extensions import override from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven @@ -96,7 +97,7 @@ class MLXModel(Enum): LARGE_V3_TURBO_Q4 = "mlx-community/whisper-large-v3-turbo-q4" -def language_to_whisper_language(language: Language) -> Optional[str]: +def language_to_whisper_language(language: Language) -> str | None: """Maps pipecat Language enum to Whisper language codes. Args: @@ -213,12 +214,12 @@ class WhisperSTTService(SegmentedSTTService): def __init__( self, *, - model: Optional[str | Model] = None, + model: str | Model | None = None, device: str = "auto", compute_type: str = "default", - no_speech_prob: Optional[float] = None, - language: Optional[Language] = None, - settings: Optional[Settings] = None, + no_speech_prob: float | None = None, + language: Language | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the Whisper STT service. @@ -280,7 +281,7 @@ class WhisperSTTService(SegmentedSTTService): self._device = device self._compute_type = compute_type - self._model: Optional[WhisperModel] = None + self._model: WhisperModel | None = None self._load() @@ -292,7 +293,7 @@ class WhisperSTTService(SegmentedSTTService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert from pipecat Language to Whisper language code. 
Args: @@ -325,7 +326,7 @@ class WhisperSTTService(SegmentedSTTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing.""" pass @@ -387,11 +388,11 @@ class WhisperSTTServiceMLX(WhisperSTTService): def __init__( self, *, - model: Optional[str | MLXModel] = None, - no_speech_prob: Optional[float] = None, - language: Optional[Language] = None, - temperature: Optional[float] = None, - settings: Optional[Settings] = None, + model: str | MLXModel | None = None, + no_speech_prob: float | None = None, + language: Language | None = None, + temperature: float | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the MLX Whisper STT service. @@ -466,7 +467,7 @@ class WhisperSTTServiceMLX(WhisperSTTService): @traced_stt async def _handle_transcription( - self, transcript: str, is_final: bool, language: Optional[Language] = None + self, transcript: str, is_final: bool, language: Language | None = None ): """Handle a transcription result with tracing.""" pass diff --git a/src/pipecat/services/whisper/utils.py b/src/pipecat/services/whisper/utils.py index ac7e0cca5..182418e3c 100644 --- a/src/pipecat/services/whisper/utils.py +++ b/src/pipecat/services/whisper/utils.py @@ -7,12 +7,11 @@ """Utility functions for extracting probability metrics from STT services.""" import math -from typing import Optional from pipecat.frames.frames import TranscriptionFrame -def extract_whisper_probability(frame: TranscriptionFrame) -> Optional[float]: +def extract_whisper_probability(frame: TranscriptionFrame) -> float | None: """Extract probability from Whisper-based TranscriptionFrame result. 
Works with Groq, OpenAI Whisper, or other Whisper-based services that use @@ -53,7 +52,7 @@ def extract_whisper_probability(frame: TranscriptionFrame) -> Optional[float]: return None -def extract_openai_gpt4o_probability(frame: TranscriptionFrame) -> Optional[float]: +def extract_openai_gpt4o_probability(frame: TranscriptionFrame) -> float | None: """Extract probability from OpenAI GPT-4o-transcribe TranscriptionFrame result. Args: @@ -90,7 +89,7 @@ def extract_openai_gpt4o_probability(frame: TranscriptionFrame) -> Optional[floa return None -def extract_deepgram_probability(frame: TranscriptionFrame) -> Optional[float]: +def extract_deepgram_probability(frame: TranscriptionFrame) -> float | None: """Extract probability from Deepgram TranscriptionFrame result. Args: diff --git a/src/pipecat/services/xai/llm.py b/src/pipecat/services/xai/llm.py index 0bbfb62b3..e0d84373f 100644 --- a/src/pipecat/services/xai/llm.py +++ b/src/pipecat/services/xai/llm.py @@ -12,7 +12,6 @@ and context aggregation functionality. """ from dataclasses import dataclass -from typing import Optional from loguru import logger @@ -48,8 +47,8 @@ class GrokLLMService(OpenAILLMService): *, api_key: str, base_url: str = "https://api.x.ai/v1", - model: Optional[str] = None, - settings: Optional[Settings] = None, + model: str | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the GrokLLMService with API key and model. 
diff --git a/src/pipecat/services/xai/realtime/events.py b/src/pipecat/services/xai/realtime/events.py index 1f89a92f7..c5e4ab755 100644 --- a/src/pipecat/services/xai/realtime/events.py +++ b/src/pipecat/services/xai/realtime/events.py @@ -12,7 +12,7 @@ https://docs.x.ai/docs/guides/voice/agent import json import uuid -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Literal from pydantic import BaseModel, ConfigDict, Field @@ -82,7 +82,7 @@ class TurnDetection(BaseModel): type: Detection type, must be "server_vad" or None for manual. """ - type: Optional[Literal["server_vad"]] = "server_vad" + type: Literal["server_vad"] | None = "server_vad" # @@ -97,7 +97,7 @@ class AudioInput(BaseModel): format: The format configuration for input audio. """ - format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None + format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None class AudioOutput(BaseModel): @@ -107,7 +107,7 @@ class AudioOutput(BaseModel): format: The format configuration for output audio. """ - format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None + format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None class AudioConfiguration(BaseModel): @@ -118,8 +118,8 @@ class AudioConfiguration(BaseModel): output: Configuration for output audio. 
""" - input: Optional[AudioInput] = None - output: Optional[AudioOutput] = None + input: AudioInput | None = None + output: AudioOutput | None = None # @@ -147,7 +147,7 @@ class XSearchTool(BaseModel): """ type: Literal["x_search"] = "x_search" - allowed_x_handles: Optional[List[str]] = None + allowed_x_handles: list[str] | None = None class FileSearchTool(BaseModel): @@ -162,8 +162,8 @@ class FileSearchTool(BaseModel): """ type: Literal["file_search"] = "file_search" - vector_store_ids: List[str] - max_num_results: Optional[int] = 10 + vector_store_ids: list[str] + max_num_results: int | None = 10 class FunctionTool(BaseModel): @@ -179,11 +179,11 @@ class FunctionTool(BaseModel): type: Literal["function"] = "function" name: str description: str - parameters: Dict[str, Any] + parameters: dict[str, Any] # Union type for all Grok tools -GrokTool = Union[WebSearchTool, XSearchTool, FileSearchTool, FunctionTool, Dict[str, Any]] +GrokTool = WebSearchTool | XSearchTool | FileSearchTool | FunctionTool | dict[str, Any] # @@ -215,14 +215,14 @@ class SessionProperties(BaseModel): # Needed to support ToolSchema in tools field. 
model_config = ConfigDict(arbitrary_types_allowed=True) - instructions: Optional[str] = None - voice: Optional[GrokVoice | str] = "Ara" - turn_detection: Optional[TurnDetection] = Field( + instructions: str | None = None + voice: GrokVoice | str | None = "Ara" + turn_detection: TurnDetection | None = Field( default_factory=lambda: TurnDetection(type="server_vad") ) - audio: Optional[AudioConfiguration] = None + audio: AudioConfiguration | None = None # Tools can be ToolsSchema when provided by user, or list of dicts for API - tools: Optional[ToolsSchema | List[GrokTool]] = None + tools: ToolsSchema | list[GrokTool] | None = None # @@ -241,9 +241,9 @@ class ItemContent(BaseModel): """ type: Literal["text", "audio", "input_text", "input_audio", "output_text", "output_audio"] - text: Optional[str] = None - audio: Optional[str] = None # base64-encoded audio - transcript: Optional[str] = None + text: str | None = None + audio: str | None = None # base64-encoded audio + transcript: str | None = None class ConversationItem(BaseModel): @@ -263,15 +263,15 @@ class ConversationItem(BaseModel): """ id: str = Field(default_factory=lambda: str(uuid.uuid4().hex)) - object: Optional[Literal["realtime.item"]] = None + object: Literal["realtime.item"] | None = None type: Literal["message", "function_call", "function_call_output"] - status: Optional[Literal["completed", "in_progress", "incomplete"]] = None - role: Optional[Literal["user", "assistant", "system", "tool"]] = None - content: Optional[List[ItemContent]] = None - call_id: Optional[str] = None - name: Optional[str] = None - arguments: Optional[str] = None - output: Optional[str] = None + status: Literal["completed", "in_progress", "incomplete"] | None = None + role: Literal["user", "assistant", "system", "tool"] | None = None + content: list[ItemContent] | None = None + call_id: str | None = None + name: str | None = None + arguments: str | None = None + output: str | None = None class RealtimeConversation(BaseModel): @@ 
-293,7 +293,7 @@ class ResponseProperties(BaseModel): modalities: Output modalities for the response (text, audio, or both). """ - modalities: Optional[List[Literal["text", "audio"]]] = ["text", "audio"] + modalities: list[Literal["text", "audio"]] | None = ["text", "audio"] # @@ -312,11 +312,11 @@ class RealtimeError(BaseModel): event_id: Event ID associated with the error, if applicable. """ - type: Optional[str] = None - code: Optional[str] = "" + type: str | None = None + code: str | None = "" message: str - param: Optional[str] = None - event_id: Optional[str] = None + param: str | None = None + event_id: str | None = None # @@ -390,7 +390,7 @@ class ConversationItemCreateEvent(ClientEvent): """ type: Literal["conversation.item.create"] = "conversation.item.create" - previous_item_id: Optional[str] = None + previous_item_id: str | None = None item: ConversationItem @@ -403,7 +403,7 @@ class ResponseCreateEvent(ClientEvent): """ type: Literal["response.create"] = "response.create" - response: Optional[ResponseProperties] = None + response: ResponseProperties | None = None class ResponseCancelEvent(ClientEvent): @@ -471,7 +471,7 @@ class ConversationItemAdded(ServerEvent): """ type: Literal["conversation.item.added"] - previous_item_id: Optional[str] = None + previous_item_id: str | None = None item: ConversationItem @@ -527,7 +527,7 @@ class InputAudioBufferCommitted(ServerEvent): """ type: Literal["input_audio_buffer.committed"] - previous_item_id: Optional[str] = None + previous_item_id: str | None = None item_id: str @@ -646,11 +646,11 @@ class ResponseFunctionCallArgumentsDelta(ServerEvent): """ type: Literal["response.function_call_arguments.delta"] - response_id: Optional[str] = None - item_id: Optional[str] = None + response_id: str | None = None + item_id: str | None = None call_id: str delta: str - previous_item_id: Optional[str] = None + previous_item_id: str | None = None class ResponseFunctionCallArgumentsDone(ServerEvent): @@ -680,9 +680,9 @@ class 
Usage(BaseModel): output_tokens: Number of output tokens used. """ - total_tokens: Optional[int] = None - input_tokens: Optional[int] = None - output_tokens: Optional[int] = None + total_tokens: int | None = None + input_tokens: int | None = None + output_tokens: int | None = None class Response(BaseModel): @@ -699,9 +699,9 @@ class Response(BaseModel): id: str object: Literal["realtime.response"] status: Literal["completed", "in_progress", "incomplete", "cancelled", "failed"] - status_details: Optional[Any] = None - output: List[ConversationItem] - usage: Optional[Usage] = None + status_details: Any | None = None + output: list[ConversationItem] + usage: Usage | None = None class ResponseCreated(ServerEvent): @@ -727,7 +727,7 @@ class ResponseDone(ServerEvent): type: Literal["response.done"] response: Response - usage: Optional[Usage] = None + usage: Usage | None = None class ResponseOutputItemDone(ServerEvent): @@ -755,7 +755,7 @@ class ContentPart(BaseModel): """ type: str - transcript: Optional[str] = None + transcript: str | None = None class ResponseContentPartAdded(ServerEvent): diff --git a/src/pipecat/services/xai/realtime/llm.py b/src/pipecat/services/xai/realtime/llm.py index 8c7175b0a..3b2620a1f 100644 --- a/src/pipecat/services/xai/realtime/llm.py +++ b/src/pipecat/services/xai/realtime/llm.py @@ -13,9 +13,10 @@ https://docs.x.ai/docs/guides/voice/agent import base64 import json import time +from collections.abc import Mapping from dataclasses import dataclass, field from dataclasses import fields as dataclass_fields -from typing import Any, Dict, Mapping, Optional, Type +from typing import Any from loguru import logger @@ -110,7 +111,7 @@ class GrokRealtimeLLMSettings(LLMSettings): # -- apply_update override ----------------------------------------------- - def apply_update(self, delta: "GrokRealtimeLLMService.Settings") -> Dict[str, Any]: + def apply_update(self, delta: "GrokRealtimeLLMService.Settings") -> dict[str, Any]: """Merge a delta, keeping 
``system_instruction`` in sync with SP. When the delta contains ``session_properties``, it **replaces** the @@ -142,7 +143,7 @@ class GrokRealtimeLLMSettings(LLMSettings): @classmethod def from_mapping( - cls: Type["GrokRealtimeLLMService.Settings"], settings: Mapping[str, Any] + cls: type["GrokRealtimeLLMService.Settings"], settings: Mapping[str, Any] ) -> "GrokRealtimeLLMService.Settings": """Build a delta from a plain dict, routing SP keys into ``session_properties``. @@ -153,9 +154,9 @@ class GrokRealtimeLLMSettings(LLMSettings): # Determine which keys belong to our own dataclass fields. own_field_names = {f.name for f in dataclass_fields(cls)} - {"extra"} - top: Dict[str, Any] = {} - sp_dict: Dict[str, Any] = {} - extra: Dict[str, Any] = {} + top: dict[str, Any] = {} + sp_dict: dict[str, Any] = {} + extra: dict[str, Any] = {} sp_keys = set(events.SessionProperties.model_fields.keys()) @@ -204,8 +205,8 @@ class GrokRealtimeLLMService(LLMService): *, api_key: str, base_url: str = "wss://api.x.ai/v1/realtime", - session_properties: Optional[events.SessionProperties] = None, - settings: Optional[Settings] = None, + session_properties: events.SessionProperties | None = None, + settings: Settings | None = None, start_audio_paused: bool = False, **kwargs, ): @@ -308,7 +309,7 @@ class GrokRealtimeLLMService(LLMService): """ self._audio_input_paused = paused - def _get_configured_sample_rate(self, direction: str) -> Optional[int]: + def _get_configured_sample_rate(self, direction: str) -> int | None: """Get manually configured sample rate for input or output. 
Args: diff --git a/src/pipecat/services/xai/tts.py b/src/pipecat/services/xai/tts.py index c580ce912..17f67cf9a 100644 --- a/src/pipecat/services/xai/tts.py +++ b/src/pipecat/services/xai/tts.py @@ -10,8 +10,8 @@ Uses xAI's HTTP TTS endpoint documented at: https://docs.x.ai/developers/model-capabilities/audio/text-to-speech """ +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import AsyncGenerator, Optional import aiohttp from loguru import logger @@ -23,7 +23,7 @@ from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts -def language_to_xai_language(language: Language) -> Optional[str]: +def language_to_xai_language(language: Language) -> str | None: """Convert a Language enum to xAI language code. Args: @@ -83,10 +83,10 @@ class XAIHttpTTSService(TTSService): *, api_key: str, base_url: str = "https://api.x.ai/v1/tts", - sample_rate: Optional[int] = None, - encoding: Optional[str] = "pcm", - aiohttp_session: Optional[aiohttp.ClientSession] = None, - settings: Optional[Settings] = None, + sample_rate: int | None = None, + encoding: str | None = "pcm", + aiohttp_session: aiohttp.ClientSession | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the xAI TTS service. @@ -127,7 +127,7 @@ class XAIHttpTTSService(TTSService): """Check if this service can generate processing metrics.""" return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to xAI language format. 
Args: diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index b164f8945..c39d79b0b 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -10,8 +10,9 @@ This module provides integration with Coqui XTTS streaming server for text-to-speech synthesis using local Docker deployment. """ +from collections.abc import AsyncGenerator from dataclasses import dataclass -from typing import Any, AsyncGenerator, Dict, Optional +from typing import Any import aiohttp from loguru import logger @@ -36,7 +37,7 @@ from pipecat.utils.tracing.service_decorators import traced_tts # https://github.com/coqui-ai/xtts-streaming-server -def language_to_xtts_language(language: Language) -> Optional[str]: +def language_to_xtts_language(language: Language) -> str | None: """Convert a Language enum to XTTS language code. Args: @@ -89,12 +90,12 @@ class XTTSService(TTSService): def __init__( self, *, - voice_id: Optional[str] = None, + voice_id: str | None = None, base_url: str, aiohttp_session: aiohttp.ClientSession, language: Language = Language.EN, - sample_rate: Optional[int] = None, - settings: Optional[Settings] = None, + sample_rate: int | None = None, + settings: Settings | None = None, **kwargs, ): """Initialize the XTTS service. @@ -149,7 +150,7 @@ class XTTSService(TTSService): # Init-only fields (not runtime-updatable) self._base_url = base_url - self._studio_speakers: Optional[Dict[str, Any]] = None + self._studio_speakers: dict[str, Any] | None = None self._aiohttp_session = aiohttp_session self._resampler = create_stream_resampler() @@ -162,7 +163,7 @@ class XTTSService(TTSService): """ return True - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: """Convert a Language enum to XTTS service language format. 
Args: diff --git a/src/pipecat/tests/utils.py b/src/pipecat/tests/utils.py index c837a58b7..23df0ff81 100644 --- a/src/pipecat/tests/utils.py +++ b/src/pipecat/tests/utils.py @@ -7,8 +7,8 @@ """Testing utilities for Pipecat pipeline components.""" import asyncio +from collections.abc import Awaitable, Callable, Sequence from dataclasses import dataclass -from typing import Awaitable, Callable, List, Optional, Sequence, Tuple from pipecat.frames.frames import ( EndFrame, @@ -124,15 +124,15 @@ async def run_test( processor: FrameProcessor, *, enable_rtvi: bool = False, - expected_down_frames: Optional[Sequence[type]] = None, - expected_up_frames: Optional[Sequence[type]] = None, + expected_down_frames: Sequence[type] | None = None, + expected_up_frames: Sequence[type] | None = None, frames_to_send: Sequence[Frame], frames_to_send_direction: FrameDirection = FrameDirection.DOWNSTREAM, ignore_start: bool = True, - observers: Optional[List[BaseObserver]] = None, - pipeline_params: Optional[PipelineParams] = None, + observers: list[BaseObserver] | None = None, + pipeline_params: PipelineParams | None = None, send_end_frame: bool = True, -) -> Tuple[Sequence[Frame], Sequence[Frame]]: +) -> tuple[Sequence[Frame], Sequence[Frame]]: """Run a test pipeline with the specified processor and validate frame flow. This function creates a test pipeline with the given processor, sends the diff --git a/src/pipecat/transcriptions/language.py b/src/pipecat/transcriptions/language.py index 1980590e3..1606bfeac 100644 --- a/src/pipecat/transcriptions/language.py +++ b/src/pipecat/transcriptions/language.py @@ -11,24 +11,10 @@ and BCP 47 standards, supporting both language-only and language-region combinations for various speech and text processing services. 
""" -import sys -from enum import Enum +from enum import StrEnum from loguru import logger -if sys.version_info < (3, 11): - - class StrEnum(str, Enum): - """String enumeration base class for Python < 3.11 compatibility.""" - - def __new__(cls, value): - """Create a new instance of the StrEnum.""" - obj = str.__new__(cls, value) - obj._value_ = value - return obj -else: - from enum import StrEnum - class Language(StrEnum): """Language codes for speech and text processing services. diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py index 4fb4a1736..02b090181 100644 --- a/src/pipecat/transports/base_input.py +++ b/src/pipecat/transports/base_input.py @@ -260,6 +260,6 @@ class BaseInputTransport(FrameProcessor): await self.push_frame(frame) self._audio_in_queue.task_done() - except asyncio.TimeoutError: + except TimeoutError: if not audio_received: continue diff --git a/src/pipecat/transports/base_output.py b/src/pipecat/transports/base_output.py index fe1044f51..4e946bbb1 100644 --- a/src/pipecat/transports/base_output.py +++ b/src/pipecat/transports/base_output.py @@ -13,8 +13,9 @@ output processing, including frame buffering, mixing, timing, and media streamin import asyncio import itertools import time +from collections.abc import AsyncGenerator, Mapping from concurrent.futures import ThreadPoolExecutor -from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional +from typing import Any from loguru import logger from PIL import Image @@ -87,7 +88,7 @@ class BaseOutputTransport(FrameProcessor): # We will have one media sender per output frame destination. This allow # us to send multiple streams at the same time if the transport allows # it. 
- self._media_senders: Dict[Any, "BaseOutputTransport.MediaSender"] = {} + self._media_senders: dict[Any, BaseOutputTransport.MediaSender] = {} @property def sample_rate(self) -> int: @@ -383,7 +384,7 @@ class BaseOutputTransport(FrameProcessor): self, transport: "BaseOutputTransport", *, - destination: Optional[str], + destination: str | None, sample_rate: int, audio_chunk_size: int, params: TransportParams, @@ -414,7 +415,7 @@ class BaseOutputTransport(FrameProcessor): # The user can provide a single mixer, to be used by the default # destination, or a destination/mixer mapping. - self._mixer: Optional[BaseAudioMixer] = None + self._mixer: BaseAudioMixer | None = None # These are the images that we should send at our desired framerate. self._video_images = None @@ -431,9 +432,9 @@ class BaseOutputTransport(FrameProcessor): # Last time the bot actually spoke. self._bot_speech_last_time = 0 - self._audio_task: Optional[asyncio.Task] = None - self._video_task: Optional[asyncio.Task] = None - self._clock_task: Optional[asyncio.Task] = None + self._audio_task: asyncio.Task | None = None + self._video_task: asyncio.Task | None = None + self._clock_task: asyncio.Task | None = None @property def sample_rate(self) -> int: @@ -753,7 +754,7 @@ class BaseOutputTransport(FrameProcessor): ) yield frame self._audio_queue.task_done() - except asyncio.TimeoutError: + except TimeoutError: # Fallback: notify the bot stopped speaking upstream if necessary based on timeout. await self._bot_stopped_speaking() @@ -856,7 +857,7 @@ class BaseOutputTransport(FrameProcessor): """ self._video_images = itertools.cycle([image]) - async def _set_video_images(self, images: List[OutputImageRawFrame]): + async def _set_video_images(self, images: list[OutputImageRawFrame]): """Set multiple video images for cycling output. 
Args: diff --git a/src/pipecat/transports/base_transport.py b/src/pipecat/transports/base_transport.py index ed14f6eb9..829fc6fe1 100644 --- a/src/pipecat/transports/base_transport.py +++ b/src/pipecat/transports/base_transport.py @@ -12,7 +12,7 @@ functionality. """ from abc import abstractmethod -from typing import List, Mapping, Optional +from collections.abc import Mapping from pydantic import BaseModel, ConfigDict, Field @@ -57,18 +57,18 @@ class TransportParams(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True) audio_out_enabled: bool = False - audio_out_sample_rate: Optional[int] = None + audio_out_sample_rate: int | None = None audio_out_channels: int = 1 audio_out_bitrate: int = 96000 audio_out_10ms_chunks: int = 4 - audio_out_mixer: Optional[BaseAudioMixer | Mapping[Optional[str], BaseAudioMixer]] = None - audio_out_destinations: List[str] = Field(default_factory=list) + audio_out_mixer: BaseAudioMixer | Mapping[str | None, BaseAudioMixer] | None = None + audio_out_destinations: list[str] = Field(default_factory=list) audio_out_end_silence_secs: int = 2 audio_out_auto_silence: bool = True audio_in_enabled: bool = False - audio_in_sample_rate: Optional[int] = None + audio_in_sample_rate: int | None = None audio_in_channels: int = 1 - audio_in_filter: Optional[BaseAudioFilter] = None + audio_in_filter: BaseAudioFilter | None = None audio_in_stream_on_start: bool = True audio_in_passthrough: bool = True video_in_enabled: bool = False @@ -79,8 +79,8 @@ class TransportParams(BaseModel): video_out_bitrate: int = 800000 video_out_framerate: int = 30 video_out_color_format: str = "RGB" - video_out_codec: Optional[str] = None - video_out_destinations: List[str] = Field(default_factory=list) + video_out_codec: str | None = None + video_out_destinations: list[str] = Field(default_factory=list) class BaseTransport(BaseObject): @@ -93,9 +93,9 @@ class BaseTransport(BaseObject): def __init__( self, *, - name: Optional[str] = None, - input_name: 
Optional[str] = None, - output_name: Optional[str] = None, + name: str | None = None, + input_name: str | None = None, + output_name: str | None = None, ): """Initialize the base transport. diff --git a/src/pipecat/transports/daily/transport.py b/src/pipecat/transports/daily/transport.py index b58701a0b..7084f1d15 100644 --- a/src/pipecat/transports/daily/transport.py +++ b/src/pipecat/transports/daily/transport.py @@ -13,10 +13,11 @@ real-time communication features. import asyncio import time +from collections.abc import Awaitable, Callable, Mapping from concurrent.futures import CancelledError as FuturesCancelledError from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass, field -from typing import Any, Awaitable, Callable, Dict, Mapping, Optional, Tuple +from typing import Any import aiohttp from loguru import logger @@ -87,7 +88,7 @@ class DailyOutputTransportMessageFrame(OutputTransportMessageFrame): participant_id: Optional ID of the participant this message is for/from. """ - participant_id: Optional[str] = None + participant_id: str | None = None @dataclass @@ -98,7 +99,7 @@ class DailyOutputTransportMessageUrgentFrame(OutputTransportMessageUrgentFrame): participant_id: Optional ID of the participant this message is for/from. """ - participant_id: Optional[str] = None + participant_id: str | None = None @dataclass @@ -109,7 +110,7 @@ class DailyInputTransportMessageFrame(InputTransportMessageFrame): participant_id: Optional ID of the participant this message is for/from. """ - participant_id: Optional[str] = None + participant_id: str | None = None @dataclass @@ -171,9 +172,9 @@ class DailyOutputDTMFFrame(OutputDTMFFrame): or ``auto``). When ``None``, Daily's default method is used. 
""" - session_id: Optional[str] = None - digit_duration_ms: Optional[int] = None - method: Optional[str] = None + session_id: str | None = None + digit_duration_ms: int | None = None + method: str | None = None @dataclass @@ -194,9 +195,9 @@ class DailyOutputDTMFUrgentFrame(OutputDTMFUrgentFrame): or ``auto``). When ``None``, Daily's default method is used. """ - session_id: Optional[str] = None - digit_duration_ms: Optional[int] = None - method: Optional[str] = None + session_id: str | None = None + digit_duration_ms: int | None = None + method: str | None = None class WebRTCVADAnalyzer(VADAnalyzer): @@ -205,7 +206,7 @@ class WebRTCVADAnalyzer(VADAnalyzer): Implements voice activity detection using Daily's native WebRTC VAD. """ - def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None): + def __init__(self, *, sample_rate: int | None = None, params: VADParams | None = None): """Initialize the WebRTC VAD analyzer. Args: @@ -295,7 +296,7 @@ class DailyCustomVideoTrackParams(BaseModel): width: int = 1024 height: int = 768 color_format: str = "RGB" - send_settings: Optional[Dict[str, Any]] = None + send_settings: dict[str, Any] | None = None class DailyCustomAudioTrackParams(BaseModel): @@ -311,9 +312,9 @@ class DailyCustomAudioTrackParams(BaseModel): See https://reference-python.daily.co/types.html#audiopublishingsettings """ - sample_rate: Optional[int] = None + sample_rate: int | None = None channels: int = 1 - send_settings: Optional[Dict[str, Any]] = None + send_settings: dict[str, Any] | None = None class DailyParams(TransportParams): @@ -336,9 +337,9 @@ class DailyParams(TransportParams): api_key: str = "" audio_in_user_tracks: bool = True camera_out_enabled: bool = True - custom_audio_track_params: Optional[Mapping[str, DailyCustomAudioTrackParams]] = None - custom_video_track_params: Optional[Mapping[str, DailyCustomVideoTrackParams]] = None - dialin_settings: Optional[DailyDialinSettings] = None + 
custom_audio_track_params: Mapping[str, DailyCustomAudioTrackParams] | None = None + custom_video_track_params: Mapping[str, DailyCustomVideoTrackParams] | None = None + dialin_settings: DailyDialinSettings | None = None microphone_out_enabled: bool = True transcription_enabled: bool = False transcription_settings: DailyTranscriptionSettings = DailyTranscriptionSettings() @@ -482,7 +483,7 @@ class DailyTransportClient(EventHandler): def __init__( self, room_url: str, - token: Optional[str], + token: str | None, bot_name: str, params: DailyParams, callbacks: DailyCallbacks, @@ -505,7 +506,7 @@ class DailyTransportClient(EventHandler): Daily.init() self._room_url: str = room_url - self._token: Optional[str] = token + self._token: str | None = token self._bot_name: str = bot_name self._params: DailyParams = params self._callbacks = callbacks @@ -524,7 +525,7 @@ class DailyTransportClient(EventHandler): self._joined_event = asyncio.Event() self._leave_counter = 0 - self._task_manager: Optional[BaseTaskManager] = None + self._task_manager: BaseTaskManager | None = None # We use the executor to cleanup the client. We just do it from one # place, so only one thread is really needed. 
@@ -550,11 +551,11 @@ class DailyTransportClient(EventHandler): self._in_sample_rate = 0 self._out_sample_rate = 0 - self._speaker: Optional[VirtualSpeakerDevice] = None - self._camera_track: Optional[DailyVideoTrack] = None - self._microphone_track: Optional[DailyAudioTrack] = None - self._custom_audio_tracks: Dict[str, DailyAudioTrack] = {} - self._custom_video_tracks: Dict[str, DailyVideoTrack] = {} + self._speaker: VirtualSpeakerDevice | None = None + self._camera_track: DailyVideoTrack | None = None + self._microphone_track: DailyAudioTrack | None = None + self._custom_audio_tracks: dict[str, DailyAudioTrack] = {} + self._custom_video_tracks: dict[str, DailyVideoTrack] = {} def _speaker_name(self): """Generate a unique virtual speaker name for this client instance.""" @@ -598,7 +599,7 @@ class DailyTransportClient(EventHandler): async def send_message( self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame - ) -> Optional[CallClientError]: + ) -> CallClientError | None: """Send an application message to participants. Args: @@ -623,7 +624,7 @@ class DailyTransportClient(EventHandler): ) return await future - async def read_next_audio_frame(self) -> Optional[InputAudioRawFrame]: + async def read_next_audio_frame(self) -> InputAudioRawFrame | None: """Reads the next 20ms audio frame from the virtual speaker.""" if not self._speaker: return None @@ -647,9 +648,7 @@ class DailyTransportClient(EventHandler): await asyncio.sleep(0.01) return None - async def register_audio_destination( - self, destination: str, auto_silence: Optional[bool] = True - ): + async def register_audio_destination(self, destination: str, auto_silence: bool | None = True): """Register a custom audio destination for multi-track output. 
Args: @@ -661,7 +660,7 @@ class DailyTransportClient(EventHandler): self._custom_audio_tracks[destination] = await self.add_custom_audio_track( destination, params=params, auto_silence=auto_silence ) - publishing: Dict[str, Any] = {"customAudio": {destination: True}} + publishing: dict[str, Any] = {"customAudio": {destination: True}} if params and params.send_settings: publishing["customAudio"][destination] = {"sendSettings": params.send_settings} self._client.update_publishing(publishing) @@ -676,7 +675,7 @@ class DailyTransportClient(EventHandler): self._custom_video_tracks[destination] = await self.add_custom_video_track( destination, params=params ) - publishing: Dict[str, Any] = {"customVideo": {destination: True}} + publishing: dict[str, Any] = {"customVideo": {destination: True}} if params and params.send_settings: publishing["customVideo"][destination] = {"sendSettings": params.send_settings} self._client.update_publishing(publishing) @@ -693,7 +692,7 @@ class DailyTransportClient(EventHandler): future = self._get_event_loop().create_future() destination = frame.transport_destination - audio_source: Optional[CustomAudioSource] = None + audio_source: CustomAudioSource | None = None if not destination and self._microphone_track: audio_source = self._microphone_track.source elif destination and destination in self._custom_audio_tracks: @@ -719,7 +718,7 @@ class DailyTransportClient(EventHandler): True if the video frame was written successfully, False otherwise. 
""" destination = frame.transport_destination - video_source: Optional[CustomVideoSource] = None + video_source: CustomVideoSource | None = None if not destination and self._camera_track: video_source = self._camera_track.source elif destination and destination in self._custom_video_tracks: @@ -992,7 +991,7 @@ class DailyTransportClient(EventHandler): """ return self._client.participant_counts() - async def start_dialout(self, settings) -> Tuple[str, Optional[CallClientError]]: + async def start_dialout(self, settings) -> tuple[str, CallClientError | None]: """Start a dial-out call to a phone number. Args: @@ -1006,7 +1005,7 @@ class DailyTransportClient(EventHandler): self._client.start_dialout(settings, completion=completion_callback(future)) return await future - async def stop_dialout(self, participant_id) -> Optional[CallClientError]: + async def stop_dialout(self, participant_id) -> CallClientError | None: """Stop a dial-out call for a specific participant. Args: @@ -1019,7 +1018,7 @@ class DailyTransportClient(EventHandler): self._client.stop_dialout(participant_id, completion=completion_callback(future)) return await future - async def send_dtmf(self, settings) -> Optional[CallClientError]: + async def send_dtmf(self, settings) -> CallClientError | None: """Send DTMF tones during a call. Args: @@ -1039,7 +1038,7 @@ class DailyTransportClient(EventHandler): self._client.send_dtmf(settings, completion=completion_callback(future)) return await future - async def sip_call_transfer(self, settings) -> Optional[CallClientError]: + async def sip_call_transfer(self, settings) -> CallClientError | None: """Transfer a SIP call to another destination. 
Args: @@ -1061,7 +1060,7 @@ class DailyTransportClient(EventHandler): self._client.sip_call_transfer(settings, completion=completion_callback(future)) return await future - async def sip_refer(self, settings) -> Optional[CallClientError]: + async def sip_refer(self, settings) -> CallClientError | None: """Send a SIP REFER request. Args: @@ -1076,7 +1075,7 @@ class DailyTransportClient(EventHandler): async def start_recording( self, streaming_settings, stream_id, force_new - ) -> Tuple[str, Optional[CallClientError]]: + ) -> tuple[str, CallClientError | None]: """Start recording the call. Args: @@ -1094,7 +1093,7 @@ class DailyTransportClient(EventHandler): ) return await future - async def stop_recording(self, stream_id) -> Optional[CallClientError]: + async def stop_recording(self, stream_id) -> CallClientError | None: """Stop recording the call. Args: @@ -1107,7 +1106,7 @@ class DailyTransportClient(EventHandler): self._client.stop_recording(stream_id, completion=completion_callback(future)) return await future - async def start_transcription(self, settings) -> Optional[CallClientError]: + async def start_transcription(self, settings) -> CallClientError | None: """Start transcription for the call. Args: @@ -1123,7 +1122,7 @@ class DailyTransportClient(EventHandler): self._client.start_transcription(settings=settings, completion=completion_callback(future)) return await future - async def stop_transcription(self) -> Optional[CallClientError]: + async def stop_transcription(self) -> CallClientError | None: """Stop transcription for the call. Returns: @@ -1137,8 +1136,8 @@ class DailyTransportClient(EventHandler): return await future async def send_prebuilt_chat_message( - self, message: str, user_name: Optional[str] = None - ) -> Optional[CallClientError]: + self, message: str, user_name: str | None = None + ) -> CallClientError | None: """Send a chat message to Daily's Prebuilt main room. 
Args: @@ -1250,8 +1249,8 @@ class DailyTransportClient(EventHandler): async def add_custom_audio_track( self, track_name: str, - params: Optional[DailyCustomAudioTrackParams] = None, - auto_silence: Optional[bool] = True, + params: DailyCustomAudioTrackParams | None = None, + auto_silence: bool | None = True, ) -> DailyAudioTrack: """Add a custom audio track for multi-stream output. @@ -1286,7 +1285,7 @@ class DailyTransportClient(EventHandler): return track - async def remove_custom_audio_track(self, track_name: str) -> Optional[CallClientError]: + async def remove_custom_audio_track(self, track_name: str) -> CallClientError | None: """Remove a custom audio track. Args: @@ -1305,7 +1304,7 @@ class DailyTransportClient(EventHandler): async def add_custom_video_track( self, track_name: str, - params: Optional[DailyCustomVideoTrackParams] = None, + params: DailyCustomVideoTrackParams | None = None, ) -> DailyVideoTrack: """Add a custom video track for multi-stream output. @@ -1336,7 +1335,7 @@ class DailyTransportClient(EventHandler): return DailyVideoTrack(source=video_source, track=video_track) - async def remove_custom_video_track(self, track_name: str) -> Optional[CallClientError]: + async def remove_custom_video_track(self, track_name: str) -> CallClientError | None: """Remove a custom video track. Args: @@ -1354,7 +1353,7 @@ class DailyTransportClient(EventHandler): async def update_transcription( self, participants=None, instance_id=None - ) -> Optional[CallClientError]: + ) -> CallClientError | None: """Update transcription settings for specific participants. Args: @@ -1372,7 +1371,7 @@ class DailyTransportClient(EventHandler): async def update_subscriptions( self, participant_settings=None, profile_settings=None - ) -> Optional[CallClientError]: + ) -> CallClientError | None: """Update media subscription settings. 
Args: @@ -1392,7 +1391,7 @@ class DailyTransportClient(EventHandler): async def update_publishing( self, publishing_settings: Mapping[str, Any] - ) -> Optional[CallClientError]: + ) -> CallClientError | None: """Update media publishing settings. Args: @@ -1410,7 +1409,7 @@ class DailyTransportClient(EventHandler): async def update_remote_participants( self, remote_participants: Mapping[str, Any] - ) -> Optional[CallClientError]: + ) -> CallClientError | None: """Update settings for remote participants. Args: @@ -1757,7 +1756,7 @@ class DailyInputTransport(BaseInputTransport): self._capture_participant_audio = [] # Audio task when using a virtual speaker (i.e. no user tracks). - self._audio_in_task: Optional[asyncio.Task] = None + self._audio_in_task: asyncio.Task | None = None async def start_audio_in_streaming(self): """Start receiving audio from participants.""" @@ -2192,7 +2191,7 @@ class DailyOutputTransport(BaseOutputTransport): if not frame.buttons: return - settings: Dict[str, Any] = {"tones": frame.to_string()} + settings: dict[str, Any] = {"tones": frame.to_string()} if isinstance(frame, (DailyOutputDTMFFrame, DailyOutputDTMFUrgentFrame)): if frame.session_id is not None: settings["sessionId"] = frame.session_id @@ -2272,11 +2271,11 @@ class DailyTransport(BaseTransport): def __init__( self, room_url: str, - token: Optional[str], + token: str | None, bot_name: str, - params: Optional[DailyParams] = None, - input_name: Optional[str] = None, - output_name: Optional[str] = None, + params: DailyParams | None = None, + input_name: str | None = None, + output_name: str | None = None, ): """Initialize the Daily transport. 
@@ -2326,8 +2325,8 @@ class DailyTransport(BaseTransport): self._client = DailyTransportClient( room_url, token, bot_name, self._params, callbacks, self.name ) - self._input: Optional[DailyInputTransport] = None - self._output: Optional[DailyOutputTransport] = None + self._input: DailyInputTransport | None = None + self._output: DailyOutputTransport | None = None self._other_participant_has_joined = False @@ -2459,7 +2458,7 @@ class DailyTransport(BaseTransport): """ return self._client.participant_counts() - async def send_dtmf(self, settings) -> Optional[CallClientError]: + async def send_dtmf(self, settings) -> CallClientError | None: """Send DTMF tones during a call. Args: @@ -2475,7 +2474,7 @@ class DailyTransport(BaseTransport): logger.error(f"Unable to send DTMF: {error}") return error - async def start_dialout(self, settings=None) -> Tuple[str, Optional[CallClientError]]: + async def start_dialout(self, settings=None) -> tuple[str, CallClientError | None]: """Start a dial-out call to a phone number. Args: @@ -2492,7 +2491,7 @@ class DailyTransport(BaseTransport): logger.error(f"Unable to start dialout: {error}") return session_id, error - async def stop_dialout(self, participant_id) -> Optional[CallClientError]: + async def stop_dialout(self, participant_id) -> CallClientError | None: """Stop a dial-out call for a specific participant. Args: @@ -2508,7 +2507,7 @@ class DailyTransport(BaseTransport): logger.error(f"Unable to stop dialout: {error}") return error - async def sip_call_transfer(self, settings) -> Optional[CallClientError]: + async def sip_call_transfer(self, settings) -> CallClientError | None: """Transfer a SIP call to another destination. Args: @@ -2524,7 +2523,7 @@ class DailyTransport(BaseTransport): logger.error(f"Unable to transfer SIP call: {error}") return error - async def sip_refer(self, settings) -> Optional[CallClientError]: + async def sip_refer(self, settings) -> CallClientError | None: """Send a SIP REFER request. 
Args: @@ -2542,7 +2541,7 @@ class DailyTransport(BaseTransport): async def start_recording( self, streaming_settings=None, stream_id=None, force_new=None - ) -> Tuple[str, Optional[CallClientError]]: + ) -> tuple[str, CallClientError | None]: """Start recording the call. Args: @@ -2563,7 +2562,7 @@ class DailyTransport(BaseTransport): logger.error(f"Unable to start recording: {error}") return r_id, error - async def stop_recording(self, stream_id=None) -> Optional[CallClientError]: + async def stop_recording(self, stream_id=None) -> CallClientError | None: """Stop recording the call. Args: @@ -2579,7 +2578,7 @@ class DailyTransport(BaseTransport): logger.error(f"Unable to stop recording: {error}") return error - async def start_transcription(self, settings=None) -> Optional[CallClientError]: + async def start_transcription(self, settings=None) -> CallClientError | None: """Start transcription for the call. Args: @@ -2595,7 +2594,7 @@ class DailyTransport(BaseTransport): logger.error(f"Unable to start transcription: {error}") return error - async def stop_transcription(self) -> Optional[CallClientError]: + async def stop_transcription(self) -> CallClientError | None: """Stop transcription for the call. Returns: @@ -2609,8 +2608,8 @@ class DailyTransport(BaseTransport): return error async def send_prebuilt_chat_message( - self, message: str, user_name: Optional[str] = None - ) -> Optional[CallClientError]: + self, message: str, user_name: str | None = None + ) -> CallClientError | None: """Send a chat message to Daily's Prebuilt main room. Args: @@ -2671,7 +2670,7 @@ class DailyTransport(BaseTransport): async def update_publishing( self, publishing_settings: Mapping[str, Any] - ) -> Optional[CallClientError]: + ) -> CallClientError | None: """Update media publishing settings. 
Args: @@ -2689,7 +2688,7 @@ class DailyTransport(BaseTransport): async def update_subscriptions( self, participant_settings=None, profile_settings=None - ) -> Optional[CallClientError]: + ) -> CallClientError | None: """Update media subscription settings. Args: @@ -2712,7 +2711,7 @@ class DailyTransport(BaseTransport): async def update_remote_participants( self, remote_participants: Mapping[str, Any] - ) -> Optional[CallClientError]: + ) -> CallClientError | None: """Update settings for remote participants. Args: @@ -2822,7 +2821,7 @@ class DailyTransport(BaseTransport): return logger.debug("Event dialin-ready was handled successfully") - except asyncio.TimeoutError: + except TimeoutError: logger.error(f"Timeout handling dialin-ready event ({url})") except Exception as e: logger.error(f"Error handling dialin-ready event ({url}): {e}") diff --git a/src/pipecat/transports/daily/utils.py b/src/pipecat/transports/daily/utils.py index 8c7526357..8926c406e 100644 --- a/src/pipecat/transports/daily/utils.py +++ b/src/pipecat/transports/daily/utils.py @@ -10,7 +10,7 @@ Methods that wrap the Daily API to create rooms, check room URLs, and get meetin """ import time -from typing import Any, Dict, List, Literal, Optional +from typing import Any, Literal from urllib.parse import urlparse import aiohttp @@ -35,8 +35,8 @@ class DailyRoomSipParams(BaseModel): video: bool = False sip_mode: str = "dial-in" num_endpoints: int = 1 - codecs: Optional[Dict[str, List[str]]] = None - provider: Optional[str] = None + codecs: dict[str, list[str]] | None = None + provider: str | None = None class RecordingsBucketConfig(BaseModel): @@ -102,20 +102,20 @@ class DailyRoomProperties(BaseModel): model_config = ConfigDict(extra="allow") - exp: Optional[float] = None + exp: float | None = None enable_chat: bool = False enable_prejoin_ui: bool = False enable_emoji_reactions: bool = False eject_at_room_exp: bool = False - enable_dialout: Optional[bool] = None - enable_recording: 
Optional[Literal["cloud", "cloud-audio-only", "local", "raw-tracks"]] = None - enable_transcription_storage: Optional[bool] = None - geo: Optional[str] = None - max_participants: Optional[int] = None - recordings_bucket: Optional[RecordingsBucketConfig] = None - transcription_bucket: Optional[TranscriptionBucketConfig] = None - sip: Optional[DailyRoomSipParams] = None - sip_uri: Optional[Dict[str, Any]] = None + enable_dialout: bool | None = None + enable_recording: Literal["cloud", "cloud-audio-only", "local", "raw-tracks"] | None = None + enable_transcription_storage: bool | None = None + geo: str | None = None + max_participants: int | None = None + recordings_bucket: RecordingsBucketConfig | None = None + transcription_bucket: TranscriptionBucketConfig | None = None + sip: DailyRoomSipParams | None = None + sip_uri: dict[str, Any] | None = None start_video_off: bool = False @property @@ -140,7 +140,7 @@ class DailyRoomParams(BaseModel): properties: Room configuration properties. """ - name: Optional[str] = None + name: str | None = None privacy: Literal["private", "public"] = "public" properties: DailyRoomProperties = Field(default_factory=DailyRoomProperties) @@ -191,21 +191,21 @@ class DailyMeetingTokenProperties(BaseModel): permissions: Specifies the initial default permissions for a non-meeting-owner participant. 
""" - room_name: Optional[str] = None - eject_at_token_exp: Optional[bool] = None - eject_after_elapsed: Optional[int] = None - nbf: Optional[int] = None - exp: Optional[int] = None - is_owner: Optional[bool] = None - user_name: Optional[str] = None - user_id: Optional[str] = None - enable_screenshare: Optional[bool] = None - start_video_off: Optional[bool] = None - start_audio_off: Optional[bool] = None - enable_recording: Optional[Literal["cloud", "cloud-audio-only", "local", "raw-tracks"]] = None - enable_prejoin_ui: Optional[bool] = None - start_cloud_recording: Optional[bool] = None - permissions: Optional[Dict[str, Any]] = None + room_name: str | None = None + eject_at_token_exp: bool | None = None + eject_after_elapsed: int | None = None + nbf: int | None = None + exp: int | None = None + is_owner: bool | None = None + user_name: str | None = None + user_id: str | None = None + enable_screenshare: bool | None = None + start_video_off: bool | None = None + start_audio_off: bool | None = None + enable_recording: Literal["cloud", "cloud-audio-only", "local", "raw-tracks"] | None = None + enable_prejoin_ui: bool | None = None + start_cloud_recording: bool | None = None + permissions: dict[str, Any] | None = None class DailyMeetingTokenParams(BaseModel): @@ -304,7 +304,7 @@ class DailyRESTHelper: expiry_time: float = 60 * 60, eject_at_token_exp: bool = False, owner: bool = True, - params: Optional[DailyMeetingTokenParams] = None, + params: DailyMeetingTokenParams | None = None, ) -> str: """Generate a meeting token for user to join a Daily room. 
diff --git a/src/pipecat/transports/heygen/transport.py b/src/pipecat/transports/heygen/transport.py index d79d0080e..ac13c2fe9 100644 --- a/src/pipecat/transports/heygen/transport.py +++ b/src/pipecat/transports/heygen/transport.py @@ -16,7 +16,7 @@ The module consists of three main components: - HeyGenTransport: Main transport implementation that coordinates input/output transports """ -from typing import Any, Optional, Union +from typing import Any import aiohttp from loguru import logger @@ -309,10 +309,10 @@ class HeyGenTransport(BaseTransport): session: aiohttp.ClientSession, api_key: str, params: HeyGenParams = HeyGenParams(), - input_name: Optional[str] = None, - output_name: Optional[str] = None, - session_request: Optional[Union[LiveAvatarNewSessionRequest, NewSessionRequest]] = None, - service_type: Optional[ServiceType] = None, + input_name: str | None = None, + output_name: str | None = None, + session_request: LiveAvatarNewSessionRequest | NewSessionRequest | None = None, + service_type: ServiceType | None = None, ): """Initialize the HeyGen transport. @@ -346,8 +346,8 @@ class HeyGenTransport(BaseTransport): on_participant_disconnected=self._on_participant_disconnected, ), ) - self._input: Optional[HeyGenInputTransport] = None - self._output: Optional[HeyGenOutputTransport] = None + self._input: HeyGenInputTransport | None = None + self._output: HeyGenOutputTransport | None = None self._HeyGen_participant_id = None # Register supported handlers. The user will only be able to register diff --git a/src/pipecat/transports/lemonslice/api.py b/src/pipecat/transports/lemonslice/api.py index 8ced216f0..f8114c5f4 100644 --- a/src/pipecat/transports/lemonslice/api.py +++ b/src/pipecat/transports/lemonslice/api.py @@ -10,7 +10,7 @@ This module provides helper classes for interacting with the LemonSlice API, including session creation and termination. 
""" -from typing import Any, Optional +from typing import Any import aiohttp from loguru import logger @@ -38,15 +38,15 @@ class LemonSliceApi: async def create_session( self, *, - agent_image_url: Optional[str] = None, - agent_id: Optional[str] = None, - agent_prompt: Optional[str] = None, - idle_timeout: Optional[int] = None, - daily_room_url: Optional[str] = None, - daily_token: Optional[str] = None, - connection_properties: Optional[dict[str, Any]] = None, - extra_properties: Optional[dict[str, Any]] = None, - api_url: Optional[str] = None, + agent_image_url: str | None = None, + agent_id: str | None = None, + agent_prompt: str | None = None, + idle_timeout: int | None = None, + daily_room_url: str | None = None, + daily_token: str | None = None, + connection_properties: dict[str, Any] | None = None, + extra_properties: dict[str, Any] | None = None, + api_url: str | None = None, ) -> dict: """Create a new session with the specified agent_id or agent_image_url. diff --git a/src/pipecat/transports/lemonslice/transport.py b/src/pipecat/transports/lemonslice/transport.py index 18e53a4c6..90966ade2 100644 --- a/src/pipecat/transports/lemonslice/transport.py +++ b/src/pipecat/transports/lemonslice/transport.py @@ -10,8 +10,9 @@ This module adds LemonSlice avatars to Daily rooms, enabling real-time voice conversations with synchronized avatars. 
""" +from collections.abc import Awaitable, Callable, Mapping from functools import partial -from typing import Any, Awaitable, Callable, Mapping, Optional +from typing import Any import aiohttp from daily.daily import AudioData @@ -60,14 +61,14 @@ class LemonSliceNewSessionRequest(BaseModel): model_config = ConfigDict(extra="allow") - agent_image_url: Optional[str] = None - agent_id: Optional[str] = None - agent_prompt: Optional[str] = None - idle_timeout: Optional[int] = None - daily_room_url: Optional[str] = None - daily_token: Optional[str] = None - lemonslice_properties: Optional[dict] = None - api_url: Optional[str] = None + agent_image_url: str | None = None + agent_id: str | None = None + agent_prompt: str | None = None + idle_timeout: int | None = None + daily_room_url: str | None = None + daily_token: str | None = None + lemonslice_properties: dict | None = None + api_url: str | None = None class LemonSliceCallbacks(BaseModel): @@ -114,7 +115,7 @@ class LemonSliceTransportClient: params: LemonSliceParams = LemonSliceParams(), callbacks: LemonSliceCallbacks, api_key: str, - session_request: Optional[LemonSliceNewSessionRequest] = None, + session_request: LemonSliceNewSessionRequest | None = None, session: aiohttp.ClientSession, ) -> None: """Initialize the LemonSlice transport client. @@ -131,9 +132,9 @@ class LemonSliceTransportClient: self._bot_name = bot_name self._api = LemonSliceApi(api_key, session) self._session_request = session_request or LemonSliceNewSessionRequest() - self._session_id: Optional[str] = None - self._control_url: Optional[str] = None - self._daily_transport_client: Optional[DailyTransportClient] = None + self._session_id: str | None = None + self._control_url: str | None = None + self._daily_transport_client: DailyTransportClient | None = None self._callbacks = callbacks self._params = params @@ -540,7 +541,7 @@ class LemonSliceOutputTransport(BaseOutputTransport): # Whether we have seen a StartFrame already. 
self._initialized = False # This is the custom track destination expected by LemonSlice - self._transport_destination: Optional[str] = "stream" + self._transport_destination: str | None = "stream" async def setup(self, setup: FrameProcessorSetup): """Setup the output transport. @@ -692,10 +693,10 @@ class LemonSliceTransport(BaseTransport): bot_name: str, session: aiohttp.ClientSession, api_key: str, - session_request: Optional[LemonSliceNewSessionRequest] = None, + session_request: LemonSliceNewSessionRequest | None = None, params: LemonSliceParams = LemonSliceParams(), - input_name: Optional[str] = None, - output_name: Optional[str] = None, + input_name: str | None = None, + output_name: str | None = None, ): """Initialize the LemonSlice transport. @@ -724,8 +725,8 @@ class LemonSliceTransport(BaseTransport): session=session, params=params, ) - self._input: Optional[LemonSliceInputTransport] = None - self._output: Optional[LemonSliceOutputTransport] = None + self._input: LemonSliceInputTransport | None = None + self._output: LemonSliceOutputTransport | None = None self._lemonslice_participant_id = None # Register supported handlers. The user will only be able to register diff --git a/src/pipecat/transports/livekit/transport.py b/src/pipecat/transports/livekit/transport.py index 04e99f4c0..550c1947e 100644 --- a/src/pipecat/transports/livekit/transport.py +++ b/src/pipecat/transports/livekit/transport.py @@ -13,8 +13,9 @@ event handling for conversational AI applications. import asyncio import json +from collections.abc import Awaitable, Callable from dataclasses import dataclass -from typing import Any, Awaitable, Callable, List, Optional +from typing import Any from loguru import logger from pydantic import BaseModel @@ -78,7 +79,7 @@ class LiveKitOutputTransportMessageFrame(OutputTransportMessageFrame): participant_id: Optional ID of the participant this message is for/from. 
""" - participant_id: Optional[str] = None + participant_id: str | None = None @dataclass @@ -89,7 +90,7 @@ class LiveKitOutputTransportMessageUrgentFrame(OutputTransportMessageUrgentFrame participant_id: Optional ID of the participant this message is for/from. """ - participant_id: Optional[str] = None + participant_id: str | None = None class LiveKitParams(TransportParams): @@ -160,18 +161,18 @@ class LiveKitTransportClient: self._params = params self._callbacks = callbacks self._transport_name = transport_name - self._room: Optional[rtc.Room] = None + self._room: rtc.Room | None = None self._participant_id: str = "" self._connected = False self._disconnect_counter = 0 - self._audio_source: Optional[rtc.AudioSource] = None - self._audio_track: Optional[rtc.LocalAudioTrack] = None + self._audio_source: rtc.AudioSource | None = None + self._audio_track: rtc.LocalAudioTrack | None = None self._audio_tracks = {} self._audio_queue = asyncio.Queue() self._video_tracks = {} self._video_queue = asyncio.Queue() self._other_participant_has_joined = False - self._task_manager: Optional[BaseTaskManager] = None + self._task_manager: BaseTaskManager | None = None self._async_lock = asyncio.Lock() @property @@ -292,7 +293,7 @@ class LiveKitTransportClient: logger.info(f"Disconnected from {self._room_name}") await self._callbacks.on_disconnected() - async def send_data(self, data: bytes, participant_id: Optional[str] = None): + async def send_data(self, data: bytes, participant_id: str | None = None): """Send data to participants in the room. Args: @@ -357,7 +358,7 @@ class LiveKitTransportClient: logger.error(f"Error publishing audio: {e}") return False - def get_participants(self) -> List[str]: + def get_participants(self) -> list[str]: """Get list of participant IDs in the room. 
Returns: @@ -971,9 +972,9 @@ class LiveKitTransport(BaseTransport): url: str, token: str, room_name: str, - params: Optional[LiveKitParams] = None, - input_name: Optional[str] = None, - output_name: Optional[str] = None, + params: LiveKitParams | None = None, + input_name: str | None = None, + output_name: str | None = None, ): """Initialize the LiveKit transport. @@ -1005,8 +1006,8 @@ class LiveKitTransport(BaseTransport): self._client = LiveKitTransportClient( url, token, room_name, self._params, callbacks, self.name ) - self._input: Optional[LiveKitInputTransport] = None - self._output: Optional[LiveKitOutputTransport] = None + self._input: LiveKitInputTransport | None = None + self._output: LiveKitOutputTransport | None = None self._register_event_handler("on_connected") self._register_event_handler("on_disconnected") @@ -1064,7 +1065,7 @@ class LiveKitTransport(BaseTransport): if self._output: await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM) - def get_participants(self) -> List[str]: + def get_participants(self) -> list[str]: """Get list of participant IDs in the room. Returns: @@ -1166,7 +1167,7 @@ class LiveKitTransport(BaseTransport): await self._input.push_app_message(data.decode(), participant_id) await self._call_event_handler("on_data_received", data, participant_id) - async def send_message(self, message: str, participant_id: Optional[str] = None): + async def send_message(self, message: str, participant_id: str | None = None): """Send a message to participants in the room. Args: @@ -1179,7 +1180,7 @@ class LiveKitTransport(BaseTransport): ) await self._output.send_message(frame) - async def send_message_urgent(self, message: str, participant_id: Optional[str] = None): + async def send_message_urgent(self, message: str, participant_id: str | None = None): """Send an urgent message to participants in the room. 
Args: diff --git a/src/pipecat/transports/local/audio.py b/src/pipecat/transports/local/audio.py index 94ee94019..db12e2f0d 100644 --- a/src/pipecat/transports/local/audio.py +++ b/src/pipecat/transports/local/audio.py @@ -12,7 +12,6 @@ audio input and output through the system's default audio devices. import asyncio from concurrent.futures import ThreadPoolExecutor -from typing import Optional from loguru import logger @@ -40,8 +39,8 @@ class LocalAudioTransportParams(TransportParams): output_device_index: PyAudio device index for audio output. If None, uses default. """ - input_device_index: Optional[int] = None - output_device_index: Optional[int] = None + input_device_index: int | None = None + output_device_index: int | None = None class LocalAudioInputTransport(BaseInputTransport): @@ -206,8 +205,8 @@ class LocalAudioTransport(BaseTransport): self._params = params self._pyaudio = pyaudio.PyAudio() - self._input: Optional[LocalAudioInputTransport] = None - self._output: Optional[LocalAudioOutputTransport] = None + self._input: LocalAudioInputTransport | None = None + self._output: LocalAudioOutputTransport | None = None # # BaseTransport diff --git a/src/pipecat/transports/local/tk.py b/src/pipecat/transports/local/tk.py index 658b45afe..a11f4f277 100644 --- a/src/pipecat/transports/local/tk.py +++ b/src/pipecat/transports/local/tk.py @@ -13,7 +13,6 @@ PyAudio for audio I/O, suitable for desktop applications and testing. import asyncio import tkinter as tk from concurrent.futures import ThreadPoolExecutor -from typing import Optional import numpy as np from loguru import logger @@ -53,8 +52,8 @@ class TkTransportParams(TransportParams): audio_output_device_index: PyAudio device index for audio output. If None, uses default. 
""" - audio_input_device_index: Optional[int] = None - audio_output_device_index: Optional[int] = None + audio_input_device_index: int | None = None + audio_output_device_index: int | None = None class TkInputTransport(BaseInputTransport): @@ -251,8 +250,8 @@ class TkLocalTransport(BaseTransport): self._params = params self._pyaudio = pyaudio.PyAudio() - self._input: Optional[TkInputTransport] = None - self._output: Optional[TkOutputTransport] = None + self._input: TkInputTransport | None = None + self._output: TkOutputTransport | None = None # # BaseTransport diff --git a/src/pipecat/transports/smallwebrtc/connection.py b/src/pipecat/transports/smallwebrtc/connection.py index 3f0d6a9ee..efb9a6923 100644 --- a/src/pipecat/transports/smallwebrtc/connection.py +++ b/src/pipecat/transports/smallwebrtc/connection.py @@ -15,7 +15,7 @@ import asyncio import json import time import uuid -from typing import Any, List, Literal, Optional, Union +from typing import Any, Literal from loguru import logger from pydantic import BaseModel, TypeAdapter @@ -89,8 +89,8 @@ class SignallingMessage: outbound: Types of messages that can be sent to peers. 
""" - Inbound = Union[TrackStatusMessage] # in case we need to add new messages in the future - outbound = Union[RenegotiateMessage] + Inbound = TrackStatusMessage # in case we need to add new messages in the future + outbound = RenegotiateMessage class SmallWebRTCTrack: @@ -112,7 +112,7 @@ class SmallWebRTCTrack: self._track = receiver.track self._enabled = True self._last_recv_time: float = 0.0 - self._idle_task: Optional[asyncio.Task] = None + self._idle_task: asyncio.Task | None = None self._idle_timeout: float = 2.0 # seconds before discarding old frames def set_enabled(self, enabled: bool) -> None: @@ -145,7 +145,7 @@ class SmallWebRTCTrack: remote_track._queue.get_nowait() # Remove the oldest frame remote_track._queue.task_done() - async def recv(self) -> Optional[Frame]: + async def recv(self) -> Frame | None: """Receive the next frame from the track. Enables the internal receiving state and starts idle watcher. @@ -213,7 +213,7 @@ class SmallWebRTCConnection(BaseObject): def __init__( self, - ice_servers: Optional[Union[List[str], List[IceServer]]] = None, + ice_servers: list[str] | list[IceServer] | None = None, connection_timeout_secs: int = 60, ): """Initialize the WebRTC connection. 
@@ -227,7 +227,7 @@ class SmallWebRTCConnection(BaseObject): """ super().__init__() if not ice_servers: - self.ice_servers: List[IceServer] = [] + self.ice_servers: list[IceServer] = [] elif all(isinstance(s, IceServer) for s in ice_servers): self.ice_servers = ice_servers elif all(isinstance(s, str) for s in ice_servers): @@ -281,7 +281,7 @@ class SmallWebRTCConnection(BaseObject): logger.debug("Initializing new peer connection") rtc_config = RTCConfiguration(iceServers=self.ice_servers) - self._answer: Optional[RTCSessionDescription] = None + self._answer: RTCSessionDescription | None = None self._pc = RTCPeerConnection(rtc_config) self._pc_id = f"{self.name}-{uuid.uuid4().hex}" self._setup_listeners() diff --git a/src/pipecat/transports/smallwebrtc/request_handler.py b/src/pipecat/transports/smallwebrtc/request_handler.py index 351a11af0..63c9bea14 100644 --- a/src/pipecat/transports/smallwebrtc/request_handler.py +++ b/src/pipecat/transports/smallwebrtc/request_handler.py @@ -10,9 +10,10 @@ This module provides a client for handling web requests and managing WebRTC conn """ import asyncio +from collections.abc import Awaitable, Callable from dataclasses import dataclass from enum import Enum -from typing import Any, Awaitable, Callable, Dict, List, Optional +from typing import Any from aiortc.sdp import candidate_from_sdp from fastapi import HTTPException @@ -35,9 +36,9 @@ class SmallWebRTCRequest: sdp: str type: str - pc_id: Optional[str] = None - restart_pc: Optional[bool] = None - request_data: Optional[Any] = None + pc_id: str | None = None + restart_pc: bool | None = None + request_data: Any | None = None @classmethod def from_dict(cls, data: dict): @@ -72,7 +73,7 @@ class SmallWebRTCPatchRequest: """ pc_id: str - candidates: List[IceCandidate] + candidates: list[IceCandidate] class ConnectionMode(Enum): @@ -95,9 +96,9 @@ class SmallWebRTCRequestHandler: def __init__( self, - ice_servers: Optional[List[IceServer]] = None, + ice_servers: list[IceServer] | 
None = None, esp32_mode: bool = False, - host: Optional[str] = None, + host: str | None = None, connection_mode: ConnectionMode = ConnectionMode.MULTIPLE, ) -> None: """Initialize a SmallWebRTC request handler. @@ -117,9 +118,9 @@ class SmallWebRTCRequestHandler: self._connection_mode = connection_mode # Store connections by pc_id - self._pcs_map: Dict[str, SmallWebRTCConnection] = {} + self._pcs_map: dict[str, SmallWebRTCConnection] = {} - def _check_single_connection_constraints(self, pc_id: Optional[str]) -> None: + def _check_single_connection_constraints(self, pc_id: str | None) -> None: """Check if the connection request satisfies single connection mode constraints. Args: @@ -152,7 +153,7 @@ class SmallWebRTCRequestHandler: detail="Cannot create new connection with existing connection active", ) - def update_ice_servers(self, ice_servers: Optional[List[IceServer]] = None): + def update_ice_servers(self, ice_servers: list[IceServer] | None = None): """Update the list of ICE servers used for WebRTC connections.""" self._ice_servers = ice_servers @@ -160,7 +161,7 @@ class SmallWebRTCRequestHandler: self, request: SmallWebRTCRequest, webrtc_connection_callback: Callable[[Any], Awaitable[None]], - ) -> Optional[Dict[str, str]]: + ) -> dict[str, str] | None: """Handle a SmallWebRTC request and resolve the pending answer. 
This method will: diff --git a/src/pipecat/transports/smallwebrtc/transport.py b/src/pipecat/transports/smallwebrtc/transport.py index 76ea34464..0d96d4aa6 100644 --- a/src/pipecat/transports/smallwebrtc/transport.py +++ b/src/pipecat/transports/smallwebrtc/transport.py @@ -15,7 +15,8 @@ import asyncio import fractions import time from collections import deque -from typing import Any, Awaitable, Callable, List, Optional +from collections.abc import Awaitable, Callable +from typing import Any import numpy as np from loguru import logger @@ -235,9 +236,9 @@ class SmallWebRTCClient: self._audio_output_track = None self._video_output_track = None - self._audio_input_track: Optional[AudioStreamTrack] = None - self._video_input_track: Optional[VideoStreamTrack] = None - self._screen_video_track: Optional[VideoStreamTrack] = None + self._audio_input_track: AudioStreamTrack | None = None + self._video_input_track: VideoStreamTrack | None = None + self._screen_video_track: VideoStreamTrack | None = None self._params = None self._audio_in_channels = None @@ -314,7 +315,7 @@ class SmallWebRTCClient: try: frame = await asyncio.wait_for(video_track.recv(), timeout=2.0) - except asyncio.TimeoutError: + except TimeoutError: if ( self._webrtc_connection.is_connected() and video_track @@ -369,7 +370,7 @@ class SmallWebRTCClient: try: frame = await asyncio.wait_for(self._audio_input_track.recv(), timeout=2.0) - except asyncio.TimeoutError: + except TimeoutError: if ( self._webrtc_connection.is_connected() and self._audio_input_track @@ -583,7 +584,7 @@ class SmallWebRTCInputTransport(BaseInputTransport): self._receive_audio_task = None self._receive_video_task = None self._receive_screen_video_task = None - self._image_requests: List[UserImageRequestFrame] = [] + self._image_requests: list[UserImageRequestFrame] = [] # Whether we have seen a StartFrame already. 
self._initialized = False @@ -897,8 +898,8 @@ class SmallWebRTCTransport(BaseTransport): self, webrtc_connection: SmallWebRTCConnection, params: TransportParams, - input_name: Optional[str] = None, - output_name: Optional[str] = None, + input_name: str | None = None, + output_name: str | None = None, ): """Initialize the WebRTC transport. @@ -919,8 +920,8 @@ class SmallWebRTCTransport(BaseTransport): self._client = SmallWebRTCClient(webrtc_connection, self._callbacks) - self._input: Optional[SmallWebRTCInputTransport] = None - self._output: Optional[SmallWebRTCOutputTransport] = None + self._input: SmallWebRTCInputTransport | None = None + self._output: SmallWebRTCOutputTransport | None = None # Register supported handlers. The user will only be able to register # these handlers. diff --git a/src/pipecat/transports/tavus/transport.py b/src/pipecat/transports/tavus/transport.py index 426b7f72c..9ad28979d 100644 --- a/src/pipecat/transports/tavus/transport.py +++ b/src/pipecat/transports/tavus/transport.py @@ -12,8 +12,9 @@ audio/video streaming capabilities through the Tavus API. 
""" import os +from collections.abc import Awaitable, Callable, Mapping from functools import partial -from typing import Any, Awaitable, Callable, Mapping, Optional +from typing import Any import aiohttp from daily.daily import AudioData @@ -197,8 +198,8 @@ class TavusTransportClient: self._api = TavusApi(api_key, session) self._replica_id = replica_id self._persona_id = persona_id - self._conversation_id: Optional[str] = None - self._client: Optional[DailyTransportClient] = None + self._conversation_id: str | None = None + self._client: DailyTransportClient | None = None self._callbacks = callbacks self._params = params @@ -417,9 +418,7 @@ class TavusTransportClient: return False return await self._client.write_audio_frame(frame) - async def register_audio_destination( - self, destination: str, auto_silence: Optional[bool] = True - ): + async def register_audio_destination(self, destination: str, auto_silence: bool | None = True): """Register an audio destination for output. Args: @@ -563,7 +562,7 @@ class TavusOutputTransport(BaseOutputTransport): # Whether we have seen a StartFrame already. self._initialized = False # This is the custom track destination expected by Tavus - self._transport_destination: Optional[str] = "stream" + self._transport_destination: str | None = "stream" async def setup(self, setup: FrameProcessorSetup): """Setup the output transport. @@ -693,8 +692,8 @@ class TavusTransport(BaseTransport): replica_id: str, persona_id: str = "pipecat-stream", params: TavusParams = TavusParams(), - input_name: Optional[str] = None, - output_name: Optional[str] = None, + input_name: str | None = None, + output_name: str | None = None, ): """Initialize the Tavus transport. 
@@ -726,8 +725,8 @@ class TavusTransport(BaseTransport): session=session, params=params, ) - self._input: Optional[TavusInputTransport] = None - self._output: Optional[TavusOutputTransport] = None + self._input: TavusInputTransport | None = None + self._output: TavusOutputTransport | None = None self._tavus_participant_id = None # Register supported handlers. The user will only be able to register diff --git a/src/pipecat/transports/websocket/client.py b/src/pipecat/transports/websocket/client.py index b5b99ee97..5665dfd23 100644 --- a/src/pipecat/transports/websocket/client.py +++ b/src/pipecat/transports/websocket/client.py @@ -15,7 +15,7 @@ import asyncio import io import time import wave -from typing import Awaitable, Callable, Optional +from collections.abc import Awaitable, Callable import websockets from loguru import logger @@ -51,8 +51,8 @@ class WebsocketClientParams(TransportParams): """ add_wav_header: bool = True - additional_headers: Optional[dict[str, str]] = None - serializer: Optional[FrameSerializer] = None + additional_headers: dict[str, str] | None = None + serializer: FrameSerializer | None = None class WebsocketClientCallbacks(BaseModel): @@ -97,8 +97,8 @@ class WebsocketClientSession: self._transport_name = transport_name self._leave_counter = 0 - self._task_manager: Optional[BaseTaskManager] = None - self._websocket: Optional[websockets.WebSocketClientProtocol] = None + self._task_manager: BaseTaskManager | None = None + self._websocket: websockets.WebSocketClientProtocol | None = None @property def task_manager(self) -> BaseTaskManager: @@ -487,7 +487,7 @@ class WebsocketClientTransport(BaseTransport): def __init__( self, uri: str, - params: Optional[WebsocketClientParams] = None, + params: WebsocketClientParams | None = None, ): """Initialize the WebSocket client transport. 
@@ -507,8 +507,8 @@ class WebsocketClientTransport(BaseTransport): ) self._session = WebsocketClientSession(uri, self._params, callbacks, self.name) - self._input: Optional[WebsocketClientInputTransport] = None - self._output: Optional[WebsocketClientOutputTransport] = None + self._input: WebsocketClientInputTransport | None = None + self._output: WebsocketClientOutputTransport | None = None # Register supported handlers. The user will only be able to register # these handlers. diff --git a/src/pipecat/transports/websocket/fastapi.py b/src/pipecat/transports/websocket/fastapi.py index d9b7d7ae1..1e449b01e 100644 --- a/src/pipecat/transports/websocket/fastapi.py +++ b/src/pipecat/transports/websocket/fastapi.py @@ -16,7 +16,7 @@ import io import time import typing import wave -from typing import Awaitable, Callable, Optional +from collections.abc import Awaitable, Callable from loguru import logger from pydantic import BaseModel @@ -63,9 +63,9 @@ class FastAPIWebsocketParams(TransportParams): """ add_wav_header: bool = False - serializer: Optional[FrameSerializer] = None - session_timeout: Optional[int] = None - fixed_audio_packet_size: Optional[int] = None + serializer: FrameSerializer | None = None + session_timeout: int | None = None + fixed_audio_packet_size: int | None = None class FastAPIWebsocketCallbacks(BaseModel): @@ -550,8 +550,8 @@ class FastAPIWebsocketTransport(BaseTransport): self, websocket: WebSocket, params: FastAPIWebsocketParams, - input_name: Optional[str] = None, - output_name: Optional[str] = None, + input_name: str | None = None, + output_name: str | None = None, ): """Initialize the FastAPI WebSocket transport. 
diff --git a/src/pipecat/transports/websocket/server.py b/src/pipecat/transports/websocket/server.py index fa3645d37..82b899f71 100644 --- a/src/pipecat/transports/websocket/server.py +++ b/src/pipecat/transports/websocket/server.py @@ -15,7 +15,7 @@ import asyncio import io import time import wave -from typing import Awaitable, Callable, Optional +from collections.abc import Awaitable, Callable from loguru import logger from pydantic import BaseModel @@ -59,8 +59,8 @@ class WebsocketServerParams(TransportParams): """ add_wav_header: bool = False - serializer: Optional[FrameSerializer] = None - session_timeout: Optional[int] = None + serializer: FrameSerializer | None = None + session_timeout: int | None = None class WebsocketServerCallbacks(BaseModel): @@ -113,7 +113,7 @@ class WebsocketServerInputTransport(BaseInputTransport): self._params = params self._callbacks = callbacks - self._websocket: Optional[websockets.WebSocketServerProtocol] = None + self._websocket: websockets.WebSocketServerProtocol | None = None self._server_task = None @@ -264,7 +264,7 @@ class WebsocketServerOutputTransport(BaseOutputTransport): self._transport = transport self._params = params - self._websocket: Optional[websockets.WebSocketServerProtocol] = None + self._websocket: websockets.WebSocketServerProtocol | None = None # write_audio_frame() is called quickly, as soon as we get audio # (e.g. from the TTS), and since this is just a network connection we @@ -277,7 +277,7 @@ class WebsocketServerOutputTransport(BaseOutputTransport): # Whether we have seen a StartFrame already. self._initialized = False - async def set_client_connection(self, websocket: Optional[websockets.WebSocketServerProtocol]): + async def set_client_connection(self, websocket: websockets.WebSocketServerProtocol | None): """Set the active client WebSocket connection. 
Args: @@ -441,8 +441,8 @@ class WebsocketServerTransport(BaseTransport): params: WebsocketServerParams, host: str = "localhost", port: int = 8765, - input_name: Optional[str] = None, - output_name: Optional[str] = None, + input_name: str | None = None, + output_name: str | None = None, ): """Initialize the WebSocket server transport. @@ -464,9 +464,9 @@ class WebsocketServerTransport(BaseTransport): on_session_timeout=self._on_session_timeout, on_websocket_ready=self._on_websocket_ready, ) - self._input: Optional[WebsocketServerInputTransport] = None - self._output: Optional[WebsocketServerOutputTransport] = None - self._websocket: Optional[websockets.WebSocketServerProtocol] = None + self._input: WebsocketServerInputTransport | None = None + self._output: WebsocketServerOutputTransport | None = None + self._websocket: websockets.WebSocketServerProtocol | None = None # Register supported handlers. The user will only be able to register # these handlers. diff --git a/src/pipecat/transports/whatsapp/api.py b/src/pipecat/transports/whatsapp/api.py index 5944cc8b8..250377619 100644 --- a/src/pipecat/transports/whatsapp/api.py +++ b/src/pipecat/transports/whatsapp/api.py @@ -9,7 +9,7 @@ API to communicate with WhatsApp Cloud API. 
""" -from typing import Any, Dict, List, Optional, Union +from typing import Any import aiohttp from loguru import logger @@ -44,7 +44,7 @@ class WhatsAppError(BaseModel): code: int message: str href: str - error_data: Dict[str, Any] + error_data: dict[str, Any] class WhatsAppConnectCall(BaseModel): @@ -68,7 +68,7 @@ class WhatsAppConnectCall(BaseModel): to: str event: str # "connect" timestamp: str - direction: Optional[str] + direction: str | None session: WhatsAppSession @@ -97,12 +97,12 @@ class WhatsAppTerminateCall(BaseModel): to: str event: str # "terminate" timestamp: str - direction: Optional[str] - biz_opaque_callback_data: Optional[str] = None - status: Optional[str] = None # "FAILED" or "COMPLETED" or "REJECTED" - start_time: Optional[str] = None - end_time: Optional[str] = None - duration: Optional[int] = None + direction: str | None + biz_opaque_callback_data: str | None = None + status: str | None = None # "FAILED" or "COMPLETED" or "REJECTED" + start_time: str | None = None + end_time: str | None = None + duration: int | None = None class WhatsAppProfile(BaseModel): @@ -151,8 +151,8 @@ class WhatsAppConnectCallValue(BaseModel): messaging_product: str metadata: WhatsAppMetadata - contacts: List[WhatsAppContact] - calls: List[WhatsAppConnectCall] + contacts: list[WhatsAppContact] + calls: list[WhatsAppConnectCall] class WhatsAppTerminateCallValue(BaseModel): @@ -167,8 +167,8 @@ class WhatsAppTerminateCallValue(BaseModel): messaging_product: str metadata: WhatsAppMetadata - calls: List[WhatsAppTerminateCall] - errors: Optional[List[WhatsAppError]] = None + calls: list[WhatsAppTerminateCall] + errors: list[WhatsAppError] | None = None class WhatsAppChange(BaseModel): @@ -179,7 +179,7 @@ class WhatsAppChange(BaseModel): field: Always "calls" for calling webhooks """ - value: Union[WhatsAppConnectCallValue, WhatsAppTerminateCallValue] + value: WhatsAppConnectCallValue | WhatsAppTerminateCallValue field: str @@ -192,7 +192,7 @@ class 
WhatsAppEntry(BaseModel): """ id: str - changes: List[WhatsAppChange] + changes: list[WhatsAppChange] class WhatsAppWebhookRequest(BaseModel): @@ -207,7 +207,7 @@ class WhatsAppWebhookRequest(BaseModel): """ object: str - entry: List[WhatsAppEntry] + entry: list[WhatsAppEntry] class WhatsAppApi: diff --git a/src/pipecat/transports/whatsapp/client.py b/src/pipecat/transports/whatsapp/client.py index f2ed1f00d..8f479520f 100644 --- a/src/pipecat/transports/whatsapp/client.py +++ b/src/pipecat/transports/whatsapp/client.py @@ -14,7 +14,7 @@ WhatsApp call events. import asyncio import hashlib import hmac -from typing import Awaitable, Callable, Dict, List, Optional +from collections.abc import Awaitable, Callable import aiohttp from loguru import logger @@ -48,8 +48,8 @@ class WhatsAppClient: whatsapp_token: str, phone_number_id: str, session: aiohttp.ClientSession, - ice_servers: Optional[List[IceServer]] = None, - whatsapp_secret: Optional[str] = None, + ice_servers: list[IceServer] | None = None, + whatsapp_secret: str | None = None, ) -> None: """Initialize the WhatsApp client. 
@@ -65,7 +65,7 @@ class WhatsAppClient: whatsapp_token=whatsapp_token, phone_number_id=phone_number_id, session=session ) self._whatsapp_secret = whatsapp_secret - self._ongoing_calls_map: Dict[str, SmallWebRTCConnection] = {} + self._ongoing_calls_map: dict[str, SmallWebRTCConnection] = {} # Set default ICE servers if none provided if ice_servers is None: @@ -73,11 +73,11 @@ class WhatsAppClient: else: self._ice_servers = ice_servers - def update_ice_servers(self, ice_servers: Optional[List[IceServer]] = None): + def update_ice_servers(self, ice_servers: list[IceServer] | None = None): """Update the list of ICE servers used for WebRTC connections.""" self._ice_servers = ice_servers - def update_whatsapp_secret(self, whatsapp_secret: Optional[str] = None): + def update_whatsapp_secret(self, whatsapp_secret: str | None = None): """Update the WhatsApp APP secret for validating that the webhook request came from WhatsApp.""" self._whatsapp_secret = whatsapp_secret @@ -125,7 +125,7 @@ class WhatsAppClient: logger.debug("All calls terminated successfully") async def handle_verify_webhook_request( - self, params: Dict[str, str], expected_verification_token: str + self, params: dict[str, str], expected_verification_token: str ) -> int: """Handle a verify webhook request from WhatsApp. @@ -177,9 +177,9 @@ class WhatsAppClient: async def handle_webhook_request( self, request: WhatsAppWebhookRequest, - connection_callback: Optional[Callable[[SmallWebRTCConnection], Awaitable[None]]] = None, - raw_body: Optional[bytes] = None, - sha256_signature: Optional[str] = None, + connection_callback: Callable[[SmallWebRTCConnection], Awaitable[None]] | None = None, + raw_body: bytes | None = None, + sha256_signature: str | None = None, ) -> bool: """Handle a webhook request from WhatsApp. 
diff --git a/src/pipecat/turns/user_idle_controller.py b/src/pipecat/turns/user_idle_controller.py index 188daa327..0fa0053bb 100644 --- a/src/pipecat/turns/user_idle_controller.py +++ b/src/pipecat/turns/user_idle_controller.py @@ -7,7 +7,6 @@ """This module defines a controller for managing user idle detection.""" import asyncio -from typing import Optional from pipecat.frames.frames import ( BotStartedSpeakingFrame, @@ -64,11 +63,11 @@ class UserIdleController(BaseObject): self._user_idle_timeout = user_idle_timeout - self._task_manager: Optional[BaseTaskManager] = None + self._task_manager: BaseTaskManager | None = None self._user_turn_in_progress: bool = False self._function_calls_in_progress: int = 0 - self._idle_timer_task: Optional[asyncio.Task] = None + self._idle_timer_task: asyncio.Task | None = None self._register_event_handler("on_user_turn_idle", sync=True) diff --git a/src/pipecat/turns/user_mute/base_user_mute_strategy.py b/src/pipecat/turns/user_mute/base_user_mute_strategy.py index 91ecb37d5..67aaa2867 100644 --- a/src/pipecat/turns/user_mute/base_user_mute_strategy.py +++ b/src/pipecat/turns/user_mute/base_user_mute_strategy.py @@ -6,8 +6,6 @@ """Base strategy for deciding whether user frames should be muted.""" -from typing import Optional - from pipecat.frames.frames import Frame from pipecat.utils.asyncio.task_manager import BaseTaskManager from pipecat.utils.base_object import BaseObject @@ -32,7 +30,7 @@ class BaseUserMuteStrategy(BaseObject): def __init__(self, **kwargs): """Initialize the base user mute strategy.""" super().__init__(**kwargs) - self._task_manager: Optional[BaseTaskManager] = None + self._task_manager: BaseTaskManager | None = None @property def task_manager(self) -> BaseTaskManager: diff --git a/src/pipecat/turns/user_mute/function_call_user_mute_strategy.py b/src/pipecat/turns/user_mute/function_call_user_mute_strategy.py index f50f31bd4..fa2aa6b71 100644 --- 
a/src/pipecat/turns/user_mute/function_call_user_mute_strategy.py +++ b/src/pipecat/turns/user_mute/function_call_user_mute_strategy.py @@ -6,8 +6,6 @@ """User mute strategy that mutes the user while a function call is executing.""" -from typing import Set - from pipecat.frames.frames import ( Frame, FunctionCallCancelFrame, @@ -30,7 +28,7 @@ class FunctionCallUserMuteStrategy(BaseUserMuteStrategy): def __init__(self): """Initialize the function call user mute strategy.""" super().__init__() - self._function_call_in_progress: Set[str] = set() + self._function_call_in_progress: set[str] = set() async def reset(self): """Reset the strategy to its initial state.""" diff --git a/src/pipecat/turns/user_start/base_user_turn_start_strategy.py b/src/pipecat/turns/user_start/base_user_turn_start_strategy.py index 94b4f635d..401178cc7 100644 --- a/src/pipecat/turns/user_start/base_user_turn_start_strategy.py +++ b/src/pipecat/turns/user_start/base_user_turn_start_strategy.py @@ -7,7 +7,6 @@ """Base turn start strategy for determining when the user starts speaking.""" from dataclasses import dataclass -from typing import Optional, Type from pipecat.frames.frames import Frame from pipecat.processors.frame_processor import FrameDirection @@ -73,7 +72,7 @@ class BaseUserTurnStartStrategy(BaseObject): super().__init__(**kwargs) self._enable_interruptions = enable_interruptions self._enable_user_speaking_frames = enable_user_speaking_frames - self._task_manager: Optional[BaseTaskManager] = None + self._task_manager: BaseTaskManager | None = None self._register_event_handler("on_push_frame", sync=True) self._register_event_handler("on_broadcast_frame", sync=True) self._register_event_handler("on_user_turn_started", sync=True) @@ -126,7 +125,7 @@ class BaseUserTurnStartStrategy(BaseObject): """ await self._call_event_handler("on_push_frame", frame, direction) - async def broadcast_frame(self, frame_cls: Type[Frame], **kwargs): + async def broadcast_frame(self, frame_cls: 
type[Frame], **kwargs): """Emit on_broadcast_frame to broadcast a frame using the user aggreagtor. Args: diff --git a/src/pipecat/turns/user_start/wake_phrase_user_turn_start_strategy.py b/src/pipecat/turns/user_start/wake_phrase_user_turn_start_strategy.py index bcc069ad7..14a045826 100644 --- a/src/pipecat/turns/user_start/wake_phrase_user_turn_start_strategy.py +++ b/src/pipecat/turns/user_start/wake_phrase_user_turn_start_strategy.py @@ -9,7 +9,6 @@ import asyncio import enum import re -from typing import List, Optional from loguru import logger @@ -85,7 +84,7 @@ class WakePhraseUserTurnStartStrategy(BaseUserTurnStartStrategy): def __init__( self, *, - phrases: List[str], + phrases: list[str], timeout: float = 10.0, single_activation: bool = False, **kwargs, @@ -106,7 +105,7 @@ class WakePhraseUserTurnStartStrategy(BaseUserTurnStartStrategy): self._timeout = timeout self._single_activation = single_activation - self._patterns: List[re.Pattern] = [] + self._patterns: list[re.Pattern] = [] for phrase in phrases: pattern = re.compile( r"\b" + r"\s*".join(re.escape(word) for word in phrase.split()) + r"\b", @@ -118,7 +117,7 @@ class WakePhraseUserTurnStartStrategy(BaseUserTurnStartStrategy): self._accumulated_text = "" self._timeout_event = asyncio.Event() - self._timeout_task: Optional[asyncio.Task] = None + self._timeout_task: asyncio.Task | None = None self._register_event_handler("on_wake_phrase_detected") self._register_event_handler("on_wake_phrase_timeout") @@ -276,6 +275,6 @@ class WakePhraseUserTurnStartStrategy(BaseUserTurnStartStrategy): timeout=self._timeout, ) self._timeout_event.clear() - except asyncio.TimeoutError: + except TimeoutError: if self._state == _WakeState.AWAKE: self._transition_to_idle() diff --git a/src/pipecat/turns/user_stop/base_user_turn_stop_strategy.py b/src/pipecat/turns/user_stop/base_user_turn_stop_strategy.py index a6f89ae4b..1f8497359 100644 --- a/src/pipecat/turns/user_stop/base_user_turn_stop_strategy.py +++ 
b/src/pipecat/turns/user_stop/base_user_turn_stop_strategy.py @@ -7,7 +7,6 @@ """Base user turn stop strategy for determining when the user stopped speaking.""" from dataclasses import dataclass -from typing import Optional, Type from pipecat.frames.frames import Frame from pipecat.processors.frame_processor import FrameDirection @@ -62,7 +61,7 @@ class BaseUserTurnStopStrategy(BaseObject): """ super().__init__(**kwargs) self._enable_user_speaking_frames = enable_user_speaking_frames - self._task_manager: Optional[BaseTaskManager] = None + self._task_manager: BaseTaskManager | None = None self._register_event_handler("on_push_frame", sync=True) self._register_event_handler("on_broadcast_frame", sync=True) self._register_event_handler("on_user_turn_stopped", sync=True) @@ -114,7 +113,7 @@ class BaseUserTurnStopStrategy(BaseObject): """ await self._call_event_handler("on_push_frame", frame, direction) - async def broadcast_frame(self, frame_cls: Type[Frame], **kwargs): + async def broadcast_frame(self, frame_cls: type[Frame], **kwargs): """Emit on_broadcast_frame to broadcast a frame using the user aggreagtor. 
Args: diff --git a/src/pipecat/turns/user_stop/external_user_turn_stop_strategy.py b/src/pipecat/turns/user_stop/external_user_turn_stop_strategy.py index 4ffa3360b..b1cf0ca9b 100644 --- a/src/pipecat/turns/user_stop/external_user_turn_stop_strategy.py +++ b/src/pipecat/turns/user_stop/external_user_turn_stop_strategy.py @@ -7,7 +7,6 @@ """User turn stop strategy triggered by externally emitted frames.""" import asyncio -from typing import Optional from pipecat.frames.frames import ( Frame, @@ -44,7 +43,7 @@ class ExternalUserTurnStopStrategy(BaseUserTurnStopStrategy): self._user_speaking = False self._seen_interim_results = False self._event = asyncio.Event() - self._task: Optional[asyncio.Task] = None + self._task: asyncio.Task | None = None async def reset(self): """Reset the strategy to its initial state.""" @@ -125,7 +124,7 @@ class ExternalUserTurnStopStrategy(BaseUserTurnStopStrategy): try: await asyncio.wait_for(self._event.wait(), timeout=self._timeout) self._event.clear() - except asyncio.TimeoutError: + except TimeoutError: await self._maybe_trigger_user_turn_stopped() async def _maybe_trigger_user_turn_stopped(self): diff --git a/src/pipecat/turns/user_stop/speech_timeout_user_turn_stop_strategy.py b/src/pipecat/turns/user_stop/speech_timeout_user_turn_stop_strategy.py index 0a8feeb2e..e37b55756 100644 --- a/src/pipecat/turns/user_stop/speech_timeout_user_turn_stop_strategy.py +++ b/src/pipecat/turns/user_stop/speech_timeout_user_turn_stop_strategy.py @@ -8,7 +8,6 @@ import asyncio import time -from typing import Optional from loguru import logger @@ -59,8 +58,8 @@ class SpeechTimeoutUserTurnStopStrategy(BaseUserTurnStopStrategy): self._text = "" self._vad_user_speaking = False self._transcript_finalized = False - self._vad_stopped_time: Optional[float] = None - self._timeout_task: Optional[asyncio.Task] = None + self._vad_stopped_time: float | None = None + self._timeout_task: asyncio.Task | None = None self._timeout_expired: bool = False async def 
reset(self): diff --git a/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py b/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py index adc987583..667e89961 100644 --- a/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py +++ b/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py @@ -7,7 +7,6 @@ """User turn stop strategy based on turn detection analyzers.""" import asyncio -from typing import Optional from loguru import logger @@ -62,9 +61,9 @@ class TurnAnalyzerUserTurnStopStrategy(BaseUserTurnStopStrategy): self._text = "" self._turn_complete = False self._vad_user_speaking = False - self._vad_stopped_time: Optional[float] = None # Track when VAD stopped was received + self._vad_stopped_time: float | None = None # Track when VAD stopped was received self._transcript_finalized = False - self._timeout_task: Optional[asyncio.Task] = None + self._timeout_task: asyncio.Task | None = None self._timeout_expired: bool = False async def reset(self): @@ -232,7 +231,7 @@ class TurnAnalyzerUserTurnStopStrategy(BaseUserTurnStopStrategy): # Make sure the task is scheduled. await asyncio.sleep(0) - async def _handle_prediction_result(self, result: Optional[MetricsData]): + async def _handle_prediction_result(self, result: MetricsData | None): """Handle a prediction result event from the turn analyzer.""" if result: await self.push_frame(MetricsFrame(data=[result])) diff --git a/src/pipecat/turns/user_turn_completion_mixin.py b/src/pipecat/turns/user_turn_completion_mixin.py index c99355de7..1d8da1e07 100644 --- a/src/pipecat/turns/user_turn_completion_mixin.py +++ b/src/pipecat/turns/user_turn_completion_mixin.py @@ -14,7 +14,7 @@ were interrupted mid-thought. 
import asyncio from dataclasses import dataclass -from typing import Literal, Optional +from typing import Literal from loguru import logger @@ -156,11 +156,11 @@ class UserTurnCompletionConfig: incomplete_long_prompt: Custom prompt when long timeout expires. """ - instructions: Optional[str] = None + instructions: str | None = None incomplete_short_timeout: float = 5.0 incomplete_long_timeout: float = 10.0 - incomplete_short_prompt: Optional[str] = None - incomplete_long_prompt: Optional[str] = None + incomplete_short_prompt: str | None = None + incomplete_long_prompt: str | None = None @property def completion_instructions(self) -> str: @@ -223,8 +223,8 @@ class UserTurnCompletionLLMServiceMixin: # Timeout handling self._user_turn_completion_config = UserTurnCompletionConfig() - self._incomplete_timeout_task: Optional[asyncio.Task] = None - self._incomplete_type: Optional[Literal["short", "long"]] = None + self._incomplete_timeout_task: asyncio.Task | None = None + self._incomplete_type: Literal["short", "long"] | None = None def set_user_turn_completion_config(self, config: UserTurnCompletionConfig): """Set the turn completion configuration. @@ -385,7 +385,7 @@ class UserTurnCompletionLLMServiceMixin: # Check for incomplete markers (○ short, ◐ long) # These indicate the user was cut off or needs time - we suppress the bot's # response and start a timeout to re-prompt later. 
- incomplete_type: Optional[Literal["short", "long"]] = None + incomplete_type: Literal["short", "long"] | None = None if USER_TURN_INCOMPLETE_SHORT_MARKER in self._turn_text_buffer: incomplete_type = "short" elif USER_TURN_INCOMPLETE_LONG_MARKER in self._turn_text_buffer: diff --git a/src/pipecat/turns/user_turn_controller.py b/src/pipecat/turns/user_turn_controller.py index 9abed3932..2ba4e304b 100644 --- a/src/pipecat/turns/user_turn_controller.py +++ b/src/pipecat/turns/user_turn_controller.py @@ -7,7 +7,6 @@ """This module defines a controller for managing user turn lifecycle.""" import asyncio -from typing import Optional, Type from pipecat.frames.frames import ( Frame, @@ -85,13 +84,13 @@ class UserTurnController(BaseObject): self._user_turn_strategies = user_turn_strategies self._user_turn_stop_timeout = user_turn_stop_timeout - self._task_manager: Optional[BaseTaskManager] = None + self._task_manager: BaseTaskManager | None = None self._user_speaking = False self._user_turn = False self._user_turn_stop_timeout_event = asyncio.Event() - self._user_turn_stop_timeout_task: Optional[asyncio.Task] = None + self._user_turn_stop_timeout_task: asyncio.Task | None = None self._register_event_handler("on_push_frame", sync=True) self._register_event_handler("on_broadcast_frame", sync=True) @@ -235,7 +234,7 @@ class UserTurnController(BaseObject): async def _on_broadcast_frame( self, strategy: BaseUserTurnStartStrategy | BaseUserTurnStopStrategy, - frame_cls: Type[Frame], + frame_cls: type[Frame], **kwargs, ): await self._call_event_handler("on_broadcast_frame", frame_cls, **kwargs) @@ -256,7 +255,7 @@ class UserTurnController(BaseObject): await self._call_event_handler("on_reset_aggregation", strategy) async def _trigger_user_turn_start( - self, strategy: Optional[BaseUserTurnStartStrategy], params: UserTurnStartedParams + self, strategy: BaseUserTurnStartStrategy | None, params: UserTurnStartedParams ): # Prevent two consecutive user turn starts. 
if self._user_turn: @@ -276,7 +275,7 @@ class UserTurnController(BaseObject): await self._call_event_handler("on_user_turn_started", strategy, params) async def _trigger_user_turn_stop( - self, strategy: Optional[BaseUserTurnStopStrategy], params: UserTurnStoppedParams + self, strategy: BaseUserTurnStopStrategy | None, params: UserTurnStoppedParams ): # Prevent two consecutive user turn stops. if not self._user_turn: @@ -299,7 +298,7 @@ class UserTurnController(BaseObject): timeout=self._user_turn_stop_timeout, ) self._user_turn_stop_timeout_event.clear() - except asyncio.TimeoutError: + except TimeoutError: if self._user_turn and not self._user_speaking: await self._call_event_handler("on_user_turn_stop_timeout") await self._trigger_user_turn_stop( diff --git a/src/pipecat/turns/user_turn_processor.py b/src/pipecat/turns/user_turn_processor.py index a3501d2c8..536d297b5 100644 --- a/src/pipecat/turns/user_turn_processor.py +++ b/src/pipecat/turns/user_turn_processor.py @@ -6,8 +6,6 @@ """Frame processor for managing the user turn lifecycle.""" -from typing import Optional, Type - from loguru import logger from pipecat.frames.frames import ( @@ -64,7 +62,7 @@ class UserTurnProcessor(FrameProcessor): def __init__( self, *, - user_turn_strategies: Optional[UserTurnStrategies] = None, + user_turn_strategies: UserTurnStrategies | None = None, user_turn_stop_timeout: float = 5.0, user_idle_timeout: float = 0, **kwargs, @@ -165,7 +163,7 @@ class UserTurnProcessor(FrameProcessor): ): await self.push_frame(frame, direction) - async def _on_broadcast_frame(self, controller, frame_cls: Type[Frame], **kwargs): + async def _on_broadcast_frame(self, controller, frame_cls: type[Frame], **kwargs): await self.broadcast_frame(frame_cls, **kwargs) async def _on_user_turn_started( diff --git a/src/pipecat/turns/user_turn_strategies.py b/src/pipecat/turns/user_turn_strategies.py index d1abfeac5..8a663d3da 100644 --- a/src/pipecat/turns/user_turn_strategies.py +++ 
b/src/pipecat/turns/user_turn_strategies.py @@ -7,7 +7,6 @@ """Turn start strategy configuration.""" from dataclasses import dataclass -from typing import List, Optional from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.turns.user_start import ( @@ -23,7 +22,7 @@ from pipecat.turns.user_stop import ( ) -def default_user_turn_start_strategies() -> List[BaseUserTurnStartStrategy]: +def default_user_turn_start_strategies() -> list[BaseUserTurnStartStrategy]: """Return the default user turn start strategies. Returns ``[VADUserTurnStartStrategy, TranscriptionUserTurnStartStrategy]``. @@ -39,7 +38,7 @@ def default_user_turn_start_strategies() -> List[BaseUserTurnStartStrategy]: return [VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()] -def default_user_turn_stop_strategies() -> List[BaseUserTurnStopStrategy]: +def default_user_turn_stop_strategies() -> list[BaseUserTurnStopStrategy]: """Return the default user turn stop strategies. Returns ``[TurnAnalyzerUserTurnStopStrategy(LocalSmartTurnAnalyzerV3)]``. @@ -65,8 +64,8 @@ class UserTurnStrategies: """ - start: Optional[List[BaseUserTurnStartStrategy]] = None - stop: Optional[List[BaseUserTurnStopStrategy]] = None + start: list[BaseUserTurnStartStrategy] | None = None + stop: list[BaseUserTurnStopStrategy] | None = None def __post_init__(self): if not self.start: diff --git a/src/pipecat/utils/asyncio/task_manager.py b/src/pipecat/utils/asyncio/task_manager.py index c680e19b1..0b2420788 100644 --- a/src/pipecat/utils/asyncio/task_manager.py +++ b/src/pipecat/utils/asyncio/task_manager.py @@ -14,8 +14,8 @@ comprehensive monitoring and cleanup capabilities. 
import asyncio import traceback from abc import ABC, abstractmethod +from collections.abc import Coroutine, Sequence from dataclasses import dataclass -from typing import Coroutine, Dict, Optional, Sequence from loguru import logger @@ -71,7 +71,7 @@ class BaseTaskManager(ABC): pass @abstractmethod - async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = None): + async def cancel_task(self, task: asyncio.Task, timeout: float | None = None): """Cancels the given asyncio Task and awaits its completion with an optional timeout. This function removes the task from the set of registered tasks upon @@ -114,8 +114,8 @@ class TaskManager(BaseTaskManager): def __init__(self) -> None: """Initialize the task manager with empty task registry.""" - self._tasks: Dict[str, TaskData] = {} - self._params: Optional[TaskManagerParams] = None + self._tasks: dict[str, TaskData] = {} + self._params: TaskManagerParams | None = None def setup(self, params: TaskManagerParams): """Initialize the task manager with configuration parameters. @@ -177,7 +177,7 @@ class TaskManager(BaseTaskManager): logger.trace(f"{name}: task created") return task - async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = None): + async def cancel_task(self, task: asyncio.Task, timeout: float | None = None): """Cancels the given asyncio Task and awaits its completion with an optional timeout. This function removes the task from the set of registered tasks upon @@ -194,7 +194,7 @@ class TaskManager(BaseTaskManager): await asyncio.wait_for(task, timeout=timeout) else: await task - except asyncio.TimeoutError: + except TimeoutError: logger.warning(f"{name}: timed out waiting for task to cancel") except asyncio.CancelledError: # Here are sure the task is cancelled properly. 
diff --git a/src/pipecat/utils/base_object.py b/src/pipecat/utils/base_object.py index f6e4c47de..896c62b6a 100644 --- a/src/pipecat/utils/base_object.py +++ b/src/pipecat/utils/base_object.py @@ -16,7 +16,7 @@ import inspect import traceback from abc import ABC from dataclasses import dataclass -from typing import Any, Dict, List, Optional +from typing import Any from loguru import logger @@ -38,7 +38,7 @@ class EventHandler: """ name: str - handlers: List[Any] + handlers: list[Any] is_sync: bool @@ -50,7 +50,7 @@ class BaseObject(ABC): classes in the framework should inherit from this base class. """ - def __init__(self, *, name: Optional[str] = None, **kwargs): + def __init__(self, *, name: str | None = None, **kwargs): """Initialize the base object. Args: @@ -62,7 +62,7 @@ class BaseObject(ABC): self._name = name or f"{self.__class__.__name__}#{obj_count(self)}" # Registered event handlers. - self._event_handlers: Dict[str, EventHandler] = {} + self._event_handlers: dict[str, EventHandler] = {} # Set of tasks being executed. When a task finishes running it gets # automatically removed from the set. 
When we cleanup we wait for all diff --git a/src/pipecat/utils/context/llm_context_summarization.py b/src/pipecat/utils/context/llm_context_summarization.py index cfdf8f82f..85a7e6e19 100644 --- a/src/pipecat/utils/context/llm_context_summarization.py +++ b/src/pipecat/utils/context/llm_context_summarization.py @@ -13,7 +13,7 @@ context when token limits are reached, enabling efficient long-running conversat import json import warnings from dataclasses import dataclass, field -from typing import TYPE_CHECKING, List, Optional +from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from pipecat.services.llm_service import LLMService @@ -90,7 +90,7 @@ class LLMContextSummaryConfig: target_context_tokens: int = 6000 min_messages_after_summary: int = 4 - summarization_prompt: Optional[str] = None + summarization_prompt: str | None = None summary_message_template: str = "Conversation summary: {summary}" llm: Optional["LLMService"] = None summarization_timeout: float = DEFAULT_SUMMARIZATION_TIMEOUT @@ -139,8 +139,8 @@ class LLMAutoContextSummarizationConfig: default ``LLMContextSummaryConfig`` values. """ - max_context_tokens: Optional[int] = 8000 - max_unsummarized_messages: Optional[int] = 20 + max_context_tokens: int | None = 8000 + max_unsummarized_messages: int | None = 20 summary_config: LLMContextSummaryConfig = field(default_factory=LLMContextSummaryConfig) def __post_init__(self): @@ -192,11 +192,11 @@ class LLMContextSummarizationConfig: summarization_prompt: Custom prompt for summary generation. 
""" - max_context_tokens: Optional[int] = 8000 + max_context_tokens: int | None = 8000 target_context_tokens: int = 6000 - max_unsummarized_messages: Optional[int] = 20 + max_unsummarized_messages: int | None = 20 min_messages_after_summary: int = 4 - summarization_prompt: Optional[str] = None + summarization_prompt: str | None = None summary_message_template: str = "Conversation summary: {summary}" llm: Optional["LLMService"] = None summarization_timeout: float = DEFAULT_SUMMARIZATION_TIMEOUT @@ -269,7 +269,7 @@ class LLMMessagesToSummarize: last_summarized_index: Index of the last message being summarized """ - messages: List[dict] + messages: list[dict] last_summarized_index: int @@ -415,7 +415,7 @@ class LLMContextSummarizationUtil: @staticmethod def _get_earliest_function_call_not_resolved_in_range( - messages: List[dict], start_idx: int, summary_end: int + messages: list[dict], start_idx: int, summary_end: int ) -> int: """Find the earliest message index with incomplete function calls. @@ -574,7 +574,7 @@ class LLMContextSummarizationUtil: ) @staticmethod - def format_messages_for_summary(messages: List[dict]) -> str: + def format_messages_for_summary(messages: list[dict]) -> str: """Format messages as a transcript for summarization. 
Args: diff --git a/src/pipecat/utils/frame_queue.py b/src/pipecat/utils/frame_queue.py index 64617f770..1b787362d 100644 --- a/src/pipecat/utils/frame_queue.py +++ b/src/pipecat/utils/frame_queue.py @@ -7,7 +7,8 @@ """Frame queue utilities for Pipecat pipeline processors.""" import asyncio -from typing import Any, Callable, Type, Union +from collections.abc import Callable +from typing import Any from pipecat.frames.frames import Frame, UninterruptibleFrame @@ -41,7 +42,7 @@ class FrameQueue(asyncio.Queue): self._frame_getter = frame_getter self._uninterruptible_count: int = 0 - def has_frame(self, frame_type: Union[Type[Frame], Type[UninterruptibleFrame]]) -> bool: + def has_frame(self, frame_type: type[Frame] | type[UninterruptibleFrame]) -> bool: """Return True if any frame of the given type is in the queue. ``frame_type`` may be ``Frame``, ``UninterruptibleFrame`` (a mixin, not a diff --git a/src/pipecat/utils/string.py b/src/pipecat/utils/string.py index 20fcdb2e0..55b1c2d53 100644 --- a/src/pipecat/utils/string.py +++ b/src/pipecat/utils/string.py @@ -18,8 +18,8 @@ Dependencies: """ import re +from collections.abc import Sequence from dataclasses import dataclass -from typing import FrozenSet, List, Optional, Sequence, Tuple import nltk from loguru import logger @@ -41,7 +41,7 @@ except LookupError: "See https://www.nltk.org/data.html for more information." ) -SENTENCE_ENDING_PUNCTUATION: FrozenSet[str] = frozenset( +SENTENCE_ENDING_PUNCTUATION: frozenset[str] = frozenset( { # Latin script punctuation (most European languages, Filipino, etc.) ".", @@ -91,16 +91,16 @@ SENTENCE_ENDING_PUNCTUATION: FrozenSet[str] = frozenset( # Latin punctuation that NLTK handles well — these need NLTK's disambiguation # because "." can appear in abbreviations, decimals, etc. 
-_LATIN_SENTENCE_ENDING_PUNCTUATION: FrozenSet[str] = frozenset({".", "!", "?", ";", "…"}) +_LATIN_SENTENCE_ENDING_PUNCTUATION: frozenset[str] = frozenset({".", "!", "?", ";", "…"}) # Non-Latin sentence-ending punctuation that is always unambiguous and never needs # NLTK's disambiguation logic. Used as a fallback when NLTK doesn't support the # language (e.g., Japanese, Chinese, Korean, Hindi, Arabic). -UNAMBIGUOUS_SENTENCE_ENDING_PUNCTUATION: FrozenSet[str] = ( +UNAMBIGUOUS_SENTENCE_ENDING_PUNCTUATION: frozenset[str] = ( SENTENCE_ENDING_PUNCTUATION - _LATIN_SENTENCE_ENDING_PUNCTUATION ) -StartEndTags = Tuple[str, str] +StartEndTags = tuple[str, str] def replace_match(text: str, match: re.Match, old: str, new: str) -> str: @@ -179,9 +179,9 @@ def match_endofsentence(text: str) -> int: def parse_start_end_tags( text: str, tags: Sequence[StartEndTags], - current_tag: Optional[StartEndTags], + current_tag: StartEndTags | None, current_tag_index: int, -) -> Tuple[Optional[StartEndTags], int]: +) -> tuple[StartEndTags | None, int]: """Parse text to identify start and end tag pairs. If a start tag was previously found (i.e., current_tag is valid), wait for @@ -237,7 +237,7 @@ class TextPartForConcatenation: return f"{self.name}(text: [{self.text}], includes_inter_part_spaces: {self.includes_inter_part_spaces})" -def concatenate_aggregated_text(text_parts: List[TextPartForConcatenation]) -> str: +def concatenate_aggregated_text(text_parts: list[TextPartForConcatenation]) -> str: """Concatenate a list of text parts into a single string. This function joins the provided list of text parts into a single string, diff --git a/src/pipecat/utils/text/base_text_aggregator.py b/src/pipecat/utils/text/base_text_aggregator.py index 2b050fcb7..99ca145b6 100644 --- a/src/pipecat/utils/text/base_text_aggregator.py +++ b/src/pipecat/utils/text/base_text_aggregator.py @@ -12,12 +12,12 @@ aggregated text should be sent for speech synthesis. 
""" from abc import ABC, abstractmethod +from collections.abc import AsyncIterator from dataclasses import dataclass -from enum import Enum -from typing import AsyncIterator, Optional +from enum import StrEnum -class AggregationType(str, Enum): +class AggregationType(StrEnum): """Built-in aggregation strings.""" SENTENCE = "sentence" @@ -128,7 +128,7 @@ class BaseTextAggregator(ABC): yield # pragma: no cover @abstractmethod - async def flush(self) -> Optional[Aggregation]: + async def flush(self) -> Aggregation | None: """Flush any pending aggregation. This method is called at the end of a stream (e.g., when receiving diff --git a/src/pipecat/utils/text/base_text_filter.py b/src/pipecat/utils/text/base_text_filter.py index ce733ad93..f00bf871d 100644 --- a/src/pipecat/utils/text/base_text_filter.py +++ b/src/pipecat/utils/text/base_text_filter.py @@ -12,7 +12,8 @@ and interruption handling. """ from abc import ABC, abstractmethod -from typing import Any, Mapping +from collections.abc import Mapping +from typing import Any class BaseTextFilter(ABC): diff --git a/src/pipecat/utils/text/markdown_text_filter.py b/src/pipecat/utils/text/markdown_text_filter.py index 54f8a4770..f5a527ce1 100644 --- a/src/pipecat/utils/text/markdown_text_filter.py +++ b/src/pipecat/utils/text/markdown_text_filter.py @@ -11,7 +11,8 @@ while preserving structure and handling special cases like code blocks and table """ import re -from typing import Any, Mapping, Optional +from collections.abc import Mapping +from typing import Any from markdown import Markdown from pydantic import BaseModel @@ -37,11 +38,11 @@ class MarkdownTextFilter(BaseTextFilter): filter_tables: Whether to remove table content from the text. Defaults to False. 
""" - enable_text_filter: Optional[bool] = True - filter_code: Optional[bool] = False - filter_tables: Optional[bool] = False + enable_text_filter: bool | None = True + filter_code: bool | None = False + filter_tables: bool | None = False - def __init__(self, params: Optional[InputParams] = None, **kwargs): + def __init__(self, params: InputParams | None = None, **kwargs): """Initialize the Markdown text filter. Args: diff --git a/src/pipecat/utils/text/pattern_pair_aggregator.py b/src/pipecat/utils/text/pattern_pair_aggregator.py index c69622e8b..975413c78 100644 --- a/src/pipecat/utils/text/pattern_pair_aggregator.py +++ b/src/pipecat/utils/text/pattern_pair_aggregator.py @@ -12,8 +12,8 @@ support for custom handlers and configurable actions for when a pattern is found """ import re +from collections.abc import AsyncIterator, Awaitable, Callable from enum import Enum -from typing import AsyncIterator, Awaitable, Callable, List, Optional, Tuple from loguru import logger @@ -182,7 +182,7 @@ class PatternPairAggregator(SimpleTextAggregator): async def _process_complete_patterns( self, text: str, last_processed_position: int = 0 - ) -> Tuple[List[PatternMatch], str]: + ) -> tuple[list[PatternMatch], str]: """Process newly complete pattern pairs in the text. Searches for pattern pairs that have been completed since last_processed_position, @@ -246,7 +246,7 @@ class PatternPairAggregator(SimpleTextAggregator): return all_matches, processed_text - def _match_start_of_pattern(self, text: str) -> Optional[Tuple[int, dict]]: + def _match_start_of_pattern(self, text: str) -> tuple[int, dict] | None: """Check if text contains incomplete pattern pairs. 
Determines whether the text contains any start patterns without diff --git a/src/pipecat/utils/text/simple_text_aggregator.py b/src/pipecat/utils/text/simple_text_aggregator.py index b5b179fcf..f1e5d4249 100644 --- a/src/pipecat/utils/text/simple_text_aggregator.py +++ b/src/pipecat/utils/text/simple_text_aggregator.py @@ -11,7 +11,7 @@ until it finds an end-of-sentence marker, making it suitable for basic TTS text processing scenarios. """ -from typing import AsyncIterator, Optional +from collections.abc import AsyncIterator from pipecat.utils.string import SENTENCE_ENDING_PUNCTUATION, match_endofsentence from pipecat.utils.text.base_text_aggregator import Aggregation, AggregationType, BaseTextAggregator @@ -75,7 +75,7 @@ class SimpleTextAggregator(BaseTextAggregator): if result: yield result - async def _check_sentence_with_lookahead(self, char: str) -> Optional[Aggregation]: + async def _check_sentence_with_lookahead(self, char: str) -> Aggregation | None: """Check for sentence boundaries using lookahead logic. This method implements the core sentence detection logic with lookahead. @@ -120,7 +120,7 @@ class SimpleTextAggregator(BaseTextAggregator): return None - async def flush(self) -> Optional[Aggregation]: + async def flush(self) -> Aggregation | None: """Flush any remaining text in the buffer. Returns any text remaining in the buffer. This is called at the end diff --git a/src/pipecat/utils/text/skip_tags_aggregator.py b/src/pipecat/utils/text/skip_tags_aggregator.py index 1b6a7f156..d21c8e050 100644 --- a/src/pipecat/utils/text/skip_tags_aggregator.py +++ b/src/pipecat/utils/text/skip_tags_aggregator.py @@ -11,7 +11,7 @@ between specified start/end tag pairs, ensuring that tagged content is processed as a unit regardless of internal punctuation. 
""" -from typing import AsyncIterator, Optional, Sequence +from collections.abc import AsyncIterator, Sequence from pipecat.utils.string import StartEndTags, parse_start_end_tags from pipecat.utils.text.base_text_aggregator import Aggregation, AggregationType @@ -41,7 +41,7 @@ class SkipTagsAggregator(SimpleTextAggregator): """ super().__init__(**kwargs) self._tags = tags - self._current_tag: Optional[StartEndTags] = None + self._current_tag: StartEndTags | None = None self._current_tag_index: int = 0 async def aggregate(self, text: str) -> AsyncIterator[Aggregation]: diff --git a/src/pipecat/utils/time.py b/src/pipecat/utils/time.py index d650f7296..98561e7d9 100644 --- a/src/pipecat/utils/time.py +++ b/src/pipecat/utils/time.py @@ -20,7 +20,7 @@ def time_now_iso8601() -> str: Returns: The current UTC time in ISO8601 format with millisecond precision. """ - return datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="milliseconds") + return datetime.datetime.now(datetime.UTC).isoformat(timespec="milliseconds") def seconds_to_nanoseconds(seconds: float) -> int: diff --git a/src/pipecat/utils/tracing/service_attributes.py b/src/pipecat/utils/tracing/service_attributes.py index 5be781406..d9b86a9a4 100644 --- a/src/pipecat/utils/tracing/service_attributes.py +++ b/src/pipecat/utils/tracing/service_attributes.py @@ -11,7 +11,7 @@ attributes to OpenTelemetry spans, following standard semantic conventions where applicable and Pipecat-specific conventions for additional context. 
""" -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Optional # Import for type checking only if TYPE_CHECKING: @@ -68,11 +68,11 @@ def add_tts_span_attributes( service_name: str, model: str, voice_id: str, - text: Optional[str] = None, + text: str | None = None, settings: Optional["ServiceSettings"] = None, - character_count: Optional[int] = None, + character_count: int | None = None, operation_name: str = "tts", - ttfb: Optional[float] = None, + ttfb: float | None = None, **kwargs, ) -> None: """Add TTS-specific attributes to a span. @@ -123,13 +123,13 @@ def add_stt_span_attributes( service_name: str, model: str, operation_name: str = "stt", - transcript: Optional[str] = None, - is_final: Optional[bool] = None, - language: Optional[str] = None, - user_id: Optional[str] = None, + transcript: str | None = None, + is_final: bool | None = None, + language: str | None = None, + user_id: str | None = None, settings: Optional["ServiceSettings"] = None, vad_enabled: bool = False, - ttfb: Optional[float] = None, + ttfb: float | None = None, **kwargs, ) -> None: """Add STT-specific attributes to a span. @@ -187,15 +187,15 @@ def add_llm_span_attributes( service_name: str, model: str, stream: bool = True, - messages: Optional[str] = None, - output: Optional[str] = None, - tools: Optional[str] = None, - tool_count: Optional[int] = None, - tool_choice: Optional[str] = None, - system_instructions: Optional[str] = None, - parameters: Optional[Dict[str, Any]] = None, - extra_parameters: Optional[Dict[str, Any]] = None, - ttfb: Optional[float] = None, + messages: str | None = None, + output: str | None = None, + tools: str | None = None, + tool_count: int | None = None, + tool_choice: str | None = None, + system_instructions: str | None = None, + parameters: dict[str, Any] | None = None, + extra_parameters: dict[str, Any] | None = None, + ttfb: float | None = None, **kwargs, ) -> None: """Add LLM-specific attributes to a span. 
@@ -280,16 +280,16 @@ def add_gemini_live_span_attributes( service_name: str, model: str, operation_name: str, - voice_id: Optional[str] = None, - language: Optional[str] = None, - modalities: Optional[str] = None, + voice_id: str | None = None, + language: str | None = None, + modalities: str | None = None, settings: Optional["ServiceSettings"] = None, - tools: Optional[List[Dict]] = None, - tools_serialized: Optional[str] = None, - transcript: Optional[str] = None, - is_input: Optional[bool] = None, - text_output: Optional[str] = None, - audio_data_size: Optional[int] = None, + tools: list[dict] | None = None, + tools_serialized: str | None = None, + transcript: str | None = None, + is_input: bool | None = None, + text_output: str | None = None, + audio_data_size: int | None = None, **kwargs, ) -> None: """Add Gemini Live specific attributes to a span. @@ -385,14 +385,14 @@ def add_openai_realtime_span_attributes( service_name: str, model: str, operation_name: str, - session_properties: Optional[Dict[str, Any]] = None, - transcript: Optional[str] = None, - is_input: Optional[bool] = None, - context_messages: Optional[str] = None, - function_calls: Optional[List[Dict]] = None, - tools: Optional[List[Dict]] = None, - tools_serialized: Optional[str] = None, - audio_data_size: Optional[int] = None, + session_properties: dict[str, Any] | None = None, + transcript: str | None = None, + is_input: bool | None = None, + context_messages: str | None = None, + function_calls: list[dict] | None = None, + tools: list[dict] | None = None, + tools_serialized: str | None = None, + audio_data_size: int | None = None, **kwargs, ) -> None: """Add OpenAI Realtime specific attributes to a span. 
diff --git a/src/pipecat/utils/tracing/service_decorators.py b/src/pipecat/utils/tracing/service_decorators.py index 1922dd8f8..f4de1dd85 100644 --- a/src/pipecat/utils/tracing/service_decorators.py +++ b/src/pipecat/utils/tracing/service_decorators.py @@ -16,7 +16,8 @@ import functools import inspect import json import logging -from typing import TYPE_CHECKING, Callable, Optional, TypeVar +from collections.abc import Callable +from typing import TYPE_CHECKING, TypeVar # Type imports for type checking only if TYPE_CHECKING: @@ -164,7 +165,7 @@ def _add_token_usage_to_span(span, token_usage): span.set_attribute("gen_ai.usage.reasoning_tokens", reasoning_tokens) -def traced_tts(func: Optional[Callable] = None, *, name: Optional[str] = None) -> Callable: +def traced_tts(func: Callable | None = None, *, name: str | None = None) -> Callable: """Trace TTS service methods with TTS-specific attributes. Automatically captures and records: @@ -236,7 +237,7 @@ def traced_tts(func: Optional[Callable] = None, *, name: Optional[str] = None) - raise finally: # Update TTFB metric at the end - ttfb: Optional[float] = getattr(getattr(self, "_metrics", None), "ttfb", None) + ttfb: float | None = getattr(getattr(self, "_metrics", None), "ttfb", None) if ttfb is not None: span.set_attribute("metrics.ttfb", ttfb) @@ -288,7 +289,7 @@ def traced_tts(func: Optional[Callable] = None, *, name: Optional[str] = None) - return decorator -def traced_stt(func: Optional[Callable] = None, *, name: Optional[str] = None) -> Callable: +def traced_stt(func: Callable | None = None, *, name: str | None = None) -> Callable: """Trace STT service methods with transcription attributes. 
Automatically captures and records: @@ -329,9 +330,7 @@ def traced_stt(func: Optional[Callable] = None, *, name: Optional[str] = None) - ) as current_span: try: # Get TTFB metric if available - ttfb: Optional[float] = getattr( - getattr(self, "_metrics", None), "ttfb", None - ) + ttfb: float | None = getattr(getattr(self, "_metrics", None), "ttfb", None) # Use settings from the service if available settings = getattr(self, "_settings", None) @@ -369,7 +368,7 @@ def traced_stt(func: Optional[Callable] = None, *, name: Optional[str] = None) - return decorator -def traced_llm(func: Optional[Callable] = None, *, name: Optional[str] = None) -> Callable: +def traced_llm(func: Callable | None = None, *, name: str | None = None) -> Callable: """Trace LLM service methods with LLM-specific attributes. Automatically captures and records: @@ -567,9 +566,7 @@ def traced_llm(func: Optional[Callable] = None, *, name: Optional[str] = None) - self.start_llm_usage_metrics = original_start_llm_usage_metrics # Update TTFB metric - ttfb: Optional[float] = getattr( - getattr(self, "_metrics", None), "ttfb", None - ) + ttfb: float | None = getattr(getattr(self, "_metrics", None), "ttfb", None) if ttfb is not None: current_span.set_attribute("metrics.ttfb", ttfb) except Exception as e: diff --git a/src/pipecat/utils/tracing/tracing_context.py b/src/pipecat/utils/tracing/tracing_context.py index c84299701..63f0b84ae 100644 --- a/src/pipecat/utils/tracing/tracing_context.py +++ b/src/pipecat/utils/tracing/tracing_context.py @@ -11,6 +11,8 @@ conversation and turn span contexts. Each PipelineTask creates its own TracingContext, ensuring concurrent pipelines do not interfere with each other. 
""" +from __future__ import annotations + import uuid from typing import TYPE_CHECKING, Optional @@ -35,12 +37,12 @@ class TracingContext: def __init__(self): """Initialize the tracing context with empty state.""" - self._conversation_context: Optional["Context"] = None - self._turn_context: Optional["Context"] = None - self._conversation_id: Optional[str] = None + self._conversation_context: Context | None = None + self._turn_context: Context | None = None + self._conversation_id: str | None = None def set_conversation_context( - self, span_context: Optional["SpanContext"], conversation_id: Optional[str] = None + self, span_context: SpanContext | None, conversation_id: str | None = None ): """Set the current conversation context. @@ -59,7 +61,7 @@ class TracingContext: else: self._conversation_context = None - def get_conversation_context(self) -> Optional["Context"]: + def get_conversation_context(self) -> Context | None: """Get the OpenTelemetry context for the current conversation. Returns: @@ -67,7 +69,7 @@ class TracingContext: """ return self._conversation_context - def set_turn_context(self, span_context: Optional["SpanContext"]): + def set_turn_context(self, span_context: SpanContext | None): """Set the current turn context. Args: @@ -82,7 +84,7 @@ class TracingContext: else: self._turn_context = None - def get_turn_context(self) -> Optional["Context"]: + def get_turn_context(self) -> Context | None: """Get the OpenTelemetry context for the current turn. Returns: @@ -91,7 +93,7 @@ class TracingContext: return self._turn_context @property - def conversation_id(self) -> Optional[str]: + def conversation_id(self) -> str | None: """Get the ID for the current conversation. 
Returns: diff --git a/src/pipecat/utils/tracing/turn_trace_observer.py b/src/pipecat/utils/tracing/turn_trace_observer.py index 83c2bcdc2..b5f199e15 100644 --- a/src/pipecat/utils/tracing/turn_trace_observer.py +++ b/src/pipecat/utils/tracing/turn_trace_observer.py @@ -11,7 +11,9 @@ turn, integrating with the turn tracking system to provide hierarchical tracing of conversation flows. """ -from typing import TYPE_CHECKING, Dict, Optional +from __future__ import annotations + +from typing import TYPE_CHECKING, Optional from loguru import logger @@ -46,9 +48,9 @@ class TurnTraceObserver(BaseObserver): self, turn_tracker: TurnTrackingObserver, latency_tracker: UserBotLatencyObserver, - conversation_id: Optional[str] = None, - additional_span_attributes: Optional[dict] = None, - tracing_context: Optional[TracingContext] = None, + conversation_id: str | None = None, + additional_span_attributes: dict | None = None, + tracing_context: TracingContext | None = None, **kwargs, ): """Initialize the turn trace observer. 
@@ -65,13 +67,13 @@ class TurnTraceObserver(BaseObserver): self._turn_tracker = turn_tracker self._latency_tracker = latency_tracker self._tracing_context = tracing_context or TracingContext() - self._current_span: Optional["Span"] = None + self._current_span: Span | None = None self._current_turn_number: int = 0 - self._trace_context_map: Dict[int, "SpanContext"] = {} + self._trace_context_map: dict[int, SpanContext] = {} self._tracer = trace.get_tracer("pipecat.turn") if is_tracing_available() else None # Conversation tracking properties - self._conversation_span: Optional["Span"] = None + self._conversation_span: Span | None = None self._conversation_id = conversation_id self._additional_span_attributes = additional_span_attributes or {} @@ -115,7 +117,7 @@ class TurnTraceObserver(BaseObserver): if isinstance(data.frame, StartFrame) and not self._conversation_span: self.start_conversation_tracing(self._conversation_id) - def start_conversation_tracing(self, conversation_id: Optional[str] = None): + def start_conversation_tracing(self, conversation_id: str | None = None): """Start a new conversation span. Args: @@ -230,7 +232,7 @@ class TurnTraceObserver(BaseObserver): logger.debug(f"Ended tracing for Turn {turn_number}") - def get_current_turn_context(self) -> Optional["SpanContext"]: + def get_current_turn_context(self) -> SpanContext | None: """Get the span context for the current turn. This can be used by services to create child spans. @@ -243,7 +245,7 @@ class TurnTraceObserver(BaseObserver): return self._current_span.get_span_context() - def get_turn_context(self, turn_number: int) -> Optional["SpanContext"]: + def get_turn_context(self, turn_number: int) -> SpanContext | None: """Get the span context for a specific turn. This can be used by services to create child spans. 
diff --git a/tests/test_direct_functions.py b/tests/test_direct_functions.py index 5c926cf02..d94172e92 100644 --- a/tests/test_direct_functions.py +++ b/tests/test_direct_functions.py @@ -52,7 +52,7 @@ class TestDirectFunction(unittest.TestCase): self.assertEqual(func.properties, {}) async def my_function_simple_params( - params: FunctionCallParams, name: str, age: int, height: Union[float, None] + params: FunctionCallParams, name: str, age: int, height: float | None ): return {"status": "success"}, None @@ -70,7 +70,7 @@ class TestDirectFunction(unittest.TestCase): params: FunctionCallParams, address_lines: list[str], nickname: str | int | float, - extra: Optional[dict[str, str]], + extra: dict[str, str] | None, ): return {"status": "success"}, None @@ -134,7 +134,7 @@ class TestDirectFunction(unittest.TestCase): self.assertEqual(func.required, []) async def my_function_simple_params( - params: FunctionCallParams, name: str, age: int, height: Union[float, None] = None + params: FunctionCallParams, name: str, age: int, height: float | None = None ): return {"status": "success"}, None @@ -143,9 +143,9 @@ class TestDirectFunction(unittest.TestCase): async def my_function_complex_params( params: FunctionCallParams, - address_lines: Optional[list[str]], + address_lines: list[str] | None, nickname: str | int = "Bud", - extra: Optional[dict[str, str]] = None, + extra: dict[str, str] | None = None, ): return {"status": "success"}, None @@ -154,7 +154,7 @@ class TestDirectFunction(unittest.TestCase): def test_property_descriptions_are_set_from_function(self): async def my_function( - params: FunctionCallParams, name: str, age: int, height: Union[float, None] + params: FunctionCallParams, name: str, age: int, height: float | None ): """ This is a test function. 
diff --git a/tests/test_frame_processor.py b/tests/test_frame_processor.py index a875741e3..4c39d4d89 100644 --- a/tests/test_frame_processor.py +++ b/tests/test_frame_processor.py @@ -7,7 +7,6 @@ import asyncio import unittest from dataclasses import dataclass, field -from typing import List from pipecat.frames.frames import ( DataFrame, @@ -35,7 +34,7 @@ class BroadcastTestFrame(DataFrame): text: str = "" value: int = 0 - items: List[str] = field(default_factory=list) + items: list[str] = field(default_factory=list) class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): @@ -191,8 +190,8 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): async def test_broadcast_frame(self): """Test that broadcast_frame creates two separate frames with fresh IDs.""" - downstream_frames: List[Frame] = [] - upstream_frames: List[Frame] = [] + downstream_frames: list[Frame] = [] + upstream_frames: list[Frame] = [] class BroadcastTestProcessor(FrameProcessor): async def process_frame(self, frame: Frame, direction: FrameDirection): @@ -205,7 +204,7 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): await self.push_frame(frame, direction) class CaptureProcessor(FrameProcessor): - def __init__(self, capture_list: List[Frame], direction: FrameDirection): + def __init__(self, capture_list: list[Frame], direction: FrameDirection): super().__init__() self._capture_list = capture_list self._capture_direction = direction @@ -256,9 +255,9 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): async def test_broadcast_frame_instance(self): """Test that broadcast_frame_instance shallow-copies all fields except id and name.""" - downstream_frames: List[Frame] = [] - upstream_frames: List[Frame] = [] - original_frame: List[Frame] = [] + downstream_frames: list[Frame] = [] + upstream_frames: list[Frame] = [] + original_frame: list[Frame] = [] class BroadcastInstanceTestProcessor(FrameProcessor): async def process_frame(self, frame: Frame, direction: 
FrameDirection): @@ -273,7 +272,7 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): await self.push_frame(frame, direction) class CaptureProcessor(FrameProcessor): - def __init__(self, capture_list: List[Frame], direction: FrameDirection): + def __init__(self, capture_list: list[Frame], direction: FrameDirection): super().__init__() self._capture_list = capture_list self._capture_direction = direction @@ -346,7 +345,7 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): This test simulates issue #3524 where an InterruptionFrame during slow processing would cause terminal frames to be lost, freezing the pipeline. """ - received_frames: List[Frame] = [] + received_frames: list[Frame] = [] class DelayAndInterruptProcessor(FrameProcessor): """This processor delays processing and then generates an interruption. @@ -398,7 +397,7 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): Similar to test_terminal_frames_survive_interruption but specifically for StopFrame. 
""" - received_frames: List[Frame] = [] + received_frames: list[Frame] = [] class DelayAndInterruptProcessor(FrameProcessor): """This processor delays processing and then generates an interruption.""" diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index de62d9739..8851071c4 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -289,7 +289,7 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase): task.run(PipelineTaskParams(loop=asyncio.get_event_loop())), timeout=1.0, ) - except asyncio.TimeoutError: + except TimeoutError: pass assert upstream_received @@ -317,7 +317,7 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase): task.run(PipelineTaskParams(loop=asyncio.get_event_loop())), timeout=1.0, ) - except asyncio.TimeoutError: + except TimeoutError: pass assert upstream_received @@ -346,7 +346,7 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase): task.run(PipelineTaskParams(loop=asyncio.get_event_loop())), timeout=1.0, ) - except asyncio.TimeoutError: + except TimeoutError: pass assert "First" in upstream_texts @@ -382,7 +382,7 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase): task.run(PipelineTaskParams(loop=asyncio.get_event_loop())), timeout=1.0, ) - except asyncio.TimeoutError: + except TimeoutError: pass assert heartbeats_counter == expected_heartbeats @@ -417,7 +417,7 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase): task.run(PipelineTaskParams(loop=asyncio.get_event_loop())), timeout=0.6, ) - except asyncio.TimeoutError: + except TimeoutError: pass log_text = log_output.getvalue() @@ -441,7 +441,7 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase): task.run(PipelineTaskParams(loop=asyncio.get_event_loop())), timeout=0.3, ) - except asyncio.TimeoutError: + except TimeoutError: assert True else: assert False diff --git a/tests/test_service_language.py b/tests/test_service_language.py index b4a3daaa9..7fee19aaf 100644 --- a/tests/test_service_language.py +++ 
b/tests/test_service_language.py @@ -10,7 +10,8 @@ Verifies that Language enums, raw strings (e.g. "de-DE"), and unrecognized strings are all resolved correctly at both init time and runtime update time. """ -from typing import AsyncGenerator, Optional +from collections.abc import AsyncGenerator +from typing import Optional from unittest.mock import patch import pytest @@ -45,7 +46,7 @@ class _TestTTSService(TTSService): async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: yield # pragma: no cover - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: return resolve_language(language, _LANGUAGE_MAP, use_base_code=True) @@ -64,7 +65,7 @@ class _TestSTTService(STTService): async def process_audio_frame(self, frame, direction): pass # pragma: no cover - def language_to_service_language(self, language: Language) -> Optional[str]: + def language_to_service_language(self, language: Language) -> str | None: return resolve_language(language, _LANGUAGE_MAP, use_base_code=True) diff --git a/tests/test_tts_frame_ordering.py b/tests/test_tts_frame_ordering.py index 2d7755808..bdcc588be 100644 --- a/tests/test_tts_frame_ordering.py +++ b/tests/test_tts_frame_ordering.py @@ -24,8 +24,8 @@ verifies TTSTextFrame ordering relative to LLMFullResponseEndFrame. import asyncio import unittest +from collections.abc import AsyncGenerator, Sequence from dataclasses import dataclass -from typing import AsyncGenerator, List, Sequence, Tuple import pytest @@ -215,7 +215,7 @@ class MockWebSocketPauseTTSService(TTSService): def _assert_group_ordering( down_frames: Sequence[Frame], - expected_groups: List[Tuple[str, str]], + expected_groups: list[tuple[str, str]], ) -> None: """Assert two (or more) TTS+FooFrame groups are in strict order. @@ -240,7 +240,7 @@ def _assert_group_ordering( ) # Build groups: everything up to and including each FooFrame. 
- groups: List[List[Frame]] = [] + groups: list[list[Frame]] = [] prev = 0 for idx in foo_indices: groups.append(relevant[prev : idx + 1]) @@ -298,7 +298,7 @@ def _assert_group_ordering( _GROUPS = [("test 1", "1"), ("test 2", "2")] -def _make_frames_no_sleep() -> List[Frame]: +def _make_frames_no_sleep() -> list[Frame]: """Return two TTSSpeakFrame+FooFrame pairs sent back-to-back. Only correct for services that pause downstream processing until the audio diff --git a/tests/test_vad_controller.py b/tests/test_vad_controller.py index 30f2fe56c..9ed7e9a1b 100644 --- a/tests/test_vad_controller.py +++ b/tests/test_vad_controller.py @@ -6,7 +6,6 @@ import asyncio import unittest -from typing import List from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADParams, VADState from pipecat.audio.vad.vad_controller import VADController @@ -125,7 +124,7 @@ class TestVADController(unittest.IsolatedAsyncioTestCase): analyzer = MockVADAnalyzer() controller = VADController(analyzer) - pushed_frames: List[tuple] = [] + pushed_frames: list[tuple] = [] @controller.event_handler("on_push_frame") async def on_push_frame(_controller, frame: Frame, direction: FrameDirection): @@ -143,7 +142,7 @@ class TestVADController(unittest.IsolatedAsyncioTestCase): analyzer = MockVADAnalyzer() controller = VADController(analyzer) - broadcast_calls: List[tuple] = [] + broadcast_calls: list[tuple] = [] @controller.event_handler("on_broadcast_frame") async def on_broadcast_frame(_controller, frame_cls, **kwargs): @@ -192,7 +191,7 @@ class TestVADController(unittest.IsolatedAsyncioTestCase): analyzer = MockVADAnalyzer() controller = VADController(analyzer) - broadcast_calls: List[tuple] = [] + broadcast_calls: list[tuple] = [] @controller.event_handler("on_broadcast_frame") async def on_broadcast_frame(_controller, frame_cls, **kwargs): diff --git a/tests/test_vad_processor.py b/tests/test_vad_processor.py index afb6e1482..d6c40e5b4 100644 --- a/tests/test_vad_processor.py +++ 
b/tests/test_vad_processor.py @@ -5,7 +5,6 @@ # import unittest -from typing import List from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADState from pipecat.frames.frames import ( @@ -22,7 +21,7 @@ from pipecat.tests.utils import run_test class MockVADAnalyzer(VADAnalyzer): """A mock VAD analyzer that returns states from a predefined sequence.""" - def __init__(self, states: List[VADState]): + def __init__(self, states: list[VADState]): super().__init__(sample_rate=16000) self._states = list(states) self._call_index = 0